These two helpers strip any unwanted HTML tags from user input and automatically close tags that were (perhaps accidentally) left open.
# Turns plain user text into paragraph markup and passes it to sanitize_fu.
#
# The stripped input is wrapped in a single <p>...</p>; blank lines become
# paragraph boundaries and the remaining newlines become <br/> tags.
#
# @param input [String] raw text typed by the user
# @return whatever sanitize_fu returns for the generated markup
def format_user_text(input)
  # Applied strictly in this order: each rule relies on the previous one.
  rewrites = [
    [/\r\n/, "\n"],           # CRLF line endings -> LF
    [/^$\s*/m, "\n"],         # an empty line plus the whitespace after it -> one newline
    [/\n{3,}/, "\n\n"],       # three or more newlines -> exactly two
    [/\n\n/, '</p><p>'],      # a double newline separates paragraphs
    [/\n/, '<br/>']           # any newline still left is a line break
  ]
  markup = rewrites.inject("<p>#{input.strip}</p>") do |text, (pattern, replacement)|
    text.gsub(pattern, replacement)
  end
  sanitize_fu(markup)
end
def sanitize_fu(html, okTags = 'a href, b, br, p, i, em')
soloTags = ["br","hr"]
tags = okTags.downcase().split(',').collect!{ |s| s.split(' ') }
allowed = Hash.new
tags.each do |s|
key = s.shift
allowed[key] = s
end
stack = Array.new
result = html.gsub( /(<.*?>)/m ) do | element |
if element =~ /\A<\/(\w+)/ then
tag = $1.downcase
if allowed.include?(tag) && stack.include?(tag) then
top = stack.pop
out = "</#{top}>"
until top == tag do
top = stack.pop
out << "</#{top}>"
end
out
end
elsif element =~ /\A<(\w+)\s*\/>/
tag = $1.downcase
if allowed.include?(tag) then
"<#{tag} />"
end
elsif element =~ /\A<(\w+)/ then
tag = $1.downcase
if allowed.include?(tag) then
if ! soloTags.include?(tag) then
stack.push(tag)
end
if allowed[tag].length == 0 then
"<#{tag}>"
else
out = "<#{tag}"
while ( $' =~ /(\w+)=("[^"]+")/ )
attr = $1.downcase
valu = $2
if allowed[tag].include?(attr) then
out << " #{attr}=#{valu}"
end
end
out << ">"
end
end
end
end
while result.sub!(/\A([^<]*)>/m) { $1 } do end
while result.sub!(/<([^>]*)\Z/m) { $1 } do end
if stack.length > 0 then
result << "</#{stack.reverse.join('></')}>"
end
result
end