Decode HTML entities
print decode_htmlentities("l&#39;eau")  # prints: l'eau
import re

try:  # Python 2
    from htmlentitydefs import name2codepoint as n2cp
except ImportError:  # Python 3 moved the table to html.entities
    from html.entities import name2codepoint as n2cp

try:
    unichr
except NameError:  # Python 3: chr() already returns a text character
    unichr = chr

# "&", an optional "#", then 1-8 word characters, then ";".
# Compiled once at import time instead of on every call.
_ENTITY_RE = re.compile(r"&(#?)(\w{1,8});")

# Body of a numeric reference: "x"/"X" + hex digits, or plain ASCII decimal
# digits. Checked explicitly so int() is never handed something it would
# reject (or leniently accept, such as "1_0" on Python 3).
_NUMERIC_BODY_RE = re.compile(r"(?:[xX]([0-9a-fA-F]+)|([0-9]+))\Z")


def substitute_entity(match):
    """Return the replacement text for one entity match.

    ``match`` is a match object produced by the entity pattern: group 1 is
    ``"#"`` for a numeric reference (empty otherwise) and group 2 is the
    reference body.

    Numeric references (decimal ``&#39;`` or hexadecimal ``&#x27;``) are
    replaced by the corresponding character; named references are looked up
    in ``name2codepoint``. Anything that cannot be decoded -- an unknown
    name, a malformed number, or a code point the interpreter cannot
    represent -- is returned unchanged rather than raising.
    """
    body = match.group(2)

    if match.group(1) == "#":
        numeric = _NUMERIC_BODY_RE.match(body)
        if numeric is None:
            # e.g. "&#abc;": not a number, leave the text as it was.
            return match.group()
        hex_digits, dec_digits = numeric.groups()
        if hex_digits is not None:
            codepoint = int(hex_digits, 16)
        else:
            codepoint = int(dec_digits)
        try:
            return unichr(codepoint)
        except (ValueError, OverflowError):
            # Code point outside the range this interpreter supports.
            return match.group()

    codepoint = n2cp.get(body)
    if codepoint is None:
        return match.group()
    return unichr(codepoint)


def decode_htmlentities(string):
    """Return ``string`` with HTML entities replaced by their characters.

    Handles named entities (``&eacute;``), decimal references (``&#233;``)
    and hexadecimal references (``&#xE9;``). Unrecognised or malformed
    entities are left in place. The text is scanned once, so the output of
    one replacement is not decoded again (``&amp;lt;`` becomes ``&lt;``).
    """
    return _ENTITY_RE.sub(substitute_entity, string)