Never been to DZone Snippets before?

Snippets is a public source code repository. Easily build up your personal collection of code snippets, categorize them with tags / keywords, and share them with the world.

About this user

Andrew Pennebaker http://mcandre.devjavu.com/wiki

« Newer Snippets
Older Snippets »
Showing 1-2 of 2 total  RSS 

itcrowdquote.rb

// Prints a quote from Channel 4's "The IT Crowd"

#!/usr/bin/env ruby

require "rubygems"
require "open-uri"
require "hpricot"
require "htmlentities"

# Download the quote generator page and parse it with Hpricot.
url = "http://www.channel4.com/entertainment/tv/microsites/I/itcrowd/quote_generator/"
page = open(url) { |io| Hpricot(io) }

# The quote is the <p> content inside <blockquote>; drop the nested <cite>
# element before reading the markup back out.
paragraphs = page.search("blockquote").search("p")
paragraphs.search("cite").remove
raw = paragraphs.inner_html

# Clean-up passes, applied in this order:
#   1. strip leading whitespace from every line
#   2. replace trailing whitespace on every line with the record separator ($/)
#   3. drop a dangling " -" before a line end, then chomp the final separator
text = raw.gsub(/^\s+/, "").gsub(/\s+$/, $/).gsub(/\s\-\s+$/, $/).chomp

# Decode HTML entities and print the result.
puts HTMLEntities.new.decode(text)

html2txt.py

#!/usr/bin/env python

"""Convert HTML read from standard input to plain text on standard output.

Tags are discarded and their text content is kept; see the html2txt class.
"""

__author__="Andrew Pennebaker (andrew.pennebaker@gmail.com)"
__date__="10 Dec 2006"
__copyright__="Copyright 2006 Andrew Pennebaker"
__license__="GPL"
__version__="0.0.1"
__credits__="Based on http://mail.python.org/pipermail/python-list/2004-November/291562.html"
__URL__="http://snippets.dzone.com/posts/show/3127"

import htmllib
from sgmllib import SGMLParser

import sys

class html2txt(SGMLParser):
	"""html2txt()"""

	def reset(self):
		SGMLParser.reset(self)
		self.pieces=[]

	def handle_data(self, text):
		self.pieces.append(text)

	def unknown_starttag(self, tag, attributes):
		pass

	def unknown_endtag(self, tag):
		pass

	def handle_entityref(self, ref):
		try:
			self.pieces.append(htmllib.HTMLParser.entitydefs[ref])
		except KeyError, e:
			self.pieces.append("&"+ref)

	def output(self):
		return "".join(self.pieces)

if __name__ == "__main__":
    # Read the whole document from standard input, strip its markup, and
    # write the remaining text to standard output.
    converter = html2txt()
    converter.feed(sys.stdin.read())
    converter.close()

    print(converter.output())
« Newer Snippets
Older Snippets »
Showing 1-2 of 2 total  RSS