import BeautifulSoup
from PyRSS2Gen import RSSItem, Guid
import ScrapeNFeed
import urllib2
import re
# Set to a true value to make HeiFeed.HTML2RSS print the title and link of
# every article it has not seen before.
debug = 0
def fetch(url):
    """Download *url* and return its body together with the response headers.

    Returns a ``(body, headers)`` tuple: ``body`` is whatever ``read()``
    yields for the response and ``headers`` is the object returned by the
    response's ``info()`` method.  Nothing is caught here, so any error
    raised by urllib2 propagates to the caller.
    """
    response = urllib2.urlopen(urllib2.Request(url))
    try:
        return response.read(), response.info()
    finally:
        # Always release the connection, even when read() raises; this
        # function is called once per article, so leaks would add up.
        response.close()
class HeiFeed(ScrapeNFeed.ScrapedFeed):
    """RSS feed scraped from the heise.de newsticker overview page."""

    def HTML2RSS(self, headers, body):
        """Add an RSS item for every article link not seen before.

        Scans *body* for anchors whose ``href`` starts with ``meldung``,
        downloads each article that ``hasSeen`` does not report as known and
        uses the article's first ``meldung_wrapper`` div as the item
        description.  Anchors without a leading text node and articles
        without that div are skipped instead of aborting the whole run.

        headers -- headers of the overview page; not used by this method
        body    -- HTML source of the overview page
        """
        items = []
        soup = BeautifulSoup.BeautifulSoup(body)
        for anchor in soup('a', {'href': re.compile('^meldung.*')}):
            link = 'http://www.heise.de/newsticker/' + anchor['href']
            if self.hasSeen(link):
                continue

            # The title is the anchor's first child, which has to be a text
            # node: an empty anchor would raise IndexError and a tag child
            # has no usable strip().
            children = anchor.contents
            if not children or not isinstance(children[0],
                                              BeautifulSoup.NavigableString):
                if debug:
                    print("skip (no text title): " + link)
                continue
            title = children[0].strip()
            if debug:
                # Single-argument print(...) behaves the same as the print
                # statement under Python 2.
                print("title: " + title)
                print("link : " + link)

            # Separate names for the article response so the `headers`
            # parameter of this method is not overwritten.
            article_html, _article_headers = fetch(link)
            article = BeautifulSoup.BeautifulSoup(article_html)
            wrappers = article.fetch('div', {'class': 'meldung_wrapper'})
            if not wrappers:
                # Page layout differs from what we expect; leave the link
                # unseen rather than crash and lose the items gathered so far.
                if debug:
                    print("skip (no meldung_wrapper): " + link)
                continue

            items.append(RSSItem(title=title,
                                 description=wrappers[0].prettify(),
                                 link=link))
        self.addRSSItems(items)
# Runs whenever this module is executed or imported (there is no __main__
# guard).  NOTE(review): the positional arguments look like feed title, page
# URL, feed description, RSS output file and pickle state file -- confirm
# against ScrapeNFeed's load() signature.
HeiFeed.load(
    "heise.de newsticker",
    'http://www.heise.de/newsticker/',
    "heise.de newsticker",
    'heise_rss.xml',
    'heise_rss.pickle',
    managingEditor='tsch'
)