#!/usr/bin/env python
# -*- encoding: latin1 -*-
"""Scrape the heise.de newsticker front page into a full-text RSS feed.

Every link on the front page whose ``href`` starts with ``meldung`` is
treated as an article.  For each article that has not been seen before, the
article page is downloaded and its ``meldung_wrapper`` div becomes the
description of the RSS item.

This is Python 2 code (``urllib2``, BeautifulSoup 3, ScrapeNFeed).
"""
import BeautifulSoup
from PyRSS2Gen import RSSItem, Guid
import ScrapeNFeed
import urllib2
import re

# Set to a true value to print every newly discovered article while scraping.
debug = 0

# Prefix prepended to the (relative) article hrefs found on the front page.
_NEWSTICKER_URL = 'http://www.heise.de/newsticker/'

# Compiled once at import time instead of on every HTML2RSS call.
_ARTICLE_HREF = re.compile('^meldung.*')


def fetch(url):
    """Download *url* and return ``(body, headers)``.

    ``body`` is whatever ``response.read()`` yields and ``headers`` is the
    object returned by ``response.info()``.  The response is always closed,
    even when reading fails, so that connections are not leaked while many
    articles are fetched in a row.  Errors raised by ``urllib2`` propagate to
    the caller unchanged.
    """
    response = urllib2.urlopen(urllib2.Request(url))
    try:
        return response.read(), response.info()
    finally:
        response.close()


class HeiFeed(ScrapeNFeed.ScrapedFeed):
    """Scraped feed whose items carry the article markup as description."""

    def HTML2RSS(self, headers, body):
        """Turn the front-page HTML in *body* into RSS items.

        *headers* is accepted for interface compatibility and is not used.
        Articles for which ``self.hasSeen(link)`` is true are skipped; all
        remaining ones are collected and handed to ``self.addRSSItems`` in a
        single call.
        """
        items = []
        soup = BeautifulSoup.BeautifulSoup(body)
        for anchor in soup('a', {'href': _ARTICLE_HREF}):
            link = _NEWSTICKER_URL + anchor['href']
            if self.hasSeen(link):
                continue

            # NOTE(review): assumes the first child of the anchor is its text
            # node; an anchor that is empty or starts with a nested tag would
            # still fail here, exactly as before -- confirm against the page.
            title = anchor.contents[0].strip()
            if debug:
                # Parenthesised single-argument form prints the same text
                # under the Python 2 print statement.
                print("title: " + title)
                print("link : " + link)

            # Dedicated local names: the original rebound the ``headers``
            # parameter here.
            article_html, _article_headers = fetch(link)
            article = BeautifulSoup.BeautifulSoup(article_html)
            wrappers = article('div', {'class': 'meldung_wrapper'})
            if not wrappers:
                # A page without the expected wrapper used to raise
                # IndexError and abort the whole feed update.  Skip just this
                # article instead; it is not added, so it is not reported as
                # a new item in this run.
                if debug:
                    print("skip : no meldung_wrapper in " + link)
                continue

            items.append(RSSItem(title=title,
                                 description=wrappers[0].prettify(),
                                 link=link))
        self.addRSSItems(items)


# Runs at module level, as in the original script.
# NOTE(review): the two file names are presumably the generated feed and the
# persisted scraper state -- confirm against ScrapeNFeed.ScrapedFeed.load.
HeiFeed.load("heise.de newsticker",
             _NEWSTICKER_URL,
             "heise.de newsticker",
             'heise_rss.xml',
             'heise_rss.pickle',
             managingEditor = 'tsch')
# You need to create an account or log in to post comments to this site.