Never been to DZone Snippets before?

Snippets is a public source code repository. Easily build up your personal collection of code snippets, categorize them with tags / keywords, and share them with the world.

« Newer Snippets
Older Snippets »
Showing 1-3 of 3 total  RSS 

Screen scrape heise.de newsticker (German)

#!/usr/bin/env python
# -*- encoding: latin1 -*-

import BeautifulSoup
from PyRSS2Gen import RSSItem, Guid
import ScrapeNFeed
import urllib2
import re

# Set to a true value to print the title and link of every article as it is scraped.
debug = 0

def fetch(url):
    """Download *url* and return a ``(body, headers)`` tuple.

    body    -- the complete response body as returned by ``read()``
    headers -- the object returned by the response's ``info()`` method

    Errors raised by ``urllib2.urlopen`` are not caught here.
    """
    response = urllib2.urlopen(urllib2.Request(url))
    try:
        # Both values are taken before the connection is closed below.
        return response.read(), response.info()
    finally:
        # Release the connection even when read() fails part-way.
        response.close()

class HeiFeed(ScrapeNFeed.ScrapedFeed):
    """Scraped feed that turns the heise.de newsticker index into RSS items."""

    def HTML2RSS(self, headers, body):
        """Add one RSS item per not-yet-seen article linked from *body*.

        headers -- supplied by the caller; not used by this method
        body    -- HTML of the newsticker index page

        Every new article page is downloaded and the pretty-printed
        'meldung_wrapper' div becomes the item description.
        """
        items = []
        # Links already queued during this pass; hasSeen() is not relied on
        # to know about items that have not been handed to the feed yet.
        queued = set()
        soup = BeautifulSoup.BeautifulSoup(body)
        for anchor in soup('a', {'href' : re.compile('^meldung.*')}):
            link = 'http://www.heise.de/newsticker/' + anchor['href']
            if link in queued or self.hasSeen(link):
                continue
            title = anchor.contents[0].strip()
            if debug:
                # Single-argument print(...) behaves the same as the
                # print statement here.
                print("title: " + title)
                print("link : " + link)
            # Use separate names so the `headers` parameter is not clobbered.
            article_html, _article_headers = fetch(link)
            article = BeautifulSoup.BeautifulSoup(article_html)
            wrappers = article.fetch('div', {'class': 'meldung_wrapper'})
            if not wrappers:
                # No article body on the page: skip this link instead of
                # aborting the whole run with an IndexError.
                continue
            desc = wrappers[0].prettify()
            items.append(RSSItem(title=title, description=desc, link=link))
            queued.add(link)
        # Hand the collected items over once, after the loop; doing this
        # inside the loop re-submitted the growing list on every anchor.
        self.addRSSItems(items)

# Build/refresh the feed.  NOTE(review): the positional arguments look like
# title, page URL, description, RSS output file and pickle state file --
# confirm against the ScrapeNFeed documentation.
HeiFeed.load(
    "heise.de newsticker",
    'http://www.heise.de/newsticker/',
    "heise.de newsticker",
    'heise_rss.xml',
    'heise_rss.pickle',
    managingEditor='tsch',
)

Create an RSS feed from an SQL query

#!/usr/bin/env python
# -*- encoding: latin1 -*-

import datetime,PyRSS2Gen,sqlobject
from sqlobject.postgres import builder

# Open the PostgreSQL connection (placeholder credentials -- adjust as needed).
con = builder()(user = 'user', passwd = '', host = 'localhost', db='name')

# set db encoding (maybe optional)
con.queryOne("SET client_encoding TO 'latin1'; SELECT 1;")

# One RSS item per row; the columns arrive in the order of the SELECT list.
items = []
for title, url, datum, description in con.queryAll("""SELECT title,url,datum,description FROM table ORDER BY datum DESC LIMIT 30"""):
    items.append(
        PyRSS2Gen.RSSItem(
            title = title, link = url,
            # The third placeholder is the description column (index 3 of the row).
            description = """<h2>%s</h2>on %s<br/><p>%s</p>""" % (title, datum, description),
            guid = PyRSS2Gen.Guid(url), pubDate = datum))

# generate rss feed
feed = PyRSS2Gen.RSS2(
    title         = "sql2rss feed",
    link          = "http://localhost/die URL",
    description   = "The latest sql2rss news",
    lastBuildDate = datetime.datetime.now(),
    items         = items)

# Close the output file explicitly so the XML is flushed even if writing fails.
out = open("sql2rss.xml", "w")
try:
    feed.write_xml(out)
finally:
    out.close()

A simple Python class to browse the Snippets website (with BeautifulSoup)

If you have patches or enhancements, you can mail me at my pseudonym at gmail.com and I'll update it.
(You should install the marvellous BeautifulSoup module: http://www.crummy.com/software/BeautifulSoup/documentation.html)

The snippets.py file:
from BeautifulSoup import BeautifulSoup
import urllib

class Keyword: # top tags
    """A tag from the "top tags" list together with its number."""

    def __init__(self, tag, nb):
        """Store *tag* unchanged and *nb* converted with int().

        int() raises TypeError or ValueError when *nb* cannot be converted.
        """
        self.tag = tag
        self.nb = int(nb)

    def __repr__(self):
        """Return a short debug string naming the tag and its number."""
        fields = (self.tag, self.nb)
        return "<Keyword '%s' : %d>" % fields

class Snippet:
    """Plain record for one snippet: its title, its code and its tags."""

    def __init__(self, title, code, tags):
        """Keep the three values exactly as they were passed in."""
        self.title, self.code, self.tags = title, code, tags

    def __repr__(self):
        """Return a short debug string showing the title and the tags."""
        tag_text = str(self.tags)
        return "<Snippet '%s' : tags %s>" % (self.title, tag_text)

class Snippets:
    """Download and parse one tag-listing page of the snippets website.

    Attributes set by the constructor:
      tags     -- the tag list the page was requested for
      keywords -- Keyword objects built from the table in the "sidebar" div
      snippets -- Snippet objects built from the divs of class "post"
    """

    urlForTags = "http://www.bigbold.com/snippets/tags"

    def __init__(self, l=None):
        """Fetch the page for the tag list *l* and parse it.

        l -- list of tag names; None (the default) is treated as an empty
             list, which requests the bare "/tags" page.
        """
        # Create a fresh list per call: a mutable default would be stored in
        # self.tags and therefore shared between all default-built instances.
        if l is None:
            l = []
        url = self.__getUrlForTags(l)

        # load the url; close the handle even if reading fails
        fu = urllib.urlopen(url)
        try:
            content = fu.read()
        finally:
            fu.close()

        self.tags = l
        self.keywords, self.snippets = self.__extractContent(content)

    def __repr__(self):
        return "<Snippets for tags:%s>" % (str(self.tags))

    def __getUrlForTags(self, l):
        """Return the base tags URL with every tag of *l* appended, "/"-joined."""
        assert isinstance(l, list)
        return "/".join([Snippets.urlForTags] + l)

    def __extractContent(self, content):
        """Parse the page HTML and return a ``(keywords, snippets)`` tuple."""
        soup = BeautifulSoup(content)
        return self.__extractKeywords(soup), self.__extractSnippets(soup)

    def __extractKeywords(self, soup):
        """Build one Keyword per row of the table inside the "sidebar" div."""
        tagTable = soup('div', {'id' : "sidebar"})[0].table
        keywords = []
        for row in tagTable("tr"):
            td = row("td")
            try:
                # layout of the empty selection page "/tags"
                keyword = Keyword(td[1].span.a.string, td[0].string)
            except TypeError:
                # layout of a selected page "/tag/something": the cells are
                # shifted by one.  Keyword raises TypeError when the number
                # cell yields no convertible value (e.g. int(None)).
                keyword = Keyword(td[2].span.a.string, td[1].string)
            keywords.append(keyword)
        return keywords

    def __extractSnippets(self, soup):
        """Build one Snippet per div of class "post"."""
        snippets = []
        for post in soup('div', {'class' : "post"}):
            divs = post("div")

            title = divs[0].h3.a.string
            # the last link is dropped: the original notes it is the user
            tags = [a.string for a in divs[1]("a")][:-1]

            # everything in the first div after its first child (the h3)
            body_nodes = list(divs[0])[1:]
            code = self.__extractCode(body_nodes)

            snippets.append(Snippet(title, code, tags))
        return snippets

    def __extractCode(self, nodes):
        """Join the text of <pre> nodes; other loose text becomes "#| " lines."""
        parts = []
        for node in nodes:
            try:
                name = node.name
            except AttributeError:
                # No .name attribute: the original treats such nodes as text
                # outside <pre> and turns non-blank ones into comment lines.
                text = str(node).strip()
                if text:
                    parts.append("#| " + text + "\n")
                continue
            # A <pre> whose .string is None contributes nothing.
            if name == "pre" and node.string is not None:
                parts.append(node.string)
        return "".join(parts)


And an example (all returned "strings" are in UTF-8):
from snippets import Snippets

# Load the listing page for the tags "python" and "xml".
s = Snippets(["python","xml"])
print(s)
print(s.keywords) # the "top tags" column

# One line per snippet found on the page.
for snippet in s.snippets:
    print(snippet)

# Index 6 is the seventh snippet (indices start at 0).
print(s.snippets[6].title) # its title
print(s.snippets[6].code)  # its code
« Newer Snippets
Older Snippets »
Showing 1-3 of 3 total  RSS