#!/usr/bin/ruby
# file: thegang.rb

require 'rexml/document'
include REXML

# Converts a saved listing page (an XML/XHTML file) into a small RSS-like
# XML file containing one <item> per link found at
# body/div/ul/li/h2/a beneath the page's root element.
#
# Each <item> carries four children:
#   <title>      - the link text with newlines, commas, apostrophes and
#                  spaces removed
#   <href_audio> - the link's href attribute
#   <href>       - the same href with every "&from=audio" removed
#   <date>       - text of the 1st and 2nd p/span of the enclosing <li>,
#                  joined by a single space
class TheGang
  # Paths used when rssify is called without arguments.
  DEFAULT_INPUT = 'thegang.xml'.freeze
  DEFAULT_OUTPUT = 'thegang_rss.xml'.freeze

  # Location of the item links, relative to the page's root element.
  ANCHOR_XPATH = 'body/div/ul/li/h2/a'.freeze

  # Reads the page at +input_path+ and writes the generated feed to
  # +output_path+ (overwriting it).
  #
  # @param input_path [String] XML file to read
  # @param output_path [String] file the feed is written to
  # @return [nil]
  # @raise [Errno::ENOENT] if +input_path+ does not exist
  # @raise [REXML::ParseException] if the input is not well-formed XML
  def rssify(input_path = DEFAULT_INPUT, output_path = DEFAULT_OUTPUT)
    # Block forms close both handles even when parsing or writing raises.
    page = File.open(input_path, 'r') { |io| REXML::Document.new(io) }
    feed = build_feed(page)
    File.open(output_path, 'w') { |io| io.puts feed }
    nil
  end

  private

  # Builds the <rss> document holding one <item> per matching link.
  def build_feed(page)
    feed = REXML::Document.new
    channel = feed.add_element('rss')
    # An empty input file parses to a document without a root element.
    return feed unless page.root

    page.root.elements.each(ANCHOR_XPATH) do |anchor|
      channel.add_element(build_item(anchor))
    end
    feed
  end

  # Builds a single <item> element from one <a> node.
  def build_item(anchor)
    item = REXML::Element.new('item')
    list_entry = anchor.parent.parent # a -> h2 -> li

    # Attribute#to_s yields the escaped form of the value (e.g. "&amp;").
    # It is decoded once here so the serializer escapes it exactly once;
    # assigning the escaped string as element text lets REXML versions
    # that escape every "&" on output emit "&amp;amp;".
    escaped_href = anchor.attributes.get_attribute('href').to_s.gsub('amp;&', '')
    audio_href = REXML::Text.unnormalize(escaped_href)

    # NOTE: besides whitespace this also strips commas and apostrophes,
    # exactly as the original character class did.
    item.add_element('title').text = anchor.text.to_s.delete("\n,' ")
    item.add_element('href_audio').text = audio_href
    item.add_element('href').text = audio_href.gsub('&from=audio', '')
    item.add_element('date').text =
      "#{span_text(list_entry, 1)} #{span_text(list_entry, 2)}"
    item
  end

  # Text of the +position+-th p/span under +list_entry+, or nil when that
  # span is absent (nil interpolates as an empty string in the date).
  def span_text(list_entry, position)
    span = list_entry.elements["p/span[#{position}]"]
    span ? span.text : nil
  end
end

if __FILE__ == $0
  # Report a missing input file instead of dying with a backtrace.
  if File.exist?(TheGang::DEFAULT_INPUT)
    TheGang.new.rssify
  else
    warn "#{TheGang::DEFAULT_INPUT} not found; nothing to convert"
  end
end
see also: www.dapper.net
output (extract)
<rss> <item><title>TheGangXII-II</title><href_audio>/gangitem/id=6501&from=audio</href_audio><href>/gangitem/id=6501</href><date>Jan 25</date></item> <item><title>TheGangXII-I</title><href_audio>/gangitem/id=6499&from=audio</href_audio><href>/gangitem/id=6499</href><date>Jan 25</date></item> <item><title>NewsGangLive01.24.08</title><href_audio>/gangitem/id=6445&from=audio</href_audio><href>/gangitem/id=6445</href><date>Jan 24</date></item> <item><title>NewsGangLiveII</title><href_audio>/gangitem/id=6377&from=audio</href_audio><href>/gangitem/id=6377</href><date>Jan 23</date></item> ... </rss>