Parse an RSS feed from a given URL.
require 'net/http'
require 'uri'
require 'rexml/document'

# Downloads an RSS feed over HTTP and converts it into a plain Hash.
#
# Example:
#   feed = ParseRss.new('http://someurl.com/rss').parse
#   feed['items'].each { |item| puts item['title'] }
class ParseRss
  # url - String URL of the RSS feed to fetch.
  def initialize(url)
    @url = url
  end

  # Fetches the feed at the URL given to the constructor and parses it.
  #
  # Returns a Hash with String keys:
  #   'title'    - text of <channel><title>, or nil when absent
  #   'home_url' - text of <channel><link>, or nil when absent
  #   'rss_url'  - the URL this object was created with
  #   'items'    - Array of Hashes, one per <item> element found anywhere
  #                in the document (see #parse_item for their keys)
  #
  # Raises whatever Net::HTTP.get raises on a network failure and
  # REXML::ParseException when the response body is malformed XML.
  def parse
    @content = Net::HTTP.get(URI.parse(@url))
    xml = REXML::Document.new(@content)
    root = xml.root

    data = {}
    data['title'] = element_text(root, 'channel/title')
    data['home_url'] = element_text(root, 'channel/link')
    data['rss_url'] = @url
    data['items'] = []

    # '//item' matches items at any depth, not only directly under <channel>.
    xml.elements.each('//item') do |item|
      data['items'] << parse_item(item)
    end

    data
  end

  private

  # Converts a single <item> element into a Hash.
  #
  # 'title', 'link' and 'description' are always present as keys (nil when
  # the element is missing). 'author' and 'publication_date' are only set
  # when a matching element exists in the item.
  def parse_item(item)
    entry = {}
    entry['title'] = element_text(item, 'title')
    entry['link'] = element_text(item, 'link')
    entry['description'] = element_text(item, 'description')

    creator = item.elements['dc:creator']
    entry['author'] = creator.text if creator

    # Prefer dc:date and fall back to pubDate when dc:date is absent.
    date = item.elements['dc:date'] || item.elements['pubDate']
    entry['publication_date'] = date.text if date

    entry
  end

  # Returns the text of the first element matching +path+ below +parent+,
  # or nil when +parent+ is nil or no such element exists. This keeps one
  # missing optional element from aborting the whole parse.
  def element_text(parent, path)
    return nil if parent.nil?

    node = parent.elements[path]
    node ? node.text : nil
  end
end
Use it like so: ParseRss.new('http://someurl.com/rss').parse. It returns a hash containing the feed's title, home URL, RSS URL, and a list of items, which you can use as you wish.