<?xml version="1.0" encoding="UTF-8"?>
<!--
  RSS 2.0 feed of DZone Snippets entries (channel title: "scraping code").
  NOTE(review): the dc prefix (Dublin Core elements 1.1) is declared on the
  root element, but no dc: element or attribute appears in this document.
-->
<rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/">
  <channel>
    <!-- Channel metadata: feed title, site link, feed publication date
         (RFC 822 style, GMT) and a description that repeats the title. -->
    <title>DZone Snippets: scraping code</title>
    <link>http://snippets.dzone.com/posts</link>
    <pubDate>Fri, 25 Jul 2008 05:41:42 GMT</pubDate>
    <description>DZone Snippets: scraping code</description>
    <!--
      Entry 4618: Ruby setup snippet that requires rubygems, cgi, open-uri,
      hpricot and mechanize, creates a WWW::Mechanize agent, and feeds the
      fetched page into Hpricot.
      The description is entity-escaped HTML: a code element whose lines are
      separated by br tags. Do not re-indent or wrap it; whitespace inside
      the description is part of the data.
      The guid repeats the link URL.
      NOTE(review): RSS 2.0 defines author as an e-mail address; here it holds
      "sikelianos (Zeke Sikelianos)", which looks like a username plus display
      name. Confirm what consumers expect before changing it.
    -->
    <item>
      <title>Mechanize / Hpricot / Scraping setup</title>
      <link>http://snippets.dzone.com/posts/show/4618</link>
      <description>&lt;code&gt;&lt;br /&gt;require 'rubygems'&lt;br /&gt;require 'cgi'&lt;br /&gt;require 'open-uri'&lt;br /&gt;require 'hpricot'&lt;br /&gt;require 'mechanize'&lt;br /&gt;&lt;br /&gt;agent = WWW::Mechanize.new&lt;br /&gt;doc = Hpricot(agent.get(the_url).parser.to_s)&lt;br /&gt;&lt;/code&gt;</description>
      <pubDate>Thu, 04 Oct 2007 22:09:17 GMT</pubDate>
      <guid>http://snippets.dzone.com/posts/show/4618</guid>
      <author>sikelianos (Zeke Sikelianos)</author>
    </item>
    <!--
      Entry 4133: Ruby snippet that CGI-escapes a list of query words, joins
      them with "+", builds a Google search URL, parses the response with
      Hpricot, takes the href of the first link inside a div with class "g",
      and passes it to a system call.
      The description starts with a plain-text attribution line, followed by
      the same entity-escaped code/br layout as the entry above.
      NOTE(review): the last line of the embedded snippet puts #{lucky_url}
      inside a single-quoted Ruby string, where Ruby does not interpolate.
      It is kept verbatim because it is the published snippet text.
      NOTE(review): author is a name rather than the e-mail address that
      RSS 2.0 specifies; same caveat as the entry above.
    -->
    <item>
      <title>Scraping Google Search Results with Hpricot</title>
      <link>http://snippets.dzone.com/posts/show/4133</link>
      <description>// snagged from http://g-module.rubyforge.org/&lt;br /&gt;&lt;br /&gt;&lt;code&gt;&lt;br /&gt;require 'rubygems'&lt;br /&gt;require 'cgi'&lt;br /&gt;require 'open-uri'&lt;br /&gt;require 'hpricot'&lt;br /&gt;&lt;br /&gt;q = %w{meine kleine suchanfrage}.map { |w| CGI.escape(w) }.join("+")&lt;br /&gt;url = "http://www.google.com/search?q=#{q}"&lt;br /&gt;doc = Hpricot(open(url).read)&lt;br /&gt;lucky_url = (doc/"div[@class='g'] a").first["href"]&lt;br /&gt;system 'open #{lucky_url}'&lt;br /&gt;&lt;/code&gt;</description>
      <pubDate>Wed, 13 Jun 2007 00:29:06 GMT</pubDate>
      <guid>http://snippets.dzone.com/posts/show/4133</guid>
      <author>sikelianos (Zeke Sikelianos)</author>
    </item>
  </channel>
</rss>
