DZone Snippets is a public source code repository. Easily build up your personal collection of code snippets, categorize them with tags/keywords, and share them with the world.
Page Scrape Using Ruby
# Hypemachine offers a list of popular artists; this script grabs them and writes them to a human-readable file.
require 'rubygems'
require 'hpricot'
require 'open-uri'
# Scrape the artist links from the hypem.com "popular" page and append the
# unique artist names, under a timestamp header, to hypem.csv in the current
# working directory.
puts "fetching artist names..."
# popular artists on hypem
hdrs = {"User-Agent"=>"Mozilla/5.0 (Macintosh; U; PPC Mac OS X Mach-O; en-US; rv:1.8.1.1) Gecko/20061204 Firefox/2.0.0.1", "Accept-Charset"=>"utf-8", "Accept"=>"text/html"}
# Kernel#open no longer accepts URLs on Ruby 3.0+, so use URI.open where
# open-uri provides it and fall back to Kernel.open on older interpreters.
opener = URI.respond_to?(:open) ? URI : Kernel
# The block form closes the underlying handle as soon as the body is read.
my_html = opener.open("http://hypem.com/popular", hdrs) { |page| page.read }
#create Hpricot object
@web_doc = Hpricot(my_html)
puts "writing to file..."
if @web_doc
  #parse document with hpricot object
  array = @web_doc.search("a[@class*=artist]").map { |artist| artist.innerHTML }
  # Use strip rather than strip!: strip! returns nil when there is nothing to
  # trim, which would put nil entries into the list and break the write below.
  names = array.map { |a| a.to_s.strip }.reject { |name| name.empty? }
  #using this to match up to your database using something like sphinx?
  #sphinx_array = {:limit => 1, :mode => :any, :sort_mode => [:relevance, 'name']}
  #tmp=Artist.find_with_sphinx(artist_name, :sphinx => sphinx_array)
  #if(!tmp.nil?)
  #@hypem_artists << tmp[0]
  #or if you're not...
  @hypem_artists = names.uniq
  # Append mode creates the file when it does not exist yet, so no separate
  # "create if missing" step is needed.
  File.open("hypem.csv", 'a') do |f|
    f.write(Time.now.to_s + "\n" + "===================" + "\n")
    @hypem_artists.each do |artist|
      f.write(artist + ", ")
    end
    # Terminate the record while the file handle is still open.
    f.write("\n")
  end
  puts "success!"
else
  puts 'error parsing page!'
end





