#!/usr/bin/env ruby
# frozen_string_literal: true

require 'rubygems'
require 'fileutils'
require 'hpricot'
require 'open-uri'

# A Trac repo scraper. Pass the URL to scrape (the root of a repo) and,
# optionally, the local path to write to (defaults to the current directory).
#
# The scraper fetches the HTML page at the given URL, downloads every link
# marked with class "file" (requesting it with format=raw) and recurses into
# every link marked with class "dir".
class TracRepoScraper
  # Captures "scheme://host[:port]" from the start of an http(s) URL.
  SERVER_ROOT = %r{\A(https?://[^/]+)}i

  # @param trac_url [String] URL of the directory listing to start from
  # @param local_path [String] directory the files are written into
  # @raise [ArgumentError] if trac_url is not an http:// or https:// URL
  def initialize(trac_url, local_path = '.')
    match = SERVER_ROOT.match(trac_url.to_s)
    raise ArgumentError, "not an http(s) URL: #{trac_url.inspect}" unless match

    @trac_url = trac_url
    # hrefs in the listing are joined onto this server root
    # (they are assumed to be absolute paths without the domain).
    @trac_server = match[1]
    @local_path = local_path
  end

  # Downloads every file listed at url into cur_localpath, then recurses into
  # every listed subdirectory.
  #
  # @param url [String] URL of the directory listing page
  # @param cur_localpath [String] local directory mirroring that listing
  def getallfiles(url, cur_localpath)
    # mkdir_p tolerates an existing directory (re-runs) and creates parents.
    FileUtils.mkdir_p(cur_localpath)
    doc = fetch(url) { |page| Hpricot(page.read) }

    each_entry(doc, 'file') do |href, name|
      save_file(raw_url(href), File.join(cur_localpath, name))
    end

    each_entry(doc, 'dir') do |href, name|
      dir_url = @trac_server + href
      puts "*** stepping into #{dir_url}"
      getallfiles(dir_url, File.join(cur_localpath, name))
    end
  end

  # Scrapes the configured URL into the configured local path.
  def start
    getallfiles(@trac_url, @local_path)
  end

  private

  # Opens url through open-uri. Kernel#open stopped handling URLs in Ruby 3.0,
  # so URI.open is used when it is publicly available; older Rubies fall back
  # to Kernel#open. The block form closes the handle when the block returns.
  def fetch(url, &block)
    if URI.respond_to?(:open)
      URI.open(url, &block)
    else
      open(url, &block)
    end
  end

  # Yields [href, local_name] for each listing anchor with the given class,
  # skipping anchors that have no href or no usable name.
  def each_entry(doc, css_class)
    doc.search("//tbody//tr//td//a[@class='#{css_class}']").each do |anchor|
      href = anchor['href']
      name = entry_name(anchor)
      yield href, name if href && name
    end
  end

  # Local file/directory name for a listing anchor, taken from its text.
  # The text comes from the remote server, so it is reduced to a basename and
  # ".", ".." and "/" are rejected to keep writes inside the target directory.
  # Returns nil when no usable name remains.
  def entry_name(anchor)
    name = File.basename(anchor.inner_html.to_s.strip)
    return nil if name.empty? || ['.', '..', '/'].include?(name)

    name
  end

  # URL that requests the raw content of the file behind href; format=raw is
  # appended with "&" when the href already carries a query string.
  def raw_url(href)
    separator = href.include?('?') ? '&' : '?'
    "#{@trac_server}#{href}#{separator}format=raw"
  end

  # Downloads remote_url to local_file. The content is written in binary mode
  # so that non-text files and missing trailing newlines are preserved exactly.
  # The download happens before the local file is opened, so a failed request
  # does not leave an empty file behind.
  def save_file(remote_url, local_file)
    puts "Saving #{remote_url} to #{local_file}"
    fetch(remote_url) do |remote|
      File.open(local_file, 'wb') { |out| out.write(remote.read) }
    end
  end
end

#### main
# Usage: <script> TRAC_URL [LOCAL_PATH]
if __FILE__ == $PROGRAM_NAME
  trac_url = ARGV[0].to_s.strip
  abort "Usage: #{File.basename($PROGRAM_NAME)} TRAC_URL [LOCAL_PATH]" if trac_url.empty?

  # A missing or blank second argument means "write into the current directory".
  local_path = ARGV[1].to_s.strip
  local_path = '.' if local_path.empty?

  TracRepoScraper.new(trac_url, local_path).start
end
# You need to create an account or log in to post comments to this site.