<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/">
  <channel>
    <title>DZone Snippets: Motoras's Code Snippets</title>
    <link>http://snippets.dzone.com/posts</link>
    <pubDate>Fri, 25 Jul 2008 15:50:34 GMT</pubDate>
    <description>DZone Snippets: Motoras's Code Snippets</description>
    <item>
      <title>Organize photos and other files into a year/month/day folder structure</title>
      <link>http://snippets.dzone.com/posts/show/3859</link>
      <description>This script can organize your media collection into folders by year/month/day. &lt;br /&gt;For JPEG files, it looks in the EXIF data for the creation date, if the file has that kind of metadata. For any other file it only checks the file's modification time.&lt;br /&gt;The script takes 3 arguments on the command line: a command, which may be -m, -c or -f, a source folder and a destination folder.&lt;br /&gt;Using -c will copy the files from source to destination, -m will move them, and -f will also move them, by first making a copy and then deleting the original. The -f option may be used when -m doesn't work, the most common situation being when you want to move the files from one file system to another (e.g. from your ext3 local hard drive to an external FAT32 USB hard drive).&lt;br /&gt;After you organize the files, you can find duplicate items using my previous snippet.&lt;br /&gt;This script was inspired by the one &lt;a href="http://weblog.patrice.ch/articles/tag/exif"&gt;here&lt;/a&gt;.&lt;br /&gt;&lt;code&gt;&lt;br /&gt;#!/usr/local/bin/ruby&lt;br /&gt;require 'rubygems'&lt;br /&gt;require 'ftools'&lt;br /&gt;require 'exifr'&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;def process_file(file_path,destination_dir)&lt;br /&gt;	if File.directory?(file_path)&lt;br /&gt;		crt_dir = Dir.new(file_path)&lt;br /&gt;		crt_dir.each do |file_name|&lt;br /&gt;			if file_name != '.' &amp;&amp;  file_name != '..'				
&lt;br /&gt;				process_file("#{crt_dir.path}/#{file_name}",destination_dir)		&lt;br /&gt;			end&lt;br /&gt;		end	&lt;br /&gt;	else&lt;br /&gt;			&lt;br /&gt;		if  File.fnmatch('*.jpg',file_path) ||  File.fnmatch('*.jpeg',file_path)&lt;br /&gt;			picture = EXIFR::JPEG.new(file_path)&lt;br /&gt;			if picture != nil &amp;&amp; picture.exif != nil&lt;br /&gt;				file_date = picture.date_time		&lt;br /&gt;			else&lt;br /&gt;				f = File.new(file_path)&lt;br /&gt;				file_date = f.mtime&lt;br /&gt;			end							&lt;br /&gt;		end&lt;br /&gt;		if file_date == nil&lt;br /&gt;			f = File.new(file_path)&lt;br /&gt;			file_date = f.mtime	&lt;br /&gt;		end		&lt;br /&gt;		year_dir =  destination_dir + file_date.strftime("%Y")&lt;br /&gt;		month_dir = destination_dir + file_date.strftime("%Y/%m-%b")&lt;br /&gt;		day_dir = destination_dir + file_date.strftime("%Y/%m-%b/%d")&lt;br /&gt;		new_file_name = day_dir + "/" + File.basename(file_path)&lt;br /&gt;		begin&lt;br /&gt;			Dir.mkdir(year_dir) unless File.exists?(year_dir)&lt;br /&gt;			Dir.mkdir(month_dir) unless File.exists?(month_dir)&lt;br /&gt;			Dir.mkdir(day_dir) unless File.exists?(day_dir)&lt;br /&gt;			if ARGV[0 ] =='-m' #move the files&lt;br /&gt;				File.rename(file_path, new_file_name)&lt;br /&gt;			elsif ARGV[0]  =='-c' #copy the files&lt;br /&gt;				File.cp(file_path, new_file_name)	&lt;br /&gt;			elsif ARGV[0]  =='-f' #copy and delete, acts like a move between thw different file systems	&lt;br /&gt;				File.cp(file_path, new_file_name)&lt;br /&gt;				File.delete(file_path)	&lt;br /&gt;			else&lt;br /&gt;				puts "Unknown option #{ARGV[0]}"&lt;br /&gt;				exit	&lt;br /&gt;			end	&lt;br /&gt;		end	&lt;br /&gt;		&lt;br /&gt;	end	&lt;br /&gt;end&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;if ARGV.length != 3&lt;br /&gt;	puts "Three arguments are required to run the script, -c|-m|-f &lt;source_folder_or_file&gt;  &lt;destination_folder&gt;"&lt;br /&gt;	exit&lt;br /&gt;end&lt;br 
/&gt;&lt;br /&gt;if ARGV[0] !='-c'  &amp;&amp; ARGV[0]!='-m' &amp;&amp; ARGV[0]!='-f'&lt;br /&gt;	puts "Unknown running option: #{ARGV[0]}"&lt;br /&gt;	exit&lt;br /&gt;end&lt;br /&gt;&lt;br /&gt;if not File.exists?(ARGV[1]) &lt;br /&gt;	puts "Source file does not exists: #{ARGV[1]}"&lt;br /&gt;	exit&lt;br /&gt;end&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;if not File.directory?(ARGV[2]) &lt;br /&gt;	puts "Destination file is not a directory #{ARGV[2]}"&lt;br /&gt;	exit&lt;br /&gt;end&lt;br /&gt;&lt;br /&gt;if ARGV[1]==ARGV[2]&lt;br /&gt;	puts "Source and destination must be different"&lt;br /&gt;	exit&lt;br /&gt;end	&lt;br /&gt;	&lt;br /&gt;	&lt;br /&gt;process_file(ARGV[1], ARGV[2])&lt;br /&gt;&lt;/code&gt;</description>
      <pubDate>Mon, 23 Apr 2007 16:09:58 GMT</pubDate>
      <guid>http://snippets.dzone.com/posts/show/3859</guid>
      <author>motoras (Romulus Pasca)</author>
    </item>
    <item>
      <title>How to identify identical files in a directory</title>
      <link>http://snippets.dzone.com/posts/show/3858</link>
      <description>I use this script to find identical files with different names in a folder. It is very useful when you want to check your media collection for duplicate items. Basically, it scans the directory and its subfolders, computes the MD5 sum of each file, and keeps the sums in a hash so they can be compared. The buffer size can be made bigger or smaller, depending on the amount of memory you want to use. For large directories it can take a while to run, but it worked fine, at least for my 20+ GB picture collection.&lt;br /&gt;&lt;code&gt;&lt;br /&gt;#!/usr/local/bin/ruby&lt;br /&gt;require 'digest/md5'&lt;br /&gt;require 'digest/sha1'&lt;br /&gt;&lt;br /&gt;$BUF_SIZE = 1024*1024*1024&lt;br /&gt;&lt;br /&gt;class Folder_Md5&lt;br /&gt;	&lt;br /&gt;	def initialize(folder)&lt;br /&gt;		@md5_to_files = Hash.new	&lt;br /&gt;		@folder = folder&lt;br /&gt;	end	&lt;br /&gt;		&lt;br /&gt;	def scan&lt;br /&gt;		@md5_to_files.clear&lt;br /&gt;		compute_md5(@folder)&lt;br /&gt;	end		&lt;br /&gt;	&lt;br /&gt;	def md5_for_file(file_path)&lt;br /&gt;		@md5_to_files[file_path]&lt;br /&gt;	end	&lt;br /&gt;	&lt;br /&gt;	def identical_count&lt;br /&gt;		total = 0&lt;br /&gt;		@md5_to_files.each_value do |value|&lt;br /&gt;				if value.size &gt;= 2&lt;br /&gt;					total+= value.size		&lt;br /&gt;				end &lt;br /&gt;			end&lt;br /&gt;		return total	&lt;br /&gt;	end	&lt;br /&gt;	&lt;br /&gt;	def list_identical&lt;br /&gt;		total = 0&lt;br /&gt;		identities = 0&lt;br /&gt;		puts 'The List of identical files'&lt;br /&gt;		@md5_to_files.each_value do |value|&lt;br /&gt;				if value.size &gt;= 2&lt;br /&gt;					identities+=1&lt;br /&gt;					total+= value.size&lt;br /&gt;					puts 'Idenitical files:'&lt;br /&gt;					value.each{|file_name| puts file_name}&lt;br /&gt;				end &lt;br /&gt;			end&lt;br /&gt;		puts "got #{identities} identities impling #{total} files"&lt;br /&gt;	end&lt;br /&gt;	&lt;br /&gt;	private	&lt;br /&gt;		def 
compute_md5(file_path)&lt;br /&gt;			if File.directory?(file_path)&lt;br /&gt;				crt_dir = Dir.new(file_path)&lt;br /&gt;				crt_dir.each do |file_name|&lt;br /&gt;					if file_name != '.' &amp;&amp;  file_name != '..'				&lt;br /&gt;						compute_md5("#{crt_dir.path}#{file_name}")		&lt;br /&gt;					end&lt;br /&gt;				end	&lt;br /&gt;			else&lt;br /&gt;				md5_val = md5(file_path)&lt;br /&gt;				if @md5_to_files[md5_val] == nil&lt;br /&gt;					@md5_to_files[md5_val]  = [file_path]	&lt;br /&gt;				else					&lt;br /&gt;					 @md5_to_files[md5_val] &lt;&lt; file_path&lt;br /&gt;				end		&lt;br /&gt;			end&lt;br /&gt;		end&lt;br /&gt;		&lt;br /&gt;		def md5(file_path)&lt;br /&gt;			hasher = Digest::MD5.new&lt;br /&gt;			open(file_path, "r") do |io|&lt;br /&gt;				counter = 0&lt;br /&gt;				while (!io.eof)&lt;br /&gt;					readBuf = io.readpartial($BUF_SIZE)&lt;br /&gt;					putc '.' if ((counter+=1) % 3 == 0)&lt;br /&gt;					hasher.update(readBuf)&lt;br /&gt;				end&lt;br /&gt;			end			&lt;br /&gt;			return hasher.hexdigest&lt;br /&gt;		end	&lt;br /&gt;end&lt;br /&gt;&lt;br /&gt;worker = Folder_Md5.new(ARGV[0])&lt;br /&gt;worker.scan&lt;br /&gt;worker.list_identical&lt;br /&gt;&lt;/code&gt;</description>
      <pubDate>Mon, 23 Apr 2007 16:00:08 GMT</pubDate>
      <guid>http://snippets.dzone.com/posts/show/3858</guid>
      <author>motoras (Romulus Pasca)</author>
    </item>
  </channel>
</rss>
