Download the newest version here:
http://untu.ms/danbooru/ 1 '''works with http://danbooru.donmai.us/
2 automated content downloading by tags
3 released as public domain
4 author reinis ivanovs
5 http://untu.ms/'''
6
7 from xml.dom import minidom
8 from sys import stdout
9 from optparse import OptionParser
10 import os, urllib
11
class SpiderOpener(urllib.FancyURLopener):
    """URL opener that announces itself with a Firefox 2 User-Agent string."""

    # urllib sends the `version` class attribute as the User-Agent header.
    version = (
        'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.1) '
        'Gecko/20061204 Firefox/2.0.0.1'
    )


# Install the opener module-wide so that urllib.urlopen() and
# urllib.urlretrieve() go through it.
urllib._urlopener = SpiderOpener()
15
# Command-line interface. Every option takes an integer; the remaining
# positional arguments are the tags to search for. The automatic -h/--help
# option is switched off.
parser = OptionParser(add_help_option=False)
parser.add_option(
    '-l', type='int', dest='limit', default=1000, metavar='NUM')
parser.add_option(
    '-o', type='int', dest='offset', default=0, metavar='NUM')
parser.add_option(
    '-s', type='int', dest='server', default=0, metavar='NUM')
options, args = parser.parse_args()
21
# File names waiting to be fetched; spider() fills it, download() drains it.
queue = []

# Tag string used in the request URL and as the output directory name:
# each argument is percent-encoded, except that '+' is kept literal, and the
# arguments are joined with '+'.
tags = '+'.join(
    urllib.quote(tag).replace('%2B', '+') for tag in args
)
24
# (host, base path) pairs that download() can fetch files from; the -s option
# selects one entry by index.
servers = [
    ('danboofoo.macrochan.org', 'data'),
    ('dan.paramnesiac.net', 'data'),
    ('danbooru.imouto.org', 'data'),
    ('danbooru.darkmirage.com', 'data'),
    ('danbooru.desudesu.org', 'data'),
    ('danbooru.fumbari.com', 'data'),
    ('danbooru.sr3r.net', 'data'),
    ('danbooru.ichijou.org', 'data'),
    ('danbooru.neoragod.net', 'data'),
    ('saguratus.com', 'danbooru/data'),
    ('danbooru.chiisai.net', 'data'),
    ('danbooru.lolitron.org', 'data'),
    ('danbooru.zomgwtf.net', 'data'),
]
try:
    server, path = servers[options.server]
except IndexError:
    # An out-of-range -s used to end in a bare IndexError traceback; report
    # it as a usage error instead (parser.error() exits with status 2).
    parser.error('-s must be a server index between 0 and %d'
                 % (len(servers) - 1))

# Base URL of the post search requests that main() builds.
api = 'http://danbooru.donmai.us/api/find_posts'
28
# Files are saved into a directory named after the encoded tag string, which
# becomes the working directory for the rest of the run.
if not tags:
    # Without any tag the directory name is '' and os.mkdir('') fails with an
    # obscure OSError; tell the user what is actually missing.
    parser.error('at least one tag is required')
if not os.path.exists(tags):
    os.mkdir(tags)
os.chdir(tags)
32
def reporthook(blocks, blocksize, filesize):
    """Write download progress to stdout, overwriting the current line.

    Passed to urllib.urlretrieve() as its progress callback.

    blocks    -- number of blocks transferred so far
    blocksize -- size of one block in bytes
    filesize  -- total size in bytes; zero or negative when it is not known
    """
    received = blocks * blocksize
    if filesize > 0:
        # Scale to a percentage before rounding so float error cannot knock
        # the figure down by one, and cap at 100 because the final block
        # normally overshoots the real size.
        percent = min(int(round(100.0 * received / filesize)), 100)
        stdout.write('\r%d%%' % percent)
    else:
        # No usable total (urlretrieve reports -1 when the size is unknown):
        # show the byte count instead of dividing by zero or printing a
        # negative percentage.
        stdout.write('\r%d bytes' % received)
    # '\r' does not end the line, so flush explicitly to make the update
    # visible right away.
    stdout.flush()
35
def spider(url):
    """Fetch one page of search results and download every file it lists.

    url is requested and parsed as XML. For each element directly below the
    document root, the value of its 'file_name' attribute is appended to the
    module-level queue; download() is then called once per queued entry.

    Raises SystemExit when the queue is empty after parsing, which ends the
    whole run.
    """
    print(url)
    response = urllib.urlopen(url)
    try:
        document = minidom.parse(response)
    finally:
        # Release the connection even when the reply is not well-formed XML.
        response.close()

    for child in document.documentElement.childNodes:
        # Only elements carry attributes; skip text and other node types.
        # Compare by value against the named constant rather than `is 1`.
        if child.nodeType != child.ELEMENT_NODE:
            continue
        queue.append(child.attributes['file_name'].value)

    print(len(queue))
    if not queue:
        # Raise SystemExit directly: the exit() builtin is only installed
        # when the site module is loaded.
        raise SystemExit
    # download() pops exactly one entry per call.
    for _ in range(len(queue)):
        download()
48
def download():
    """Pop one file name off the module-level queue and fetch that file.

    The file is requested from the selected server and saved in the current
    directory under its own name. A file that already exists locally is
    reported with '+' and skipped.
    """
    filename = queue.pop()
    # The remote path nests the file under two directories named after
    # characters 0-1 and 2-3 of the file name.
    url = 'http://%s/%s/%s/%s/%s' % (
        server, path, filename[0:2], filename[2:4], filename)
    print(url)
    if os.path.exists(filename):
        print('+')
        return

    # NOTE(review): filename comes from the search response and is used as a
    # local path unchecked -- confirm it can never contain path separators.

    # Download under a temporary name and rename only on success. Otherwise
    # an interrupted transfer would leave a truncated file under the final
    # name, and the exists() check above would skip it on every later run.
    partial = filename + '.part'
    urllib.urlretrieve(url, partial, reporthook)
    stdout.write('\n')
    os.rename(partial, filename)
59
def main():
    """Walk the search results in steps of 100 and download each batch.

    Reads the -o and -l option values and calls spider() once per step, then
    once more for the final request.
    """
    first, last = options.offset, options.limit
    request = '%s?tags=%s&offset=%d&limit=%d'

    for position in range(first, last - 100, 100):
        spider(request % (api, tags, position, position + 100))

    # NOTE(review): the loop stops before last - 100 and the final request
    # starts at first + last - last % 100, so with the defaults (offset 0,
    # limit 1000) no request starts at offset 900 -- confirm against the
    # API's offset/limit semantics before changing.
    final_offset = first + last - last % 100
    spider(request % (api, tags, final_offset, last))


# Run only when executed as a script, not when imported.
if __name__ == '__main__':
    main()