Never been to DZone Snippets before?

Snippets is a public source code repository. Easily build up your personal collection of code snippets, categorize them with tags / keywords, and share them with the world.

About this user

Reinis Ivanovs http://untu.ms/

« Newer Snippets
Older Snippets »
Showing 1-1 of 1 total  RSS 

danbooru robot

Download the newest version here: http://untu.ms/danbooru/

   1  '''works with http://danbooru.donmai.us/
   2     automated content downloading by tags
   3     released as public domain
   4     author reinis ivanovs
   5     http://untu.ms/'''
   6  
   7  from xml.dom import minidom
   8  from sys import stdout
   9  from optparse import OptionParser
  10  import os, urllib
  11  
  12  class SpiderOpener(urllib.FancyURLopener):
  13      version = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.1) Gecko/20061204 Firefox/2.0.0.1'
  14  urllib._urlopener = SpiderOpener()
  15  
  16  parser = OptionParser(add_help_option=False)
  17  parser.add_option('-l', dest='limit', metavar='NUM', default=1000, type='int')
  18  parser.add_option('-o', dest='offset', metavar='NUM', default=0, type='int')
  19  parser.add_option('-s', dest='server', metavar='NUM', default=0, type='int')
  20  options, args = parser.parse_args()
  21  
  22  queue = []
  23  tags = '+'.join([urllib.quote(item).replace('%2B', '+') for item in args])
  24  
  25  servers = [('danboofoo.macrochan.org', 'data'), ('dan.paramnesiac.net', 'data'), ('danbooru.imouto.org', 'data'), ('danbooru.darkmirage.com', 'data'), ('danbooru.desudesu.org', 'data'), ('danbooru.fumbari.com', 'data'), ('danbooru.sr3r.net', 'data'), ('danbooru.ichijou.org', 'data'), ('danbooru.neoragod.net', 'data'), ('saguratus.com', 'danbooru/data'), ('danbooru.chiisai.net', 'data'), ('danbooru.lolitron.org', 'data'), ('danbooru.zomgwtf.net', 'data')]
  26  server, path = servers[options.server]
  27  api = 'http://danbooru.donmai.us/api/find_posts'
  28  
  29  if not os.path.exists(tags):
  30      os.mkdir(tags)
  31  os.chdir(tags)
  32  
  33  def reporthook(blocks, blocksize, filesize):
  34      stdout.write('\r%d%%' % min(round(float(blocks)*blocksize/filesize, 2)*100, 100))
  35  
  36  def spider(url):
  37      print url
  38      document = minidom.parse(urllib.urlopen(url))
  39      global queue
  40      for child in document.documentElement.childNodes:
  41          if not child.nodeType is 1: continue
  42          queue.append(child.attributes['file_name'].value)
  43      print len(queue)
  44      if len(queue) == 0:
  45          exit()
  46      for i in range(len(queue)):
  47          download()
  48  
  49  def download():
  50      global server, path
  51      filename = queue.pop()
  52      url = 'http://%s/%s/%s/%s/%s' % (server, path, filename[0:2], filename[2:4], filename)
  53      print url
  54      if os.path.exists(filename):
  55          print '+'
  56          return
  57      urllib.urlretrieve(url, filename, reporthook)
  58      stdout.write('\n')
  59  
  60  def main():
  61      global queue, tags, options
  62      offset, limit = options.offset, options.limit
  63      for i in range(offset, limit-100, 100):
  64          spider('%s?tags=%s&offset=%d&limit=%d' % (api, tags, i, i+100))
  65      spider('%s?tags=%s&offset=%d&limit=%d' % (api, tags, offset+limit-limit%100, limit))
  66  
  67  if __name__ == '__main__':
  68      main()
« Newer Snippets
Older Snippets »
Showing 1-1 of 1 total  RSS