Never been to DZone Snippets before?

Snippets is a public source code repository. Easily build up your personal collection of code snippets, categorize them with tags / keywords, and share them with the world.

About this user

Reinis Ivanovs http://untu.ms/

« Newer Snippets
Older Snippets »
Showing 1-6 of 6 total  RSS 

walk a path

#!/usr/bin/env python

import os

from os.path import isdir, abspath, join
from glob import iglob


def walk(name='.'):
    """Print the absolute path of `name` and of everything beneath it.

    Directories are printed first and then descended into recursively;
    every other entry is printed as-is. Entries whose name starts with a
    dot are skipped, matching what a ``*`` glob pattern would have matched.

    The directory is listed with os.listdir rather than a glob pattern so
    that directory names containing glob metacharacters (``[``, ``?``,
    ``*``) are still traversed instead of being interpreted as patterns.

    name -- path to start from (defaults to the current directory).
    """
    print(abspath(name))
    try:
        entries = os.listdir(name or os.curdir)
    except OSError:
        # Not a directory, missing, or unreadable: nothing below it to list.
        return
    for entry in entries:
        if entry.startswith('.'):
            continue
        item = join(name, entry)
        if isdir(item):
            walk(item)
        else:
            print(abspath(item))


if __name__ == '__main__':
    walk()

Custom SciTE config

Uncommented, but might be useful to someone.

# SciTE options, one key=value pair per line.
# Editor and output pane both use code page 65001 (the UTF-8 code page number).
code.page=65001
output.code.page=65001
# Indentation: 4 columns per tab and per indent level, with use.tabs enabled.
tabsize=4
indent.size=4
use.tabs=1
use.monospaced=1
autocompleteword.automatic=1
toolbar.visible=0
save.session=1
# Whitespace and indentation-guide display.
view.whitespace=1
view.indentation.whitespace=1
view.indentation.guides=1
highlight.indentation.guides=1
# Clean-up options applied to the file contents.
strip.trailing.spaces=1
ensure.final.line.end=1
default.file.ext=.py
time.commands=1
# Window geometry.
position.width=-1
position.height=-1
split.vertical=0
# Behaviour when a file changes on disk.
load.on.activate=1
are.you.sure.on.reload=1
reload.preserves.undo=1
backspace.unindents=1
# Line endings.
eol.mode=LF
eol.auto=1
# Monospaced font.
font.monospace=font:Consolas,size:11
start.in.monospaced.mode=1

expandtabs.py

Convert tabs to spaces.

from sys import stdin, stdout

# Read all of standard input, expand each tab to 4-column tab stops,
# and write the result to standard output.
text = stdin.read()
stdout.write(text.expandtabs(4))

To use in SciTE:
# Command slot 3, defined for the "*" file pattern: the current selection is
# given to expandtabs.py as input and replaced by the script's output.
command.name.3.*=Expand Tabs
command.mode.3.*=savebefore:yes,groupundo:yes,replaceselection:yes,quiet:yes
command.input.3.*=$(CurrentSelection)
command.3.*=python c:\dev\expandtabs.py

wraplines.py

Apply a pattern to all lines in a text file.

Usage: wraplines.py source destination pattern [encoding]
Example: wraplines.py data.txt - '%s',\n\n iso-8859-13

from codecs import open
from sys import argv

def main():
    """Apply a %-format pattern to every line of a text file.

    Command line: source destination pattern [encoding]

    source      -- file to read.
    destination -- file to write; '-' means overwrite the source file.
    pattern     -- %-format string applied to each line; backslash escapes
                   typed on the command line (such as \\n) are decoded first.
    encoding    -- charset used for both files (default 'utf-8').

    `open` here is codecs.open (imported at file level), so lines are read
    and written through the given encoding.

    Lines are written in the same order as they appear in the source.
    Exits with a usage message when fewer than three arguments are given.
    """
    if len(argv) < 4:
        raise SystemExit('usage: %s source destination pattern [encoding]' % argv[0])
    sourcename, destname, pattern = argv[1:4]
    pattern = pattern.decode('string_escape')
    if destname == '-':
        destname = sourcename
    charset = argv[4] if len(argv) > 4 else 'utf-8'

    # Read the whole source and close it before the destination is opened,
    # because both names may refer to the same file.
    reader = open(sourcename, 'U', charset)
    try:
        source = reader.read().split('\n')
    finally:
        reader.close()
    # NOTE: split('\n') yields a trailing empty string when the file ends
    # with a newline; the pattern is applied to that empty entry as well.

    dest = open(destname, 'w', charset)
    try:
        for line in source:
            dest.write(pattern % line)
    finally:
        dest.close()

    # Same figure as before: index of the last entry times the number of
    # newlines in the pattern, plus one.
    last_index = len(source) - 1
    print('%d lines written to %s' % (last_index * pattern.count('\n') + 1, destname))

if __name__ == '__main__':
    main()

merge.py

Merge a number of text files, removing duplicates and sorting the results.

Usage: merge.py [-e charset] filenames destination
Example: merge.py folder/*.log list.txt merged.txt

from codecs import open
from getopt import getopt
from glob import glob
from os import linesep
from sys import argv

def main():
    """Merge text files into one sorted file without duplicate lines.

    Command line: [-e charset] filenames destination

    -e charset  -- encoding used to read and write (default 'utf-8').
    filenames   -- input files; a name containing '*' is expanded with glob.
    destination -- output file, always the last argument.

    `open` here is codecs.open (imported at file level).

    Only the file names left over after option parsing are expanded, and
    names are collected into a set, so repeating a wildcard or a file name
    is harmless. Exits with a usage message when too few arguments are given.
    """
    if len(argv) < 3:
        raise SystemExit('usage: %s [-e charset] filenames destination' % argv[0])
    options, names = getopt(argv[1:-1], 'e:')
    destination = argv[-1]
    charset = dict(options).get('-e', 'utf-8')

    filenames = set()
    for name in names:
        if '*' in name:
            filenames.update(glob(name))
        else:
            filenames.add(name)
    # Sorted so the summary line is printed in a stable order.
    filenames = sorted(filenames)

    unique = set()
    for name in filenames:
        source = open(name, 'U', charset)
        try:
            unique.update(source.read().split('\n'))
        finally:
            source.close()
    result = sorted(unique)

    dest = open(destination, 'w', charset)
    try:
        dest.write(linesep.join(result))
    finally:
        dest.close()
    print('%s = %s (%d lines)' % (' + '.join(filenames), destination, len(result)))

if __name__ == '__main__':
    main()

danbooru robot

Download the newest version here: http://untu.ms/danbooru/

'''works with http://danbooru.donmai.us/
   automated content downloading by tags
   released as public domain
   author reinis ivanovs
   http://untu.ms/'''

from xml.dom import minidom
from sys import stdout
from optparse import OptionParser
import os, urllib

class SpiderOpener(urllib.FancyURLopener):
    """URL opener whose `version` string is a Firefox 2.0 browser identifier."""

    version = (
        'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.1) '
        'Gecko/20061204 Firefox/2.0.0.1'
    )


# Install an instance as urllib's module-level opener.
urllib._urlopener = SpiderOpener()

# Command-line options: -l limit, -o offset, -s index into `servers`.
parser = OptionParser(add_help_option=False)
parser.add_option('-l', type='int', default=1000, metavar='NUM', dest='limit')
parser.add_option('-o', type='int', default=0, metavar='NUM', dest='offset')
parser.add_option('-s', type='int', default=0, metavar='NUM', dest='server')
options, args = parser.parse_args()

# File names waiting to be downloaded; filled by spider(), drained by download().
queue = []

# Positional arguments are the tags: URL-quote each one (keeping '+') and
# join them with '+'.
tags = '+'.join(urllib.quote(item).replace('%2B', '+') for item in args)

# (host, base path) pairs; -s picks one of them by position.
servers = [
    ('danboofoo.macrochan.org', 'data'),
    ('dan.paramnesiac.net', 'data'),
    ('danbooru.imouto.org', 'data'),
    ('danbooru.darkmirage.com', 'data'),
    ('danbooru.desudesu.org', 'data'),
    ('danbooru.fumbari.com', 'data'),
    ('danbooru.sr3r.net', 'data'),
    ('danbooru.ichijou.org', 'data'),
    ('danbooru.neoragod.net', 'data'),
    ('saguratus.com', 'danbooru/data'),
    ('danbooru.chiisai.net', 'data'),
    ('danbooru.lolitron.org', 'data'),
    ('danbooru.zomgwtf.net', 'data'),
]
server, path = servers[options.server]
api = 'http://danbooru.donmai.us/api/find_posts'

# Work inside a directory named after the tag string, creating it if needed.
if not os.path.exists(tags):
    os.mkdir(tags)
os.chdir(tags)

def reporthook(blocks, blocksize, filesize):
    """Progress callback for urllib.urlretrieve: rewrite the current line.

    blocks    -- number of blocks transferred so far.
    blocksize -- size of one block in bytes.
    filesize  -- total size in bytes; zero or negative when it is not known.

    Writes '\\rNN%' (capped at 100) to stdout when the total size is known,
    and the number of bytes received so far when it is not.
    """
    received = blocks * blocksize
    if filesize <= 0:
        # A percentage cannot be computed (and would divide by zero).
        stdout.write('\r%d bytes' % received)
        return
    # Round to the nearest whole percent before formatting; formatting a
    # float with %d truncates, which turned values like 28.999... into 28.
    percent = int(round(100.0 * received / filesize))
    stdout.write('\r%d%%' % min(percent, 100))

def spider(url):
    """Fetch one XML result page and download every file it lists.

    Prints the URL, parses the response with minidom, appends the
    'file_name' attribute of each element child of the document root to the
    module-level `queue`, prints the queue length, and then calls download()
    until the queue is empty.

    Exits the program when the page yields no files.

    url -- address of the API page to fetch.
    """
    print(url)
    response = urllib.urlopen(url)
    try:
        document = minidom.parse(response)
    finally:
        response.close()
    for child in document.documentElement.childNodes:
        # Only element nodes carry attributes; skip any other node type.
        if child.nodeType != child.ELEMENT_NODE:
            continue
        queue.append(child.attributes['file_name'].value)
    print(len(queue))
    if not queue:
        raise SystemExit
    # download() removes one entry per call.
    while queue:
        download()

def download():
    """Take the last file name off `queue` and fetch it from the chosen server.

    The URL is built from the module-level `server` and `path` plus two
    directory levels taken from the first four characters of the file name.
    A file that already exists in the current directory is not fetched
    again; '+' is printed instead.
    """
    name = queue.pop()
    location = 'http://%s/%s/%s/%s/%s' % (server, path, name[:2], name[2:4], name)
    print(location)
    if not os.path.exists(name):
        urllib.urlretrieve(location, name, reporthook)
        # Finish the progress line that reporthook keeps rewriting.
        stdout.write('\n')
    else:
        print('+')

def main():
    """Request result pages in steps of 100 and hand each one to spider().

    Uses the module-level `options` (offset, limit), `api` and `tags`.
    One request is made for every step from offset up to (but excluding)
    limit - 100, followed by one final request.
    """
    start, total = options.offset, options.limit
    template = '%s?tags=%s&offset=%d&limit=%d'
    # NOTE(review): each request passes position + 100 as its limit, and the
    # final request starts at offset + limit - limit % 100 -- confirm this
    # paging arithmetic against the API before relying on it.
    position = start
    while position < total - 100:
        spider(template % (api, tags, position, position + 100))
        position += 100
    spider(template % (api, tags, start + total - total % 100, total))

if __name__ == '__main__':
    main()
« Newer Snippets
Older Snippets »
Showing 1-6 of 6 total  RSS