#!/usr/bin/env python
#-*- coding:utf-8 -*-
import os
import urllib2
import sys
from BeautifulSoup import BeautifulSoup
try:
url = sys.argv[1]
except IndexError:
print "geh - A simeple g.e-hentai downloader\nUsage: geh.py [url]"
def find_next_page_link(tag):
    """BeautifulSoup matcher: true for the '&gt;' (next page) anchor.

    Non-tag nodes (e.g. NavigableStrings) may raise TypeError on
    attribute access; treat those as non-matches.
    """
    try:
        return tag.name == 'a' and tag.text == '>'
    except TypeError:
        return False
def find_image(html):
    """Pull the full-size image URL out of a viewer page's HTML.

    The viewer page marks the main image with id="img"; return its
    src attribute.
    """
    img_node = BeautifulSoup(html).find('img', {'id': 'img'})
    return img_node['src']
def parse_index(url):
    """Download and parse one gallery index page.

    Returns a (title, next_page_tag, image_page_urls) triple, where
    next_page_tag is the '>' anchor tag (or None on the last page) and
    image_page_urls are the hrefs of the thumbnail grid (div#gdt).
    """
    soup = BeautifulSoup(urllib2.urlopen(url).read())
    thumb_grid = soup.find('div', {'id': 'gdt'})
    image_pages = [anchor['href'] for anchor in thumb_grid.findAll('a')]
    return soup.h1.text, soup.find(find_next_page_link), image_pages
c = 1
while True:
title, next_page_url, image_list = parse_index(url)
dst_dir = title
if not os.path.exists(dst_dir):
os.mkdir(dst_dir)
for page in image_list:
html = urllib2.urlopen(page).read()
image_url = find_image(html)
fn = '{0}.{1}'.format(str(c).zfill(3), image_url.split('.')[-1])
fn = os.path.join(dst_dir, fn)
print '{0}: {1} ... '.format(dst_dir, str(c).zfill(3)),
with file(fn, 'wb') as f:
image_data = urllib2.urlopen(image_url).read()
f.write(image_data)
print 'done'
c += 1
if not next_page_url:
break
url = next_page_url['href']