#!/usr/bin/env python #-*- coding:utf-8 -*- import os import urllib2 import sys from BeautifulSoup import BeautifulSoup try: url = sys.argv[1] except IndexError: print "geh - A simeple g.e-hentai downloader\nUsage: geh.py [url]" def find_next_page_link(tag): try: if tag.name == 'a' and tag.text == '>': return True return False except TypeError: return False def find_image(html): soup = BeautifulSoup(html) return soup.find('img', {'id': 'img'})['src'] def parse_index(url): html = urllib2.urlopen(url).read() soup = BeautifulSoup(html) title = soup.h1.text next_page_url = soup.find(find_next_page_link) image_list = [node['href'] for node in soup.find('div', {'id': 'gdt'}).findAll('a')] return title, next_page_url, image_list c = 1 while True: title, next_page_url, image_list = parse_index(url) dst_dir = title if not os.path.exists(dst_dir): os.mkdir(dst_dir) for page in image_list: html = urllib2.urlopen(page).read() image_url = find_image(html) fn = '{0}.{1}'.format(str(c).zfill(3), image_url.split('.')[-1]) fn = os.path.join(dst_dir, fn) print '{0}: {1} ... '.format(dst_dir, str(c).zfill(3)), with file(fn, 'wb') as f: image_data = urllib2.urlopen(image_url).read() f.write(image_data) print 'done' c += 1 if not next_page_url: break url = next_page_url['href']
A simple g.e-hentai downloader
訂閱:
張貼留言 (Atom)
沒有留言:
張貼留言