Example #1
def fetchAlbum(url):
    pq = helper.get(url)
    dirName = os.path.join('.', 'aiss', pq('title').text())
    helper.mkDir(dirName)
    index = 1
    for img in pq('.message > img'):
        helper.downloadImg(img.get('src'),
                           os.path.join(dirName, '%03d.jpg' % index))
        index += 1
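Every example here leans on a shared helper module that the listing never shows. Below is a rough reconstruction of just the calls these snippets use (get, mkDir, downloadImg, writeFile, now, getMonth), assuming the requests and pyquery libraries. The real module clearly takes extra arguments at some call sites (e.g. helper.get(url, returnText=True) in Example #12, and the four-argument form in Example #9), so treat this as a hedged sketch, not the actual implementation:

# Hypothetical reconstruction of the shared helper module (not shown in the listing).
import os
import time

import requests
from pyquery import PyQuery


def get(url):
    # fetch a page and wrap it in PyQuery for CSS-style selection
    return PyQuery(requests.get(url, timeout=30).text)


def mkDir(path):
    # create the directory (and any parents) if it does not exist yet
    if not os.path.exists(path):
        os.makedirs(path)


def downloadImg(imgUrl, imgPath):
    # stream an image to disk, skipping files that already exist
    if os.path.exists(imgPath):
        return
    r = requests.get(imgUrl, stream=True, timeout=30)
    with open(imgPath, 'wb') as f:
        for chunk in r.iter_content(chunk_size=1024):
            if chunk:
                f.write(chunk)


def writeFile(content, path):
    # write text content to a file, overwriting any previous version
    with open(path, 'w') as f:
        f.write(content)


def now():
    # timestamp string used in the progress logs
    return time.strftime('%Y-%m-%d %H:%M:%S')


def getMonth(monthName):
    # map an English month name (or its three-letter prefix) to its number
    return time.strptime(monthName[:3], '%b').tm_mon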
Example #2
def fetchAlbum(url, dirName):
    if 'rosi' in url:
        pq = helper.get(url)
        dirName = os.path.join(dirName,
                               pq('#post-title').text().split('No.')[1])
        helper.mkDir(dirName)
        for a in pq('.gallery-icon > a'):
            imgUrl = a.get('href')
            helper.downloadImg(imgUrl,
                               os.path.join(dirName,
                                            imgUrl.split('/')[-1]))
Example #3
def fetchPage(page):
    url = '%s/page/%d/' % (BASE_URL, page)
    pq = helper.get(url)
    for a in pq('a.disp_a'):
        title = a.get('title').replace('Permalink to ', '')
        url = a.get('href')
        dirName = os.path.join('cartoon', title)
        if not os.path.exists(os.path.join(dirName, 'url.txt')):
            helper.mkDir(dirName)
            if not fetchGallery(url, title, page):
                return False
    return True
Example #4
def fetchComic(webPage, comicIndex, url, page=1, comicDir=None):
	pq = helper.get('%s/%d' % (url, page))
	if page == 1:
		title = pq('title').text().replace('/', '&').split(' - ')[0]
		comicDir = os.path.join('animezilla', title)
		if os.path.exists(os.path.join('animezilla', '0uploaded', title, 'done.txt')):
			return True
		if os.path.exists(os.path.join(comicDir, 'done.txt')):
			return True
		helper.mkDir(comicDir)
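	# hard-coded special case: for this one comic, skip ahead to page 90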
	if webPage == 1 and comicIndex == 16:
		if page < 90:
			return fetchComic(webPage, comicIndex, url, 90, comicDir)

	img = pq('img#comic')
	print('[%s] downloading webPage page => %d, comic index => %d, comic page => %d' % (helper.now(), webPage, comicIndex, page))
	downloadImg(img.attr('src'), os.path.join(comicDir, '%03d.jpg' % page), url)
	time.sleep(3)
	if len(img.parents('a')) == 0:
		helper.writeFile('done', os.path.join(comicDir, 'done.txt'))
		return True
	return fetchComic(webPage, comicIndex, url, page + 1, comicDir)
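fetchComic calls itself once per comic page, so a comic longer than the interpreter's recursion limit (1000 frames by default) would raise RecursionError. A sketch of the same flow written as a loop, reusing the hedged helpers above and dropping the page-90 special case:

# Iterative variant of fetchComic (a sketch; same flow, no recursion-depth risk).
def fetchComicIter(webPage, comicIndex, url):
	page = 1
	comicDir = None
	while True:
		pq = helper.get('%s/%d' % (url, page))
		if page == 1:
			title = pq('title').text().replace('/', '&').split(' - ')[0]
			comicDir = os.path.join('animezilla', title)
			if os.path.exists(os.path.join(comicDir, 'done.txt')):
				return True
			helper.mkDir(comicDir)
		img = pq('img#comic')
		downloadImg(img.attr('src'), os.path.join(comicDir, '%03d.jpg' % page), url)
		time.sleep(3)
		if len(img.parents('a')) == 0:
			# no "next page" link wrapping the image: this was the last page
			helper.writeFile('done', os.path.join(comicDir, 'done.txt'))
			return True
		page += 1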
Example #5
def fetch_model(url, name, head_img):
    '''fetch model'''
    model_dir = os.path.join('vivthomas', 'model')
    helper.mkDir(model_dir)
    helper.mkDir(os.path.join('vivthomas', 'photo'))
    # download the avatar first
    helper.downloadImg(head_img, os.path.join(
        model_dir, '%s.jpg' % name))
    if os.path.exists(os.path.join('vivthomas', 'model', '%s.json' % (name))):
        return
    # then scrape the detailed model data
    model_info = {
        'name': name,
        'photos': []
    }
    pyquery = helper.get(url)
    country_span = pyquery('.custom-country')
    model_info['country'] = country_span.text() if country_span else 'unknown'

    # gather the photo data
    custom_content_list = pyquery('.custom-content-list')
    custom_content = None
    for item in custom_content_list:
        if item.getchildren()[0].getchildren()[0].text.startswith('Photos with'):
            custom_content = item
            break
    if custom_content is None:
        helper.writeFile(json.dumps(model_info), os.path.join(
            'vivthomas', 'model', '%s.json' % (name)))
        return
    list_group_item_list = custom_content.getchildren()[2].findall('li')
    for list_group_item in list_group_item_list:
        custom_list_item_detailed = list_group_item.getchildren()[1]
        img = custom_list_item_detailed.getchildren()[0].getchildren()[
            0].getchildren()[0]
        photo_name = img.get('alt')
        # Released: Feb 26, 2016
        date_str = custom_list_item_detailed.getchildren()[1].getchildren()[
            1].text_content().split(': ')[1]
        date_str = '%s-%d-%s' % (date_str.split(', ')[1], helper.getMonth(
            date_str.split(' ')[0]), date_str.split(' ')[1].replace(',', ''))
        # model names featured in the photo
        arr = custom_list_item_detailed.getchildren()[1].getchildren()[
            2].getchildren()
        model_name_arr = []
        for i in xrange(1, len(arr)):
            model_name_arr.append(arr[i].text)
        # download the photo's cover image; sanitize the name once and reuse it,
        # so the .jpg and .json always land in the same directory
        safe_name = photo_name.replace('/', ' ')
        photo_dir = os.path.join('vivthomas', 'photo',
                                 '%s_%s' % (date_str, safe_name))
        photo_path = os.path.join(photo_dir,
                                  '%s_%s.jpg' % (date_str, safe_name))
        helper.downloadImg(img.get('src'), photo_path)
        # save to the database (disabled)
        # mongo.newAlbun(photo_name, date)
        photo_json = {
            'date': date_str,
            'name': photo_name,
            'model': model_name_arr
        }
        photo_json_str = json.dumps(photo_json)
        model_info['photos'].append(photo_json)
        helper.writeFile(photo_json_str,
                         os.path.join(photo_dir,
                                      '%s_%s.json' % (date_str, safe_name)))
    helper.writeFile(json.dumps(model_info), os.path.join(
        'vivthomas', 'model', '%s.json' % (name)))
Example #6
        except Exception as e:
            print(e)
            return
        with open(imgPath, 'wb') as f:
            for chunk in r.iter_content(chunk_size=1024):
                if chunk:
                    f.write(chunk)
                    f.flush()
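This excerpt opens mid-function, inside the except clause of a download helper whose beginning was cut off. A plausible self-contained version of the whole function, assuming the requests library (the exact original is not shown), would read:

# Hedged reconstruction of the truncated download helper above.
import requests

def downloadImg(imgUrl, imgPath):
    try:
        r = requests.get(imgUrl, stream=True, timeout=30)
    except Exception as e:
        print(e)
        return
    with open(imgPath, 'wb') as f:
        for chunk in r.iter_content(chunk_size=1024):
            if chunk:
                f.write(chunk)
                f.flush()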


if __name__ == '__main__':
    baseUrl = 'http://www.hhssee.com/manhua31358.html'
    pq = helper.get(baseUrl)
    comicName = pq('h1').text()
    comicDir = os.path.join('hhssee', comicName)
    helper.mkDir(comicDir)

    bookUrlArr = []
    for a in pq('a.l_s'):
        bookUrlArr.append({
            'url': 'http://www.hhssee.com%s' % a.get('href'),
            'name': a.text
        })

    bookIndex = 0
    for bookData in bookUrlArr:
        bookIndex += 1
        # if bookIndex < 5:
        # 	continue
        bookName = bookData.get('name')
        if '卷' not in bookName:
Example #7
import helper
import re, os, calendar, datetime

# http://madamevoyeur.com/index.php?p=1&a=2010
BASE_URL = 'http://madamevoyeur.com'


if __name__ == '__main__':
	dirName = 'madameVoyeur'
	helper.mkDir(dirName)
	nowYear = datetime.datetime.now().year
	year = 2017
	while year <= nowYear:
		# a leap year has 366 daily pages, a normal year 365
		for pageIndex in range(1, 367 if calendar.isleap(year) else 366):
			print('%s => year: %d page: %d' % (helper.now(), year, pageIndex))
			# http://madamevoyeur.com/images/2016/16035.jpg
			yy = 0 if year == 2010 else year - 2000
			# up to three images per page: /images/<year>/, /images/<year>-2/, /images/<year>-3/
			for i, suffix in enumerate(('', '-2', '-3'), 1):
				helper.downloadImg(
					'%s/images/%d%s/%02d%03d.jpg' % (BASE_URL, year, suffix, yy, pageIndex),
					os.path.join(dirName, '%d_%03d_%d.jpg' % (year, pageIndex, i)))
		year += 1


	# year 2017 page 24
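The numeric file names pack a two-digit year suffix and a three-digit page number into one figure. Checking the pattern against the images/2016/16035.jpg comment above, with illustrative values:

# Worked example of the image-URL pattern used above.
year, pageIndex = 2016, 35
yy = 0 if year == 2010 else year - 2000
print('%s/images/%d/%02d%03d.jpg' % ('http://madamevoyeur.com', year, yy, pageIndex))
# prints: http://madamevoyeur.com/images/2016/16035.jpg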
Example #8
def fetchGallery(url, page):
	print('now page %d' % page)
	pq = helper.get(url)
	# SexArt – Alexis Crystal & Michael Fly – Call | AdultPhotoSets.ru
	title = pq('title').text()
	title = title.split(' | ')[0]
	# skip albums already present in any of the sorted/processed directories
	donePrefixArr = (
		('imgs', '0uploaded'),
		('imgs', '0uploaded', '0baidu'),
		('imgs', '0uploaded', 'MetArt'),
		('imgs', '0nas', 'MetArt'),
		('imgs', '0nas', 'MetArtX'),
		('imgs', '0error'),
	)
	for parts in donePrefixArr:
		dirName = filterDirName(os.path.join(*(parts + (title,))))
		if os.path.exists(dirName):
			print('exists!!! skip!')
			return True

	# create the local directory
	dirName = filterDirName(os.path.join('imgs', title))
	# if url.txt exists, this album has already been fetched; skip it
	if os.path.exists('%s/url.txt' % dirName):
		print('exists!!! skip!')
		return True
	helper.mkDir(dirName)
	i = 0
	tag = None
	imgUrl = []
	aArr = pq('a.externalLink')
	if not aArr or len(aArr) < 1:
		aArr = pq('div.content>p>a')
		if not aArr or len(aArr) < 1:
			aArr = pq('div.content>a')
			if not aArr or len(aArr) < 1:
				# http://imgtrex.com/8kbfdzphqsr1/daniela-dressed-for-sex-02-10000px
				arr = re.compile(r'http://imgtrex\.com/\w+/[a-z0-9-]+\.jpg').findall(pq.html())
				if len(arr) == 0:
					print('can\'t find any <a>')
					if url == 'http://adultphotosets.ru/met-art-lupita-gifera/':
						return True
					if url == 'http://adultphotosets.ru/rylskyart-oretha-mars-second-2-mars/':
						return True
					if url == 'http://adultphotosets.ru/met-art-nikolina-deirth/':
						return True
					return False
				aArr = [{'href': a} for a in arr]

		if aArr and len(aArr) > 0:
			if 'imgchili.net' in aArr[0].get('href'):
				imgArr = pq('div.content>p>a>img')
				if not imgArr or len(imgArr) < 1:
					imgArr = pq('div.content>a>img')
				# http://t10.imgchili
				if imgArr and len(imgArr) > 0:
					tag = imgArr[0].get('src').replace('http://', '').split('.imgchili')[0].replace('t', '')

	for a in aArr:
		print('%s image index => %d' % (helper.now(), i))
		largeUrl = fetchLargeImageUrl(a.get('href'), tag)
		if largeUrl is None:
			if i == 0:
				print('fetchLargeImageUrl failed')
				return True
		elif largeUrl != '':
			imgUrl.append(largeUrl)
		i += 1
		
	if len(imgUrl) > 0:
		helper.writeFile('\n'.join(imgUrl), '%s/url.txt' % dirName)
	return True
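Like Example #3, this function uses url.txt as a marker file: its presence means the album was already fetched, and the saved URL list doubles as the download manifest. The idiom in isolation, with hypothetical helper names:

# Marker-file checkpoint idiom (hypothetical helper names).
import os

def alreadyFetched(albumDir):
    # an album counts as done once its url.txt marker exists
    return os.path.exists(os.path.join(albumDir, 'url.txt'))

def markFetched(albumDir, imgUrlArr):
    # writing the URL list doubles as the completion marker
    with open(os.path.join(albumDir, 'url.txt'), 'w') as f:
        f.write('\n'.join(imgUrlArr))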
Example #9
def fetchModel(url=None, headUrl=None, name='Abril C', score=8.97):
    if url is None:
        url = 'https://www.eternaldesire.com/model/abril-c/'
    if headUrl is None:
        headUrl = 'https://static.eternaldesire.com/media/headshots/abril-c.jpg?fv=e6f189022422389d377149f795d1da13'

    modelPath = os.path.join('eternaldesire', 'models', name)
    helper.mkDir(modelPath)
    helper.downloadImg(headUrl,
                       os.path.join(modelPath, '%s_EternalDesire.jpg' % name))

    modelInfo = {
        'name': name,
        'score': score,
        'Age first shot': 0,
        'Eye color': '',
        'Hair color': '',
        'Breasts': '',
        'Shaved': '',
        'Measurements': '',
        'Height': '',
        'Weight': '',
        'Country': '',
        'Ethnicity': '',
        'photos': []
    }

    pq = helper.get(url, None, None, 1)
    infoLiArr = pq('.model_info > ul > li')
    for li in infoLiArr:
        arr = li.text_content().split(': ')
        for key in modelInfo:
            if key == arr[0]:
                modelInfo[key] = arr[1]
                break
    photoIndex = 1
    while photoIndex < 100:
        photoDivArr = pq('#latest_photo_update_%d .update_cell' % photoIndex)
        liArr = pq('#latest_photo_update_%d .hover_container_stats li' %
                   photoIndex)
        if not liArr or len(liArr) == 0:
            break
        for i in xrange(0, len(photoDivArr)):
            photoInfo = {'name': '', 'date': '0.0.1970', 'model': ''}
            img = photoDivArr[i].find('a').find('img')
            coverUrl = img.get('src')
            photoInfo['name'] = img.get('alt')
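            # the stats list is assumed to hold three <li> per photo cell:
            # [i*3] is the publish date, [i*3+1] the featured models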
            photoInfo['date'] = liArr[i * 3].text_content().replace(
                'Date published:', '')
            photoInfo['model'] = liArr[i * 3 + 1].text_content().replace(
                'Featuring: ', '')
            jsonStr = json.dumps(photoInfo)
            photoPath = os.path.join(
                'eternaldesire', 'photos',
                photoInfo.get('name') + '-' + photoInfo['model'])
            helper.mkDir(photoPath)
            helper.writeFile(jsonStr, os.path.join(photoPath, 'info.json'))
            helper.downloadImg(
                coverUrl,
                os.path.join(
                    photoPath,
                    '%s_cover_EternalDesire.jpg' % photoInfo.get('name')))

            modelInfo['photos'].append(photoInfo)
        photoIndex += 1
    helper.writeFile(json.dumps(modelInfo),
                     os.path.join(modelPath, 'info.json'))
Example #10
import argparse
import os
import sys

import helper

import eastbay
import flightclub
import footlocker
import goat
import jimmyjazz
import kickz
import nike
import stockx


# WEB_DOMAIN = ['footlocker', 'jimmyjazz', 'sneakersnstuff', 'footaction']
WEB_DOMAIN = ['finishline', 'champssports', 'stadiumgoods',
              'flightclub', 'eastbay', 'stockx', 'goat', 'kickz', 'footlocker',
              'nike', 'jimmyjazz']

if __name__ == '__main__':
    for dir_name in WEB_DOMAIN:
        helper.mkDir(os.path.join('.', 'imgs', dir_name))
    parser = argparse.ArgumentParser()
    parser.add_argument("target", help=','.join(WEB_DOMAIN))
    parser.add_argument("action", help='common or hot')
    options = parser.parse_args()

    target = options.target
    action = options.action
    if target not in WEB_DOMAIN:
        print('legal target: [%s] ' % ', '.join(WEB_DOMAIN))
    else:
        if target == 'eastbay':
            eastbay.start(action)
        elif target == 'footlocker':
            footlocker.start(action)
        elif target == 'jimmyjazz':
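The if/elif ladder (cut off above) wires each target name to a module by hand. Since the visible branches all call module.start(action), a dispatch table is a compact alternative; a sketch, assuming every listed module follows that convention:

# Hypothetical dispatch-table replacement for the if/elif ladder.
DISPATCH = {
    'eastbay': eastbay,
    'flightclub': flightclub,
    'footlocker': footlocker,
    'goat': goat,
    'jimmyjazz': jimmyjazz,
    'kickz': kickz,
    'nike': nike,
    'stockx': stockx,
}

module = DISPATCH.get(target)
if module is None:
    print('legal target: [%s] ' % ', '.join(WEB_DOMAIN))
else:
    module.start(action)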
Example #11
File: s.py  Project: balletootu/photosets
#!/usr/bin/env python
# -*- coding: utf-8 -*-

# generate thumbnails for upload to the QQ group's shared files

import Image, os, helper

dir_name = r'G:\adultphotosets\imgs\EternalDesire – Abril C – Turgues'.decode(
    'utf-8')


def scale(imgPath):
    print('try to open file: %s' % imgPath)
    im = Image.open(imgPath)
    # get the image dimensions:
    w, h = im.size
    im.thumbnail((200, int(h * 200 / w)))
    # save the scaled image in JPEG format:
    arr = imgPath.split('\\')
    arr.insert(-1, 's')
    im.save('\\'.join(arr), 'jpeg')
    print('saved => %s' % '\\'.join(arr))


if __name__ == '__main__':
    helper.mkDir(os.path.join(dir_name, 's'))
    for parent, dirnames, filenames in os.walk(dir_name):
        for filename in filenames:
            if not parent.endswith('\\s'):
                if filename.endswith('jpg'):
                    scale(os.path.join(parent, filename))
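import Image is the legacy PIL spelling, and the .decode('utf-8') call pins this script to Python 2. A rough Python 3 port under Pillow (pip install Pillow), replacing the manual backslash splitting with os.path; a sketch, not a drop-in:

# Hypothetical Python 3 / Pillow port of the thumbnail script above.
import os
from PIL import Image

dir_name = r'G:\adultphotosets\imgs\EternalDesire – Abril C – Turgues'

def scale(img_path):
    im = Image.open(img_path)
    w, h = im.size
    im.thumbnail((200, int(h * 200 / w)))
    parent, filename = os.path.split(img_path)
    out_dir = os.path.join(parent, 's')
    os.makedirs(out_dir, exist_ok=True)
    im.save(os.path.join(out_dir, filename), 'JPEG')

if __name__ == '__main__':
    for parent, dirnames, filenames in os.walk(dir_name):
        if os.path.basename(parent) == 's':
            continue  # skip already-generated thumbnail folders
        for filename in filenames:
            if filename.endswith('.jpg'):
                scale(os.path.join(parent, filename))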
Example #12
import json
import datetime
import helper

modelArr = []
albumArr = []


if __name__ == '__main__':
    model_dir = '/Users/eddie104/Documents/hongjie/photosets/femjoy/model'
    helper.mkDir(model_dir)
    page = 1
    while True:
        url = 'https://www.femjoy.com/api/v2/actors?sorting=date&thumb_size=355x475&limit=48&page=%d' % page
        txt = helper.get(url, returnText=True)
        jsonData = json.loads(txt)
        total_pages = jsonData.get('pagination').get('total_pages')

        for modelData in jsonData.get('results'):
            model = {
                'id': modelData.get('id'),
                'slug': modelData.get('slug'),
                'name': modelData.get('name'),