def __update(model, url):
    # Normalize the scraped fields: keep the title, record the page url, and
    # resolve the jacket / fallback image paths against it.
    model[r'video_title'], model[r'video_url'] = model[r'video_title'], url
    model[r'video_jacket'] = t_webtool.http_urljoin(url, model[r'video_jacket'])
    model[r'video_imgerror'] = t_webtool.http_urljoin(url, r'/img/noimagepl.gif')
    return model
def capman(url, path, rm, fmt, fmtf, rev):
    # Collect every chapter link whose title matches `rm`, then hand the
    # (link, numbered destination) pairs to docap on 8 workers.
    result = None
    try:
        lis = list()
        for link in t_webtool.bs4get(url).find(
                r'ul', r'view-win-list detail-list-select').findAll(r'a'):
            title = link.get_text()
            if re.match(rm, title):
                lis.append(t_webtool.http_urljoin(url, link[r'href']))
        if rev == 'True':
            lis.reverse()
        params = list()
        num = 0
        for link in lis:
            num += 1
            params.append((link, os.path.join(path, fmt % str(num).zfill(int(fmtf)))))
        t_webtool.reducer(params, docap, 8)
        result = True
    finally:
        if not result:
            print(r'failed.')
        return result
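# The call t_webtool.reducer(params, docap, 8) above is a project helper; the sketch
# below is only an assumed, minimal equivalent of that pattern (fan a list of argument
# tuples out to a fixed-size thread pool). The name run_pool is hypothetical.
from concurrent.futures import ThreadPoolExecutor


def run_pool(params, func, workers=8):
    # Apply func to each argument tuple, using at most `workers` threads.
    with ThreadPoolExecutor(max_workers=workers) as pool:
        return list(pool.map(lambda args: func(*args), params))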
def loader_main():
    for k, v in productlist():
        print(r'[PRODUCT] : %s' % k)
        dst = r'../../../[套图]美图日/%s' % k
        get(t.http_urljoin(website(), v), dst)
        fail = t.rmempty(dst)
        if fail:
            print(r'[Bad] : %s' % fail)
def query_pg_images(url):
    result = None
    try:
        data = list()
        for x in t.exp(url).xpath(r'//*[@class = "adw"]//img/@src'):
            data.append(t.http_urljoin(url, x))
        result = data
    finally:
        return result
def query_pgs(url):
    result = None
    try:
        data = [url]
        for x in t.exp(url).xpath(r'//*[@class = "pg"]//a[not(@class)]/@href'):
            data.append(t.http_urljoin(url, x))
        result = data
    finally:
        return result
def loader_main():
    for k, v in productlist():
        print(r'[PRODUCT] : %s' % k)
        params = dict()
        params[r'dst'] = r'../../../[套图]爱丝内/%s' % k
        params[r'url'] = t.http_urljoin(website(), v)
        mapper_product(params)
        fail = t.rmempty(params[r'dst'])
        if fail:
            print(r'[Bad] : %s' % fail)
def work():
    # Build the id-search url and map every result page back to its type.
    result = None
    try:
        searchurl = t_webtool.http_urljoin(
            param[r'url'], r'vl_searchbyid.php?keyword=%s' % param[r'type'])
        result = {
            url: param[r'type']
            for url in JavLibSearch.__pageselector(searchurl)
        }
    finally:
        if result is None:
            print(r'[ERROR] : %s.' % param)
        return result
def loader_main():
    dbinfo = t_webtool.mkd(db=r'javlib', user=r'root', passwd=r'admin')
    cfg = t_webtool.IniDict()
    proot = os.path.dirname(__file__)
    cfg.read(os.path.join(proot, r'configer_jmakers.ini'))
    proot = os.path.join(proot, cfg[r'CONFIG'][r'%s_path' % platform.system()])
    website = cfg[r'CONFIG'][r'site']
    for maker, ma in cfg.resection(r'^JMAKER_(\w+)$').items():
        # print(r'$$$$ start CFS $$$$')
        # update_cookie(website)
        # print(r'$$$$ end CFS $$$$')
        t_javlib.start_collect(
            proot, dbinfo, ma.group(1),
            t_webtool.http_urljoin(website, cfg[maker][r'url']))
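# cfg.resection(r'^JMAKER_(\w+)$') above comes from the project's IniDict helper; the
# snippet below is only an assumed equivalent, returning {section_name: re.Match} for
# every INI section whose name matches the pattern, which is why ma.group(1) is usable
# in the loop above. The name resection_example is hypothetical.
import configparser
import re


def resection_example(inipath, pattern):
    cfg = configparser.ConfigParser()
    cfg.read(inipath)
    rx = re.compile(pattern)
    # Keep only matching sections, paired with the match object.
    return {name: m for name in cfg.sections() if (m := rx.match(name))}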
def work():
    # Collect the detail link of every video cell whose id matches param[r'type'].
    result = None
    try:
        ret = dict()
        for div in t_webtool.bs4get(param[r'url']).findAll(r'div', r'video'):
            vid = video_id(div.find(r'div', r'id').get_text())
            if re.match(r'^%s\W+\d+$' % param[r'type'], vid):
                ret[vid] = t_webtool.http_urljoin(param[r'url'], div.a[r'href'])
        result = ret
    finally:
        if result is None:
            print(r'[ERROR] : %s.' % param)
        return result
def query_threadpages(url):
    # Find the highest page number in the pager, then build the url of every page.
    result = None
    try:
        urls = [url]
        maxp = 1
        for x in t.exp(url).xpath(r'//div[@id = "pages"]/a/text()'):
            if not x.isnumeric():
                continue
            if int(x) > maxp:
                maxp = int(x)
        for i in range(2, maxp + 1):
            urls.append(t.http_urljoin(url, r'%s.html' % i))
        result = urls
    finally:
        return result
def query_navpagetbl(product, dst):
    pcount = int(
        int(query_product_total(product)) / len(query_navpage_threadobjtbl(product)))
    navpagetbl = list()
    navpagetbl.append({
        r'dst': dst,
        r'url': product,
    })
    for i in range(1, pcount + 1):
        navpagetbl.append({
            r'dst': dst,
            r'url': t.http_urljoin(product, r'index_%s.html' % i),
        })
    return navpagetbl
def query_threadinfotbl(threadobjtbl, url, dst):
    result = None
    try:
        threadinfotbl = dict()
        for threadobj in threadobjtbl:
            link = t.http_urljoin(url, t.expa(threadobj, r'href', r'./a/'))
            subject = t.expt(threadobj, r'./p[@class = "biaoti"]/a/')
            tbl = dict()
            tbl[r'cover'] = t.expa(threadobj, r'src', r'./a/img/')
            tbl[r'url'] = link
            tbl[r'subject'] = r'[%s]%s' % (re.sub(r'\D', r'', link), __fixsubject(subject))
            tbl[r'dst'] = dst
            for v in tbl.values():
                if not v:
                    # Any missing field aborts the whole table; the finally clause
                    # then returns None because result was never set.
                    return
            threadinfotbl[link] = tbl
        result = threadinfotbl
    finally:
        return result
def query_product_page(url, dst):
    result = None
    try:
        threadinfos = dict()
        for group in t.exp(url, r'utf-8').xpath(r'//*[@class = "group"]'):
            link = group.xpath(r'.//*[@class = "bution"]//a')[0]
            threadurl = t.http_urljoin(url, t.expa(link, r'href'))
            thread = dict()
            thread[r'cover'] = t.exps(group.xpath(r'.//*[@class = "photo"]//img/@src'))
            thread[r'url'] = threadurl
            thread[r'subject'] = r'%s[%s]' % (
                fixsubject(t.expt(link)), re.sub(r'\D', r'', threadurl))
            thread[r'dst'] = dst
            for v in thread.values():
                if not v:
                    return
            threadinfos[threadurl] = thread
        result = threadinfos
    finally:
        return result
def work():
    result = None
    url = t.http_urljoin(website(), params[r'url'])
    try:
        ig = False

        def onerr(resp):
            # Remember when the failure was a plain 404 so it is not treated as an error.
            if resp.status_code == 404:
                nonlocal ig
                ig = True

        if not t.http_download(url, params[r'image'], None, onerr):
            if ig:
                result = True
                print(r'[404] : %s.' % url)
            return
        result = True
    finally:
        if result is None:
            print(r'[ERROR] : %s.' % url)
        time.sleep(1)
        return result
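# t.http_download above is project-internal; from its use in work() it appears to return
# a falsy value on failure and to pass the failed response to the onerr callback. The
# standalone requests-based sketch below only illustrates that contract under those
# assumptions; download_with_onerr is a hypothetical name, not the project's API.
import requests


def download_with_onerr(url, dest, onerr=None, timeout=30):
    # Return True once the body is written to dest, False on any non-200 response.
    resp = requests.get(url, stream=True, timeout=timeout)
    if resp.status_code != 200:
        if onerr:
            onerr(resp)  # the caller can e.g. check resp.status_code == 404
        return False
    with open(dest, 'wb') as fp:
        for chunk in resp.iter_content(chunk_size=65536):
            fp.write(chunk)
    return True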