def response(resp):
    """Parse an HTML listing page into image results.

    resp -- requests response object
    Returns a list of result dicts using the ``images.html`` template.
    """
    results = []
    dom = html.fromstring(resp.text)

    for res in dom.xpath('//div[@class="List-item MainListing"]'):
        # FIX: the original used the absolute expression '//a', which lxml
        # evaluates against the WHOLE document, so every result got the first
        # anchor on the page.  './/a' searches relative to this result node.
        link = res.xpath('.//a')[0]
        url = urljoin(base_url, link.attrib.get('href'))
        title = extract_text(link)

        thumbnail_src = urljoin(base_url, res.xpath('.//img')[0].attrib['src'])
        # TODO: get image with higher resolution
        img_src = thumbnail_src

        # append result
        results.append({
            'url': url,
            'title': title,
            'img_src': img_src,
            'content': '',
            'thumbnail_src': thumbnail_src,
            'template': 'images.html'
        })

    return results
def response(resp):
    """Reassemble ``<a>…</a>`` fragments from the raw response text and
    turn links pointing at photo pages into image results."""
    results = []

    # split the page on anchor boundaries; capturing group keeps delimiters
    regex = re.compile('(</a>|<a)')
    fragments = re.split(regex, resp.text)

    buffered = ''
    for fragment in fragments:
        if fragment == '<a':
            # a new link starts: reset the buffer
            buffered = fragment
            continue
        if fragment != '</a>':
            buffered += fragment
            continue

        # closing tag reached: the buffer now holds one complete anchor
        buffered += fragment
        # fix xml-error
        buffered = buffered.replace('"></a>', '"/></a>')

        dom = html.fromstring(buffered)
        link = dom.xpath('//a')[0]
        url = urljoin(base_url, link.attrib.get('href'))
        title = link.attrib.get('title', '')
        thumbnail_src = urljoin(base_url, link.xpath('.//img')[0].attrib['src'])
        # TODO: get image with higher resolution
        img_src = thumbnail_src

        # only keep anchors that point at a photo page
        if '/photo/' not in url:
            continue

        results.append({'url': url,
                        'title': title,
                        'img_src': img_src,
                        'content': '',
                        'thumbnail_src': thumbnail_src,
                        'template': 'images.html'})

    return results
def response(resp):
    """Parse a changelog-style page: each ``<h4>`` heading (a date) plus the
    sibling ``<div>`` elements up to the next heading becomes one result.

    resp -- requests response object
    """
    results = []
    dom = html.fromstring(resp.text)
    try:
        logdiv = dom.xpath('//*[@id="log"]//*[contains(concat(" ",normalize-space(@class)," ")," log-messages ")]')[0]
        headings = logdiv.xpath(".//h4")
        logger.debug(len(headings) + 1)
        # XPath positions are 1-based
        for i in range(1, len(headings) + 1):
            date = logdiv.xpath(".//h4[%s]" % i)[0].text
            content = []
            # collect sibling elements until the next h4 heading
            for item in logdiv.xpath(".//h4[%s]" % i)[0].itersiblings():
                if item.tag != 'h4':
                    content += [item.text_content().strip()]
                else:
                    break
            results.append({'url': urljoin(base_url, logdiv.xpath(".//h4[%s]/following-sibling::div/a" % i)[0].get('href')),
                            'title': date.strip(),
                            'content': "|".join(content).strip()})
    except Exception:
        # FIX: was a bare ``except:`` which also swallowed SystemExit and
        # KeyboardInterrupt; keep the best-effort behaviour but catch only
        # Exception.
        logger.debug("not failing silently")
    logger.debug(results)
    return results
def response(resp):
    """Extract url/title/thumbnail/content entries from the result page."""
    dom = html.fromstring(resp.text)
    results = []

    for entry in dom.xpath(results_xpath):
        anchor = entry.xpath(link_xpath)[0]
        href = urljoin(base_url, anchor.attrib.get('href'))
        # there's also a span (class="rdf-meta element-hidden"
        # property="dc:title") whose content property carries the title
        title = escape(extract_text(anchor))

        thumbs = entry.xpath(thumbnail_xpath)
        thumbnail = extract_text(thumbs[0]) if thumbs else None
        if thumbnail is not None and thumbnail[0] == '/':
            # relative thumbnail path -> absolute
            thumbnail = base_url + thumbnail

        results.append({
            'url': href,
            'title': title,
            'img_src': thumbnail,
            'content': escape(extract_text(entry.xpath(content_xpath)))
        })

    return results
def response(resp):
    """Parse torrent rows, falling back to alternative xpaths for the
    short-result layout; results are sorted by seeder count."""
    dom = html.fromstring(resp.text)

    torrent_links = dom.xpath(torrent_xpath)
    if torrent_links:
        seeds = dom.xpath(seeds_xpath)
        peers = dom.xpath(peers_xpath)
        titles = dom.xpath(title_xpath)
        sizes = dom.xpath(size_xpath)
        ages = dom.xpath(age_xpath)
    else:
        # pages with under ~5 results use different markup
        torrent_links = dom.xpath(alternative_torrent_xpath)
        seeds = dom.xpath(alternative_seeds_xpath)
        peers = dom.xpath(alternative_peers_xpath)
        titles = dom.xpath(alternative_title_xpath)
        sizes = dom.xpath(alternative_size_xpath)
        ages = dom.xpath(alternative_age_xpath)

    # nothing found with either layout
    if not torrent_links:
        return []

    results = []
    for idx, anchor in enumerate(torrent_links):
        target = urljoin(url, anchor.attrib.get('href'))
        results.append({'url': target,
                        'title': titles[idx].text_content(),
                        'content': '{}, {}'.format(sizes[idx], ages[idx]),
                        'seed': seeds[idx],
                        'leech': peers[idx],
                        'template': 'torrent.html'})

    return sorted(results, key=itemgetter('seed'), reverse=True)
def response(resp):
    """Scrape the table-list result table into torrent results."""
    dom = html.fromstring(resp.text)
    results = []

    for row in dom.xpath('//table[contains(@class, "table-list")]/tbody//tr'):
        href = urljoin(
            url, row.xpath('./td[contains(@class, "name")]/a[2]/@href')[0])
        title = extract_text(row.xpath('./td[contains(@class, "name")]/a[2]'))
        seed = extract_text(row.xpath('.//td[contains(@class, "seeds")]'))
        leech = extract_text(row.xpath('.//td[contains(@class, "leeches")]'))
        # the size cell reads e.g. "1.4 GB" -> value + unit
        size_text = extract_text(row.xpath('.//td[contains(@class, "size")]/text()'))
        amount, unit = size_text.split()
        results.append({
            'url': href,
            'title': title,
            'seed': seed,
            'leech': leech,
            'filesize': get_torrent_size(amount, unit),
            'template': 'torrent.html'
        })

    return results
def response(resp):
    '''post-response callback

    resp: requests response object
    '''
    results = []
    dom = html.fromstring(resp.text)

    try:
        number_of_results_string = \
            re.sub('[^0-9]', '',
                   eval_xpath(dom, '//a[@class="active" and contains(@href,"/suchen/dudenonline")]/span/text()')[0])
        results.append({'number_of_results': int(number_of_results_string)})
    except (IndexError, ValueError):
        # FIX: narrowed from a bare ``except:`` — only the expected
        # "element missing" / "not a number" failures are ignored.
        logger.debug("Couldn't read number of results.")

    for result in eval_xpath(dom, '//section[not(contains(@class, "essay"))]'):
        try:
            url = eval_xpath(result, './/h2/a')[0].get('href')
            url = urljoin(base_url, url)
            title = eval_xpath(result, 'string(.//h2/a)').strip()
            content = extract_text(eval_xpath(result, './/p'))
            # append result
            results.append({'url': url,
                            'title': title,
                            'content': content})
        except Exception:
            # FIX: narrowed from a bare ``except:`` — a malformed section is
            # logged and skipped without hiding fatal signals.
            logger.debug('result parse error in:\n%s', etree.tostring(result, pretty_print=True))
            continue

    return results
def response(resp):
    """Parse the #search_res table rows into torrent results.

    resp -- requests response object
    Returns results sorted by seeder count (descending).
    """
    results = []
    dom = html.fromstring(resp.text)
    search_res = dom.xpath('//div[@id="search_res"]/table/tr')

    # return empty array if nothing is found
    if not search_res:
        return []

    for result in search_res:
        link = result.xpath('.//td[@class="torrent_name"]//a')[0]
        href = urljoin(url, link.attrib.get('href'))
        title = extract_text(link)
        content = extract_text(result.xpath('.//pre[@class="snippet"]')[0])
        content = "<br />".join(content.split("\n"))

        # IMPROVED: the attr_val span texts were queried four times per row;
        # evaluate the xpath once and index into the list.
        attr_vals = result.xpath('.//span[@class="attr_val"]/text()')
        size_parts = attr_vals[0].split()
        filesize = size_parts[0]
        filesize_multiplier = size_parts[1]
        files = attr_vals[1]
        seed = attr_vals[2]

        # convert seed to int if possible
        if seed.isdigit():
            seed = int(seed)
        else:
            seed = 0
        leech = 0

        # convert filesize to byte if possible
        filesize = get_torrent_size(filesize, filesize_multiplier)

        # convert files to int if possible
        if files.isdigit():
            files = int(files)
        else:
            files = None

        magnetlink = result.xpath('.//td[@class="ttth"]//a')[0].attrib['href']

        results.append({
            'url': href,
            'title': title,
            'content': content,
            'seed': seed,
            'leech': leech,
            'filesize': filesize,
            'files': files,
            'magnetlink': magnetlink,
            'template': 'torrent.html'
        })

    # return results sorted by seeder
    return sorted(results, key=itemgetter('seed'), reverse=True)
def response(resp):
    """Parse x-item cells into torrent results; this tracker publishes no
    seed/leech counts, so both are reported as 'N/A'."""
    dom = html.fromstring(resp.text)
    items = dom.xpath('.//td[@class="x-item"]')
    if not items:
        return []

    results = []
    for item in items:
        tail_words = extract_text(item.xpath('.//div[@class="tail"]')).split()
        results.append({
            'url': urljoin(URL, item.xpath('.//a[@title]/@href')[0]),
            'title': extract_text(item.xpath('.//a[@title]')),
            'content': extract_text(item.xpath('.//div[@class="files"]')),
            'filesize': get_torrent_size(tail_words[FILESIZE],
                                         tail_words[FILESIZE_MULTIPLIER]),
            'magnetlink': item.xpath('.//div[@class="tail"]//a[@class="title"]/@href')[0],
            'seed': 'N/A',
            'leech': 'N/A',
            'template': 'torrent.html'
        })
    return results
def response(resp):
    """Build torrent results out of the x-item cells (no seed/leech data)."""
    dom = html.fromstring(resp.text)
    cells = dom.xpath('.//td[@class="x-item"]')

    results = list()
    for cell in cells:
        target = urljoin(URL, cell.xpath('.//a[@title]/@href')[0])
        name = extract_text(cell.xpath('.//a[@title]'))
        file_list = extract_text(cell.xpath('.//div[@class="files"]'))
        # the tail div holds whitespace-separated metadata incl. the size
        tail = extract_text(cell.xpath('.//div[@class="tail"]')).split()
        size = get_torrent_size(tail[FILESIZE], tail[FILESIZE_MULTIPLIER])
        magnet = cell.xpath('.//div[@class="tail"]//a[@class="title"]/@href')[0]
        entry = {'url': target,
                 'title': name,
                 'content': file_list,
                 'filesize': size,
                 'magnetlink': magnet,
                 'seed': 'N/A',
                 'leech': 'N/A',
                 'template': 'torrent.html'}
        results.append(entry)
    return results
def response(resp):
    """Scrape result nodes into url/title/img_src/content entries."""
    dom = html.fromstring(resp.text)
    results = []

    for node in dom.xpath(results_xpath):
        anchor = node.xpath(link_xpath)[0]
        href = urljoin(base_url, anchor.attrib.get('href'))
        # a span (class="rdf-meta element-hidden" property="dc:title") also
        # carries the title in its content property
        title = escape(extract_text(anchor))

        thumbnail = None
        thumb_nodes = node.xpath(thumbnail_xpath)
        if thumb_nodes:
            thumbnail = extract_text(thumb_nodes[0])
            if thumbnail[0] == '/':
                # relative thumbnail path -> absolute
                thumbnail = base_url + thumbnail

        content = escape(extract_text(node.xpath(content_xpath)))
        results.append({'url': href,
                        'title': title,
                        'img_src': thumbnail,
                        'content': content})

    return results
def preferences():
    """Render preferences page && save user preferences"""

    # save preferences: a POST means the form was submitted
    if request.method == 'POST':
        resp = make_response(redirect(urljoin(settings['server']['base_url'], url_for('index'))))
        try:
            request.preferences.parse_form(request.form)
        except ValidationException:
            # invalid form data: redirect back with an error flash
            request.errors.append(gettext('Invalid settings, please edit your preferences'))
            return resp
        # persist preferences into the response cookies
        return request.preferences.save(resp)

    # render preferences
    image_proxy = request.preferences.get_value('image_proxy')
    # NOTE(review): 'lang' is assigned but not passed to render() below —
    # looks unused here; confirm before removing.
    lang = request.preferences.get_value('language')
    disabled_engines = request.preferences.engines.get_disabled()
    allowed_plugins = request.preferences.plugins.get_enabled()

    # stats for preferences page
    stats = {}
    for c in categories:
        for e in categories[c]:
            stats[e.name] = {'time': None, 'warn_timeout': False, 'warn_time': False}
            # flag engines whose own timeout exceeds the global request timeout
            if e.timeout > settings['outgoing']['request_timeout']:
                stats[e.name]['warn_timeout'] = True
            stats[e.name]['supports_selected_language'] = _is_selected_language_supported(e, request.preferences)

    # get first element [0], the engine time,
    # and then the second element [1] : the time (the first one is the label)
    for engine_stat in get_engines_stats()[0][1]:
        stats[engine_stat.get('name')]['time'] = round(engine_stat.get('avg'), 3)
        if engine_stat.get('avg') > settings['outgoing']['request_timeout']:
            stats[engine_stat.get('name')]['warn_time'] = True
    # end of stats

    return render('preferences.html',
                  locales=settings['locales'],
                  current_locale=get_locale(),
                  image_proxy=image_proxy,
                  engines_by_category=categories,
                  stats=stats,
                  answerers=[{'info': a.self_info(), 'keywords': a.keywords} for a in answerers],
                  disabled_engines=disabled_engines,
                  autocomplete_backends=autocomplete_backends,
                  shortcuts={y: x for x, y in engine_shortcuts.items()},
                  themes=themes,
                  plugins=plugins,
                  doi_resolvers=settings['doi_resolvers'],
                  current_doi_resolver=get_doi_resolver(request.args, request.preferences.get_value('doi_resolver')),
                  allowed_plugins=allowed_plugins,
                  theme=get_current_theme_name(),
                  preferences_url_params=request.preferences.get_as_url_params(),
                  base_url=get_base_url(),
                  preferences=True)
def response(resp):
    """Parse the one_result divs into torrent results.

    resp -- requests response object
    """
    results = []
    dom = html.fromstring(resp.text)
    search_res = dom.xpath('//div[@class="one_result"]')

    # return empty array if nothing is found
    if not search_res:
        return []

    for result in search_res:
        link = result.xpath('.//div[@class="torrent_name"]//a')[0]
        href = urljoin(url, link.attrib.get('href'))
        title = extract_text(link)

        excerpt = result.xpath('.//div[@class="torrent_excerpt"]')[0]
        content = html.tostring(excerpt, encoding='unicode', method='text', with_tail=False)
        # it is better to emit <br/> instead of |, but html tags are verboten
        content = content.strip().replace('\n', ' | ')
        content = ' '.join(content.split())

        filesize = result.xpath(
            './/span[@class="torrent_size"]/text()')[0].split()[0]
        filesize_multiplier = result.xpath(
            './/span[@class="torrent_size"]/text()')[0].split()[1]
        files = (result.xpath('.//span[@class="torrent_files"]/text()') or ['1'])[0]

        # convert filesize to byte if possible
        filesize = get_torrent_size(filesize, filesize_multiplier)

        # convert files to int if possible
        try:
            files = int(files)
        except ValueError:
            # FIX: narrowed from a bare ``except:`` — only a non-numeric
            # file count falls back to None.
            files = None

        magnetlink = result.xpath(
            './/div[@class="torrent_magnet"]//a')[0].attrib['href']

        results.append({
            'url': href,
            'title': title,
            'content': content,
            'filesize': filesize,
            'files': files,
            'magnetlink': magnetlink,
            'template': 'torrent.html'
        })

    return results
def response(resp):
    """Turn the #search_res table rows into torrent results, sorted by
    seeder count."""
    dom = html.fromstring(resp.text)
    rows = dom.xpath('//div[@id="search_res"]/table/tr')
    if not rows:
        return []

    results = []
    for row in rows:
        name_link = row.xpath('.//td[@class="torrent_name"]//a')[0]
        snippet = extract_text(row.xpath('.//pre[@class="snippet"]')[0])

        # attr_val spans carry: size text, file count, seed count
        attr_values = row.xpath('.//span[@class="attr_val"]/text()')
        size_words = attr_values[0].split()
        files_text = attr_values[1]
        seed_text = attr_values[2]

        results.append({
            'url': urljoin(url, name_link.attrib.get('href')),
            'title': extract_text(name_link),
            'content': "<br />".join(snippet.split("\n")),
            'seed': int(seed_text) if seed_text.isdigit() else 0,
            'leech': 0,
            'filesize': get_torrent_size(size_words[0], size_words[1]),
            'files': int(files_text) if files_text.isdigit() else None,
            'magnetlink': row.xpath('.//td[@class="ttth"]//a')[0].attrib['href'],
            'template': 'torrent.html'})

    return sorted(results, key=itemgetter('seed'), reverse=True)
def response(resp):
    """Scrape the data table into torrent results sorted by seeders."""
    dom = html.fromstring(resp.text)
    rows = dom.xpath('//table[@class="data"]//tr')
    if not rows:
        return []

    results = []
    # the first row is the table header
    for row in rows[1:]:
        link = row.xpath('.//a[@class="cellMainLink"]')[0]
        href = urljoin(url, link.attrib['href'])
        title = extract_text(link)
        content = extract_text(row.xpath(content_xpath))

        seed = convert_str_to_int(
            extract_text(row.xpath('.//td[contains(@class, "green")]')))
        leech = convert_str_to_int(
            extract_text(row.xpath('.//td[contains(@class, "red")]')))

        size_value, size_unit = extract_text(
            row.xpath('.//td[contains(@class, "nobr")]')).split()
        filesize = get_torrent_size(size_value, size_unit)

        files = extract_text(row.xpath('.//td[contains(@class, "center")][2]'))
        files = int(files) if files.isdigit() else None

        magnetlink = row.xpath(magnet_xpath)[0].attrib['href']
        torrentfileurl = quote(row.xpath(torrent_xpath)[0].attrib['href'],
                               safe="%/:=&?~#+!$,;'@()*")

        results.append({'url': href,
                        'title': title,
                        'content': content,
                        'seed': seed,
                        'leech': leech,
                        'filesize': filesize,
                        'files': files,
                        'magnetlink': magnetlink,
                        'torrentfile': torrentfileurl,
                        'template': 'torrent.html'})

    return sorted(results, key=itemgetter('seed'), reverse=True)
def extract_url(xpath_results, search_url):
    """Turn an xpath result into an absolute, normalized URL.

    Protocol-relative, root-relative and scheme-less URLs are all resolved
    against ``search_url``.  Raises on an empty resultset.
    """
    if xpath_results == []:
        raise Exception('Empty url resultset')

    url = extract_text(xpath_results)

    if url.startswith('//'):
        # protocol-relative (//example.com/): prepend the search scheme,
        # defaulting to http
        parsed_search_url = urlparse(search_url)
        url = u'{0}:{1}'.format(parsed_search_url.scheme or 'http', url)
    elif url.startswith('/'):
        # root-relative path: resolve against the engine URL
        url = urljoin(search_url, url)

    # anything still lacking a scheme falls through the cases above
    if '://' not in url:
        url = urljoin(search_url, url)

    return normalize_url(url)
def response(resp):
    """Parse the #searchResult table into torrent results, sorted by
    seeder count; the header row is skipped."""
    dom = html.fromstring(resp.text)
    rows = dom.xpath('//table[@id="searchResult"]//tr')
    if not rows:
        return []

    results = []
    for row in rows[1:]:
        detail_link = row.xpath('.//div[@class="detName"]//a')[0]
        href = urljoin(url, detail_link.attrib.get('href'))
        title = extract_text(detail_link)
        content = extract_text(row.xpath(content_xpath))

        seed, leech = row.xpath('.//td[@align="right"]/text()')[:2]
        seed = int(seed) if seed.isdigit() else 0
        leech = int(leech) if leech.isdigit() else 0

        magnetlink = row.xpath(magnet_xpath)[0]
        torrent_candidates = row.xpath(torrent_xpath)
        if torrent_candidates:
            torrentfile_link = torrent_candidates[0].attrib.get('href')
        else:
            torrentfile_link = None

        results.append({'url': href,
                        'title': title,
                        'content': content,
                        'seed': seed,
                        'leech': leech,
                        'magnetlink': magnetlink.attrib.get('href'),
                        'torrentfile': torrentfile_link,
                        'template': 'torrent.html'})

    return sorted(results, key=itemgetter('seed'), reverse=True)
def response(resp):
    """Parse the data table into seed-sorted torrent results."""
    dom = html.fromstring(resp.text)
    table_rows = dom.xpath('//table[@class="data"]//tr')
    if not table_rows:
        return []

    results = []
    for entry in table_rows[1:]:  # skip the header row
        main_link = entry.xpath('.//a[@class="cellMainLink"]')[0]
        result = {
            'url': urljoin(url, main_link.attrib['href']),
            'title': extract_text(main_link),
            'content': extract_text(entry.xpath(content_xpath)),
            'template': 'torrent.html',
        }

        result['seed'] = convert_str_to_int(
            extract_text(entry.xpath('.//td[contains(@class, "green")]')))
        result['leech'] = convert_str_to_int(
            extract_text(entry.xpath('.//td[contains(@class, "red")]')))

        size, unit = extract_text(entry.xpath('.//td[contains(@class, "nobr")]')).split()
        result['filesize'] = get_torrent_size(size, unit)

        file_count = extract_text(entry.xpath('.//td[contains(@class, "center")][2]'))
        result['files'] = int(file_count) if file_count.isdigit() else None

        result['magnetlink'] = entry.xpath(magnet_xpath)[0].attrib['href']
        result['torrentfile'] = quote(entry.xpath(torrent_xpath)[0].attrib['href'],
                                      safe="%/:=&?~#+!$,;'@()*")
        results.append(result)

    return sorted(results, key=itemgetter('seed'), reverse=True)
def response(resp):
    """Extract torrent rows from the #searchResult table (header skipped),
    sorted by seeder count."""
    dom = html.fromstring(resp.text)
    rows = dom.xpath('//table[@id="searchResult"]//tr')
    if not rows:
        return []

    results = []
    for row in rows[1:]:
        anchor = row.xpath('.//div[@class="detName"]//a')[0]
        seed_text, leech_text = row.xpath('.//td[@align="right"]/text()')[:2]
        torrent_links = row.xpath(torrent_xpath)

        results.append({
            'url': urljoin(url, anchor.attrib.get('href')),
            'title': extract_text(anchor),
            'content': extract_text(row.xpath(content_xpath)),
            'seed': int(seed_text) if seed_text.isdigit() else 0,
            'leech': int(leech_text) if leech_text.isdigit() else 0,
            'magnetlink': row.xpath(magnet_xpath)[0].attrib.get('href'),
            'torrentfile': torrent_links[0].attrib.get('href') if torrent_links else None,
            'template': 'torrent.html',
        })

    return sorted(results, key=itemgetter('seed'), reverse=True)
def extract_url(xpath_results, search_url):
    """Turn an xpath result into an absolute, normalized URL.

    xpath_results -- xpath node list (or string result)
    search_url    -- URL of the search request, used to resolve relative URLs
    Raises Exception when the resultset is empty.
    """
    if xpath_results == []:
        raise Exception('Empty url resultset')
    url = extract_text(xpath_results)

    if url.startswith('//'):
        # add http or https to this kind of url //example.com/
        parsed_search_url = urlparse(search_url)
        # FIX: the original concatenated ``scheme + url`` which yields
        # 'https//example.com' (colon missing); build 'scheme:' + '//…'
        # and fall back to http when the search URL has no scheme.
        url = u'{0}:{1}'.format(parsed_search_url.scheme or 'http', url)
    elif url.startswith('/'):
        # fix relative url to the search engine
        url = urljoin(search_url, url)

    # normalize url
    url = normalize_url(url)

    return url
def response(resp):
    """Generic xpath scraper: one url/title/content entry per result node."""
    dom = html.fromstring(resp.text)
    results = []

    for node in dom.xpath(results_xpath):
        anchor = node.xpath(link_xpath)[0]
        results.append({
            'url': urljoin(url, anchor.attrib.get('href')),
            'title': extract_text(anchor),
            'content': extract_text(node.xpath(content_xpath)),
        })

    return results
def extract_url(xpath_results, search_url):
    """Turn an xpath result into an absolute, normalized URL.

    xpath_results -- xpath node list (or string result)
    search_url    -- URL of the search request, used to resolve relative URLs
    Raises Exception when the resultset is empty.
    """
    if xpath_results == []:
        raise Exception('Empty url resultset')
    url = extract_text(xpath_results)

    if url.startswith('//'):
        # add http or https to this kind of url //example.com/
        parsed_search_url = urlparse(search_url)
        # FIX: fall back to 'http' when the search URL carries no scheme,
        # otherwise the result would start with a bare ':' (e.g. '://example.com')
        url = u'{0}:{1}'.format(parsed_search_url.scheme or 'http', url)
    elif url.startswith('/'):
        # fix relative url to the search engine
        url = urljoin(search_url, url)

    # normalize url
    url = normalize_url(url)

    return url
def response(resp):
    """Parse results, resolving links against the per-language base URL of
    the originating request."""
    # the base URL depends on the language the request was made in
    language = locale_to_lang_code(resp.search_params['language'])
    base_url = get_lang_urls(language)['base']

    dom = html.fromstring(resp.text)
    results = []
    for node in dom.xpath(xpath_results):
        anchor = node.xpath(xpath_link)[0]
        results.append({
            'url': urljoin(base_url, anchor.attrib.get('href')),
            'title': extract_text(anchor),
        })
    return results
def fetch_firefox_versions():
    """Fetch the firefox release index and return the matching release
    versions, newest first.

    Raises Exception when the index page cannot be fetched.
    """
    resp = requests.get(URL, timeout=2.0)
    if resp.status_code != 200:
        # FIX: status_code is an int — concatenating it to a str raised
        # TypeError and masked the real error; convert it explicitly.
        raise Exception("Error fetching firefox versions, HTTP code " + str(resp.status_code))

    dom = html.fromstring(resp.text)
    versions = []

    for link in dom.xpath('//a/@href'):
        url = urlparse(urljoin(URL, link))
        path = url.path
        if path.startswith(RELEASE_PATH):
            # strip the release prefix and the trailing '/'
            version = path[len(RELEASE_PATH):-1]
            if NORMAL_REGEX.match(version):
                versions.append(LooseVersion(version))

    # idiom fix: call sort on the instance instead of the unbound list.sort
    versions.sort(reverse=True)
    return versions
def response(resp):
    """Split reddit posts into image and text results; image results are
    returned first."""
    image_hits = []
    text_hits = []

    search_results = json.loads(resp.text)
    if 'data' not in search_results:
        return []

    for post in search_results.get('data', {}).get('children', []):
        data = post['data']
        params = {
            'url': urljoin(base_url, data['permalink']),
            'title': data['title']
        }

        thumbnail = data['thumbnail']
        url_info = urlparse(thumbnail)
        # netloc & path both present -> thumbnail is a real URL -> image post
        if url_info[1] != '' and url_info[2] != '':
            params['img_src'] = data['url']
            params['thumbnail_src'] = thumbnail
            params['template'] = 'images.html'
            image_hits.append(params)
        else:
            created = datetime.fromtimestamp(data['created_utc'])
            content = data['selftext']
            if len(content) > 500:
                content = content[:500] + '...'
            params['content'] = content
            params['publishedDate'] = created
            text_hits.append(params)

    # show images first and text results second
    return image_hits + text_hits
def response(resp):
    """Pair HTML torrent rows with the embedded JSON metadata and build
    torrent results sorted by seeder count."""
    results = []
    dom = html.fromstring(resp.text)
    result_rows = dom.xpath(torrent_xpath)

    try:
        script_element = dom.xpath(script_xpath)[0]
        # the script text contains a JSON object after some prefix
        json_string = script_element.text[script_element.text.find('{'):]
        torrents_json = loads(json_string)
    except Exception:
        # FIX: narrowed from a bare ``except:`` which also swallowed
        # SystemExit/KeyboardInterrupt; still best-effort on parse failure.
        return []

    for torrent_row, torrent_json in zip(result_rows, torrents_json['data']['list']):
        title = torrent_json['name']
        seed = int(torrent_json['seeds'])
        leech = int(torrent_json['peers'])
        size = int(torrent_json['size'])
        torrent_hash = torrent_json['hash']

        torrentfile = torrent_file_url.format(torrent_hash=torrent_hash)
        magnetlink = 'magnet:?xt=urn:btih:{}'.format(torrent_hash)

        age = extract_text(torrent_row.xpath(age_xpath))
        link = torrent_row.xpath(link_xpath)[0]
        href = urljoin(url, link)

        results.append({'url': href,
                        'title': title,
                        'content': age,
                        'seed': seed,
                        'leech': leech,
                        'filesize': size,
                        'torrentfile': torrentfile,
                        'magnetlink': magnetlink,
                        'template': 'torrent.html'})

    # return results sorted by seeder
    return sorted(results, key=itemgetter('seed'), reverse=True)
def response(resp):
    """Parse tweet nodes into results; a publish date is attached whenever
    the tweet carries a timestamp."""
    dom = html.fromstring(resp.text)
    results = []

    for tweet in dom.xpath(results_xpath):
        try:
            link = tweet.xpath(link_xpath)[0]
            content = extract_text(tweet.xpath(content_xpath)[0])
            img_src = tweet.xpath(avatar_xpath)[0]
            img_src = img_src.replace('_bigger', '_normal')
        except Exception:
            # skip tweets missing any mandatory field
            continue

        entry = {
            'url': urljoin(base_url, link.attrib.get('href')),
            'title': extract_text(tweet.xpath(title_xpath)),
            'content': content,
            'img_src': img_src,
        }

        pubdate = tweet.xpath(timestamp_xpath)
        if len(pubdate) > 0:
            timestamp = float(pubdate[0].attrib.get('data-time'))
            entry['publishedDate'] = datetime.fromtimestamp(timestamp, None)

        results.append(entry)

    return results
def response(resp):
    """Collect torrent rows from the table-list result table."""
    dom = html.fromstring(resp.text)
    out = []

    rows = dom.xpath('//table[contains(@class, "table-list")]/tbody//tr')
    for entry in rows:
        target = urljoin(url, entry.xpath('./td[contains(@class, "name")]/a[2]/@href')[0])
        # size column text is "<value> <unit>"
        value, unit = extract_text(
            entry.xpath('.//td[contains(@class, "size")]/text()')).split()
        out.append({
            'url': target,
            'title': extract_text(entry.xpath('./td[contains(@class, "name")]/a[2]')),
            'seed': extract_text(entry.xpath('.//td[contains(@class, "seeds")]')),
            'leech': extract_text(entry.xpath('.//td[contains(@class, "leeches")]')),
            'filesize': get_torrent_size(value, unit),
            'template': 'torrent.html',
        })

    return out
def response(resp):
    """Turn tweet nodes into results, adding publishedDate when a
    timestamp attribute is present."""
    dom = html.fromstring(resp.text)
    results = []

    for tweet in dom.xpath(results_xpath):
        try:
            link = tweet.xpath(link_xpath)[0]
            content = extract_text(tweet.xpath(content_xpath)[0])
            img_src = tweet.xpath(avatar_xpath)[0]
            img_src = img_src.replace('_bigger', '_normal')
        except Exception:
            continue

        result = {'url': urljoin(base_url, link.attrib.get('href')),
                  'title': extract_text(tweet.xpath(title_xpath)),
                  'content': content,
                  'img_src': img_src}

        timestamps = tweet.xpath(timestamp_xpath)
        if timestamps:
            epoch = float(timestamps[0].attrib.get('data-time'))
            result['publishedDate'] = datetime.fromtimestamp(epoch, None)

        results.append(result)

    return results
def response(resp):
    """Parse the translation table: each row maps one source phrase to a
    list of target phrases; the header row is skipped."""
    results = []
    dom = html.fromstring(resp.text)

    for k, result in enumerate(dom.xpath(results_xpath)[1:]):
        try:
            from_result, to_results_raw = result.xpath('./td')
        except ValueError:
            # FIX: narrowed from a bare ``except:`` — only rows without
            # exactly two cells are skipped.
            continue

        to_results = []
        for to_result in to_results_raw.xpath('./p/a'):
            t = to_result.text_content()
            if t.strip():
                to_results.append(to_result.text_content())

        results.append({
            'url': urljoin(resp.url, '?%d' % k),
            'title': from_result.text_content(),
            'content': '; '.join(to_results)
        })

    return results
def response(resp):
    """Turn the JSON photo feed into image results."""
    response_json = loads(resp.text)

    results = []
    for photo in response_json['photos']:
        # image_url is ordered by resolution: first = smallest, last = biggest
        results.append({
            'url': urljoin(base_url, photo['url']),
            'title': photo['name'],
            'img_src': photo['image_url'][-1],
            'content': photo['description'] or '',
            'thumbnail_src': photo['image_url'][0],
            'template': 'images.html',
        })

    return results
def response(resp):
    """Parse the translation table (eval_xpath variant): each row maps one
    source phrase to a list of target phrases; the header row is skipped."""
    results = []
    dom = html.fromstring(resp.text)

    for k, result in enumerate(eval_xpath(dom, results_xpath)[1:]):
        try:
            from_result, to_results_raw = eval_xpath(result, './td')
        except ValueError:
            # FIX: narrowed from a bare ``except:`` — only rows without
            # exactly two cells are skipped.
            continue

        to_results = []
        for to_result in eval_xpath(to_results_raw, './p/a'):
            t = to_result.text_content()
            if t.strip():
                to_results.append(to_result.text_content())

        results.append({
            'url': urljoin(resp.url, '?%d' % k),
            'title': from_result.text_content(),
            'content': '; '.join(to_results)
        })

    return results
def clear_cookies():
    """Delete every cookie the client sent and redirect to the index page."""
    index_url = urljoin(settings['server']['base_url'], url_for('index'))
    resp = make_response(redirect(index_url))
    for cookie_name in request.cookies:
        resp.delete_cookie(cookie_name)
    return resp