def get_pages(response, topics, link_list):
    i = 0
    nb_link = 0
    nb_requests = 0
    for res in response:
        # get each page from the last
        list_pages = []
        while int(link_list[i].split('-')[3]) > 0:
            previous = link_list[i].split('-')
            previous_page = int(previous[3])
            previous_page -= 1
            previous[3] = str(previous_page)
            link_list[i] = '-'.join(previous)
            list_pages.append(link_list[i])
            print link_list[i]
            if nb_requests > 8:
                rs = (grequests.get(u) for u in list_pages)
                laresponse = grequests.map(rs)
                get_pseudos(laresponse)
                nb_requests = 0
                del list_pages[:]
            nb_requests += 1
        rs = (grequests.get(u, stream=True) for u in list_pages)
        laresponse = grequests.map(rs)
        get_pseudos(laresponse)
        res.close()
        i += 1
    return
def scan(self, session):
    time_started = time.strftime('%Y-%m-%d %H:%M:%S')
    author = getpass.getuser()
    http, https = self.supported.keys()[0], self.supported.keys()[1]
    url_factory = self.url_factory()
    urls = url_factory(http, session) + url_factory(https, session)
    self.session = session
    async_requests = [
        grequests.head(
            url=url,
            allow_redirects=False,
            headers={
                'User-Agent': settings.USER_AGENT,
                'Host': host},
            hooks=dict(response=self.success_hook),
            timeout=settings.TIMEOUT)
        for host, url in urls]
    grequests.map(
        requests=async_requests,
        size=settings.CONCURRENT_REQUESTS,
        exception_handler=self.failure_hook)
    time_ended = time.strftime('%Y-%m-%d %H:%M:%S')
    scan_instance = ScanInstance(
        start_time=time_started,
        end_time=time_ended,
        author=author)
    self.session.add(scan_instance)
    export_xlsx(session)
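# Illustrative sketch (an assumption, not from the original source): the
# success_hook and failure_hook referenced above would follow the standard
# requests/grequests callback signatures, roughly like the standalone
# functions below.
def success_hook(response, **kwargs):
    # requests 'response' hook: called once for every completed request
    print(response.url, response.status_code)

def failure_hook(request, exception):
    # grequests exception_handler: called with the failed AsyncRequest and its exception
    print(request.url, exception)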
def ping_containers(ctx, container_paths):
    api_host = ctx['api_host']
    headers = dl_lib.get_request_headers(ctx)
    host_mac = dl_lib.get_host_mac(ctx)
    host_name = str(socket.gethostname())

    def create_request(path):
        id = dl_lib.get_container_id(path)
        details = {
            'mac': host_mac,
            'hostname': host_name,
            'tags': '',
            'os_name': 'docker',
            'os_version': '',
            'proc_list': dl_lib.get_processes(id),
            'container_name': '001',
            'ip': '',
            'interfaces': dl_lib.get_network(id),
            'mode': 'SOLO',
            'name': id
        }
        finger = dl_lib.hash_id(path)
        url = "%s/api/agents/%s/ping" % (api_host, finger,)
        return grequests.post(url, json=details, headers=headers)

    reqs = map(create_request, container_paths)
    grequests.map(reqs)
def list_members(request, microcosm_id):
    offset = int(request.GET.get('offset', 0))

    microcosm_url, params, headers = Microcosm.build_request(request.get_host(), id=microcosm_id,
        offset=offset, access_token=request.access_token)
    request.view_requests.append(grequests.get(microcosm_url, params=params, headers=headers))
    try:
        responses = response_list_to_dict(grequests.map(request.view_requests))
    except APIException as exc:
        return respond_with_error(request, exc)
    microcosm = Microcosm.from_api_response(responses[microcosm_url])

    roles_url, params, headers = RoleList.build_request(request.META['HTTP_HOST'], id=microcosm_id,
        offset=offset, access_token=request.access_token)
    request.view_requests.append(grequests.get(roles_url, params=params, headers=headers))
    try:
        responses = response_list_to_dict(grequests.map(request.view_requests))
    except APIException as exc:
        return respond_with_error(request, exc)
    roles = RoleList.from_api_response(responses[roles_url])

    view_data = {
        'user': Profile(responses[request.whoami_url], summary=False) if request.whoami_url else None,
        'site': Site(responses[request.site_url]),
        'site_section': 'memberships',
        'content': microcosm,
        'memberships': roles,
        'item_type': 'microcosm',
        'pagination': build_pagination_links(responses[roles_url]['roles']['links'], roles.items)
    }

    return render(request, members_list_template, view_data)
def parse(url):
    if url in start_urls:
        async_list = []
        print (url)
        for i in xrange(1, 6):
            rs = (grequests.get(url+'?&pg={}'.format(i), hooks = {'response' : scrape}))
            async_list.append(rs)
        grequests.map(async_list)
    page = requests.get(url)
    soup = bs(page.text, 'lxml')
    try:
        active_sel = soup.find('span', 'zg_selected').find_next()
        if active_sel.name == 'ul':
            links_lists = active_sel.find_all('li')
            asins = pool.map(multiparse, links_lists)
            for asin in asins:
                async_list = []
                print (asin)
                for i in xrange(1, 6):
                    rs = (grequests.get(asin+'?&pg={}'.format(i), hooks = {'response' : scrape}))
                    async_list.append(rs)
                parse(asin)
                grequests.map(async_list)
    except:
        parse(url)
def parse(url):
    if url in start_urls:
        async_list = []
        print(url)
        rs = grequests.get(url, headers=headers, hooks={"response": scrape})
        async_list.append(rs)
        grequests.map(async_list)
    page = requests.get(url)
    soup = bs(page.text, "lxml")
    try:
        active_sel = soup.find("span", "zg_selected").find_next()
        if active_sel.name == "ul":
            links_lists = active_sel.find_all("li")
            asins = pool.map(multiparse, links_lists)
            for asin in asins:
                async_list = []
                print(asin)
                for i in xrange(1, 6):
                    rs = grequests.get(asin + "?&pg={}".format(i), hooks={"response": scrape})
                    async_list.append(rs)
                parse(asin)
                grequests.map(async_list)
    except:
        parse(url)
def geturls(urls, headers=None):
    movies = []
    movie_hrefs = []
    # Fetch the listing pages asynchronously with grequests
    movies_requests = [grequests.get(u, headers=headers) for u in urls]
    movies_datas = grequests.map(movies_requests)
    for movies_d in movies_datas:
        movies_soup = BeautifulSoup(movies_d.text, 'lxml')
        names = movies_soup.select('div.item > div.info > div.hd > a > span:nth-of-type(1)')
        ranks = movies_soup.select('div.item > div.pic > em')
        hrefs = movies_soup.select('div.item > div.info > div.hd > a')
        for name, rank, href in zip(names, ranks, hrefs):
            movie = MovieClass.Movie(name.get_text(), rank.get_text(), href.get('href'))
            movies.append(movie)
        for href in hrefs:
            movie_hrefs.append(href.get('href'))
    # Fetch the 250 detail pages asynchronously
    movie_requests = [grequests.get(u, headers=headers) for u in movie_hrefs]
    movie_webs = grequests.map(movie_requests)
    # Persist the raw pages and the preliminary movie data
    with open('moviehtmls.txt', 'wb') as fs:
        pickle.dump(movie_webs, fs)
    with open('moviesdata.txt', 'wb') as fs:
        pickle.dump(movies, fs)
    return movies, movie_webs
def edit(request, comment_id):
    """
    Edit a comment.
    """
    try:
        responses = response_list_to_dict(grequests.map(request.view_requests))
    except APIException as exc:
        return respond_with_error(request, exc)
    view_data = {
        'user': Profile(responses[request.whoami_url], summary=False),
        'site': Site(responses[request.site_url]),
    }

    if request.method == 'POST':
        form = CommentForm(request.POST)
        if form.is_valid():
            comment_request = Comment.from_edit_form(form.cleaned_data)
            try:
                comment = comment_request.update(request.get_host(), access_token=request.access_token)
            except APIException as exc:
                return respond_with_error(request, exc)
            try:
                process_attachments(request, comment)
            except ValidationError:
                try:
                    responses = response_list_to_dict(grequests.map(request.view_requests))
                except APIException as exc:
                    return respond_with_error(request, exc)
                comment_form = CommentForm(
                    initial = {
                        'itemId': comment.item_id,
                        'itemType': comment.item_type,
                        'comment_id': comment.id,
                        'markdown': request.POST['markdown'],
                    })
                view_data = {
                    'user': Profile(responses[request.whoami_url], summary=False),
                    'site': Site(responses[request.site_url]),
                    'content': comment,
                    'comment_form': comment_form,
                    'error': 'Sorry, one of your files was over 5MB. Please try again.',
                }
                return render(request, form_template, view_data)

            if comment.meta.links.get('commentPage'):
                return HttpResponseRedirect(build_comment_location(comment))
            else:
                return HttpResponseRedirect(reverse('single-comment', args=(comment.id,)))
        else:
            view_data['form'] = form
            return render(request, form_template, view_data)

    if request.method == 'GET':
        try:
            comment = Comment.retrieve(request.get_host(), comment_id, access_token=request.access_token)
        except APIException as exc:
            return respond_with_error(request, exc)
        view_data['form'] = CommentForm(comment.as_dict)
        return render(request, form_template, view_data)
def query_async_resolve(async_queries):
    if isinstance(async_queries, list):
        out = []
        for x in grequests.map(async_queries):
            try:
                out.append(x.json())
            except (ValueError, AttributeError):
                pass
        return out
    elif isinstance(async_queries, dict):
        items = async_queries.items()
        results = []
        # Don't pass `.values()`. There's no guarantee that the values will be
        # returned in the same order as `.items()`. It's also an extra
        # unnecessary call.
        for x in grequests.map(v for k, v in items):
            try:
                results.append(x.json())
            except ValueError:
                pass
        return {k: results[i] for i, (k, v) in enumerate(items)}
    raise Exception("Unknown type passed to query_async_resolve")
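# Illustrative usage sketch (not part of the original source; the endpoint URLs
# are placeholders): the helper accepts either a list or a dict of unsent
# grequests AsyncRequest objects and returns the decoded JSON bodies.
queries = {
    'users': grequests.get('https://api.example.com/users'),
    'posts': grequests.get('https://api.example.com/posts'),
}
decoded = query_async_resolve(queries)   # -> {'users': ..., 'posts': ...}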
def parse(url):
    page = requests.get(url)
    soup = bs(page.text, 'lxml')
    if url == start_url:
        async_list = []
        for i in xrange(1, 6):
            print (url+'?&pg={}'.format(i))
            rs = (grequests.get(url+'?&pg={}'.format(i), hooks = {'response' : scrape}))
            async_list.append(rs)
        grequests.map(async_list)
    active_sel = soup.find('span', 'zg_selected').find_next()
    if active_sel.name == 'ul':
        links_list = active_sel.find_all('li')
        for link_list in links_list:
            link = link_list.find('a')['href'].encode('utf-8')
            async_list = []
            for i in xrange(1, 6):
                print (link+'?&pg={}'.format(i))
                rs = (grequests.get(link+'?&pg={}'.format(i), hooks = {'response' : scrape}))
                async_list.append(rs)
            grequests.map(async_list)
            parse(link)
def download_s3_logs(args):
    sys.stderr.write(colored('Checking for S3 log files', 'cyan') + '\n')
    logs = logs_for_all_requests(args)
    async_requests = []
    zipped_files = []
    all_logs = []
    for log_file in logs:
        filename = log_file['key'].rsplit("/", 1)[1]
        if logfetch_base.is_in_date_range(args, time_from_filename(filename)):
            if not already_downloaded(args.dest, filename):
                async_requests.append(
                    grequests.AsyncRequest('GET', log_file['getUrl'],
                        callback=generate_callback(log_file['getUrl'], args.dest, filename, args.chunk_size, args.verbose))
                )
            else:
                if args.verbose:
                    sys.stderr.write(colored('Log already downloaded {0}'.format(filename), 'magenta') + '\n')
            all_logs.append('{0}/{1}'.format(args.dest, filename.replace('.gz', '.log')))
            zipped_files.append('{0}/{1}'.format(args.dest, filename))
        else:
            if args.verbose:
                sys.stderr.write(colored('Excluding {0}, not in date range'.format(filename), 'magenta') + '\n')
    if async_requests:
        sys.stderr.write(colored('Starting S3 Downloads with {0} parallel fetches'.format(args.num_parallel_fetches), 'cyan'))
        grequests.map(async_requests, stream=True, size=args.num_parallel_fetches)
    else:
        sys.stderr.write(colored('No S3 logs to download', 'cyan'))
    sys.stderr.write(colored('\nUnpacking S3 logs\n', 'cyan'))
    all_logs = all_logs + logfetch_base.unpack_logs(args, zipped_files)
    sys.stderr.write(colored('All S3 logs up to date', 'cyan') + '\n')
    return all_logs
def scrape(start, stop, season):
    DATA_DIR = 'data'
    if not os.path.exists(DATA_DIR):
        os.makedirs(DATA_DIR)

    def passer(request, e, **kwargs):
        pass

    def save_json(response, **kwargs):
        boxscore_json = response.json()
        try:
            if boxscore_json.get('Message') == 'An error has occurred.':
                return
            result_sets = boxscore_json['resultSets']
            date = result_sets[0]["rowSet"][0][0].split("T")[0]
            game_id = boxscore_json['parameters']['GameID']
            unique_id = "{}-{}.json".format(date, game_id)
            with open(os.path.join(DATA_DIR, unique_id), 'w') as outfile:
                json.dump(boxscore_json, outfile)
        except Exception as e:
            traceback.print_exc()
        finally:
            response.close()

    async_list = []
    for game_num in GAMES_RANGE[start:stop]:
        game_id = "002{:02}0{:04}".format(season, game_num)
        url = 'http://stats.nba.com/stats/boxscore/?GameId={}&StartPeriod=0&EndPeriod=0&StartRange=0&EndRange=0&RangeType=0'.format(game_id)
        action_item = grequests.get(url, hooks={'response': save_json})
        async_list.append(action_item)
    grequests.map(async_list, size=len(async_list), stream=False, exception_handler=passer)
def list_updates(request):
    if not request.access_token:
        try:
            responses = response_list_to_dict(grequests.map(request.view_requests))
        except APIException as exc:
            return respond_with_error(request, exc)
        view_data = {
            'user': False,
            'site_section': 'updates',
            'site': Site(responses[request.site_url]),
        }
    else:
        # pagination offset
        offset = int(request.GET.get('offset', 0))
        url, params, headers = UpdateList.build_request(request.get_host(), offset=offset,
            access_token=request.access_token)
        request.view_requests.append(grequests.get(url, params=params, headers=headers))
        try:
            responses = response_list_to_dict(grequests.map(request.view_requests))
        except APIException as exc:
            return respond_with_error(request, exc)
        updates_list = UpdateList(responses[url])
        view_data = {
            'user': Profile(responses[request.whoami_url], summary=False),
            'content': updates_list,
            'pagination': build_pagination_links(responses[url]['updates']['links'], updates_list.updates),
            'site_section': 'updates',
            'site': Site(responses[request.site_url]),
        }
    return render(request, list_template, view_data)
def map(self, reqs, max_retries=None):
    if max_retries is None:
        max_retries = self.max_retries
    # TODO
    # There is no good way of catching or dealing with exceptions that are raised
    # during the request sending process when using map or imap.
    # When this issue is closed: https://github.com/kennethreitz/grequests/pull/15
    # modify this method to repeat only the requests that failed because of
    # connection errors
    if self.async:
        import grequests
        responses = [(
            response.json() if response.ok else APIError()
        ) for response in grequests.map(reqs)]
        # retry the API calls that failed until they succeed or the max_retries limit is reached
        retries = 0
        while retries < max_retries:
            n_errors = sum([int(isinstance(response, APIError)) for response in responses])
            if not n_errors:
                break
            # sleep 2 seconds before retrying requests
            time.sleep(2)
            error_ids = [i for i, resp in enumerate(responses) if isinstance(responses[i], APIError)]
            new_reqs = [reqs[i] for i in range(len(responses)) if i in error_ids]
            new_resps = [(
                response.json() if response.ok else APIError()
            ) for response in grequests.map(new_reqs)]
            # update the responses that previously finished with errors
            for i in range(len(error_ids)):
                responses[error_ids[i]] = new_resps[i]
            retries += 1
        return responses
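# Illustrative side note (an assumption about usage, not part of the class above;
# the URLs are placeholders): grequests.map returns None in place of each request
# that raised, and accepts an exception_handler callback, which is another way to
# spot connection-level failures before deciding what to retry.
import grequests

def log_failure(request, exception):
    print("request to %s failed: %s" % (request.url, exception))

reqs = [grequests.get(u) for u in ("https://example.com/a", "https://example.com/b")]
responses = grequests.map(reqs, exception_handler=log_failure)
failed_indexes = [i for i, r in enumerate(responses) if r is None]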
def reap(file_name):
    config = ConfigParser.ConfigParser()
    config.read('combine.cfg')

    inbound_url_file = config.get('Reaper', 'inbound_urls')
    outbound_url_file = config.get('Reaper', 'outbound_urls')

    with open(inbound_url_file, 'rb') as f:
        inbound_urls = [url.rstrip('\n') for url in f.readlines()]
    with open(outbound_url_file, 'rb') as f:
        outbound_urls = [url.rstrip('\n') for url in f.readlines()]

    headers = {'User-Agent': 'harvest.py'}

    sys.stderr.write('Fetching inbound URLs\n')
    reqs = [grequests.get(url, headers=headers) for url in inbound_urls]
    inbound_responses = grequests.map(reqs)
    inbound_harvest = [(response.url, response.status_code, response.text) for response in inbound_responses]

    sys.stderr.write('Fetching outbound URLs\n')
    reqs = [grequests.get(url, headers=headers) for url in outbound_urls]
    outbound_responses = grequests.map(reqs)
    outbound_harvest = [(response.url, response.status_code, response.text) for response in outbound_responses]

    sys.stderr.write('Storing raw feeds in %s\n' % file_name)
    harvest = {'inbound': inbound_harvest, 'outbound': outbound_harvest}

    with open(file_name, 'wb') as f:
        json.dump(harvest, f, indent=2)
def download_s3_logs(args):
    if not args.silent:
        sys.stderr.write(colored('Checking for S3 log files', 'cyan') + '\n')
    callbacks.progress = 0
    logs = logs_for_all_requests(args)
    async_requests = []
    all_logs = []
    for log_file in logs:
        filename = log_file['key'].rsplit("/", 1)[1]
        if log_file_in_date_range(args, log_file):
            if not args.logtype or log_matches(args, filename):
                logfetch_base.log(colored('Including log {0}'.format(filename), 'blue') + '\n', args, True)
                if not already_downloaded(args.dest, filename):
                    async_requests.append(
                        grequests.AsyncRequest('GET', log_file['getUrl'],
                            callback=callbacks.generate_callback(log_file['getUrl'], args.dest, filename, args.chunk_size, args.verbose, args.silent),
                            headers=args.headers)
                    )
                else:
                    logfetch_base.log(colored('Log already downloaded {0}'.format(filename), 'blue') + '\n', args, True)
                all_logs.append('{0}/{1}'.format(args.dest, filename))
            else:
                logfetch_base.log(colored('Excluding {0} log does not match logtype argument {1}'.format(filename, args.logtype), 'magenta') + '\n', args, True)
        else:
            logfetch_base.log(colored('Excluding {0}, not in date range'.format(filename), 'magenta') + '\n', args, True)
    if async_requests:
        logfetch_base.log(colored('Starting {0} S3 Downloads with {1} parallel fetches\n'.format(len(async_requests), args.num_parallel_fetches), 'cyan'), args, False)
        callbacks.goal = len(async_requests)
        grequests.map(async_requests, stream=True, size=args.num_parallel_fetches)
    else:
        logfetch_base.log(colored('No S3 logs to download\n', 'cyan'), args, False)
    logfetch_base.log(colored('All S3 logs up to date\n', 'cyan'), args, False)
    all_logs = modify_download_list(all_logs)
    return all_logs
def main():
    resource.setrlimit(resource.RLIMIT_NOFILE, (2048, 2048))

    hashes = set()
    past_urls = set()

    args = setup_args(sys.argv[1:])
    if args.config:
        cfg = config(args, args.config)
    else:
        cfg = config(args, 'maltrieve.cfg')
    cfg.check_proxy()

    hashes = load_hashes('hashes.json')
    past_urls = load_urls('urls.json')

    print "Processing source URLs"

    # TODO: Replace with plugins
    source_urls = {'https://zeustracker.abuse.ch/monitor.php?urlfeed=binaries': process_xml_list_desc,
                   'http://www.malwaredomainlist.com/hostslist/mdl.xml': process_xml_list_desc,
                   'http://malc0de.com/rss/': process_xml_list_desc,
                   'http://vxvault.net/URL_List.php': process_simple_list,
                   'http://urlquery.net/': process_urlquery,
                   'http://support.clean-mx.de/clean-mx/rss?scope=viruses&limit=0%2C64': process_xml_list_title,
                   'http://malwareurls.joxeankoret.com/normal.txt': process_simple_list}
    headers = {'User-Agent': 'Maltrieve'}

    reqs = [grequests.get(url, timeout=60, headers=headers, proxies=cfg.proxy) for url in source_urls]
    source_lists = grequests.map(reqs)

    print "Completed source processing"

    headers['User-Agent'] = cfg.useragent
    malware_urls = set()
    for response in source_lists:
        if hasattr(response, 'status_code') and response.status_code == 200:
            malware_urls.update(source_urls[response.url](response.text))

    if cfg.inputfile:
        with open(cfg.inputfile, 'rb') as f:
            moar_urls = list(f)
        malware_urls.update(moar_urls)

    print "Downloading samples, check log for details"

    malware_urls -= past_urls
    reqs = [grequests.get(url, timeout=60, headers=headers, proxies=cfg.proxy) for url in malware_urls]
    for chunk in chunker(reqs, 32):
        malware_downloads = grequests.map(chunk)
        for each in malware_downloads:
            if not each or each.status_code != 200:
                continue
            if save_malware(each, cfg):
                past_urls.add(each.url)

    print "Completed downloads"

    save_urls(past_urls, 'urls.json')
    save_hashes(hashes, 'hashes.json')
    sort_downloads()
def list(request):
    # TODO: need a user friendly error page for unregistered users
    # TODO: remove 'site_section'
    if not request.access_token:
        responses = response_list_to_dict(grequests.map(request.view_requests))
        view_data = {
            'user': False,
            'site_section': 'updates',
            'site': Site(responses[request.site_url]),
        }
    else:
        # pagination offset
        try:
            offset = int(request.GET.get('offset', 0))
        except ValueError:
            offset = 0
        url, params, headers = UpdateList.build_request(request.get_host(), offset=offset,
            access_token=request.access_token)
        request.view_requests.append(grequests.get(url, params=params, headers=headers))
        responses = response_list_to_dict(grequests.map(request.view_requests))
        updates_list = UpdateList(responses[url])
        view_data = {
            'user': Profile(responses[request.whoami_url], summary=False),
            'content': updates_list,
            'pagination': build_pagination_links(responses[url]['updates']['links'], updates_list.updates),
            'site_section': 'updates',
            'site': Site(responses[request.site_url]),
        }
    return render(request, UpdateView.list_template, view_data)
def search(query, request, selected_engines):
    global engines, categories, number_of_searches
    requests = []
    results = {}
    suggestions = set()
    number_of_searches += 1
    #user_agent = request.headers.get('User-Agent', '')
    user_agent = gen_useragent()

    for selected_engine in selected_engines:
        if selected_engine['name'] not in engines:
            continue

        engine = engines[selected_engine['name']]

        request_params = default_request_params()
        request_params['headers']['User-Agent'] = user_agent
        request_params['category'] = selected_engine['category']
        request_params['started'] = datetime.now()
        request_params = engine.request(query, request_params)

        callback = make_callback(
            selected_engine['name'],
            results,
            suggestions,
            engine.response,
            request_params
        )

        request_args = dict(
            headers=request_params['headers'],
            hooks=dict(response=callback),
            cookies=request_params['cookies'],
            timeout=settings['server']['request_timeout']
        )

        if request_params['method'] == 'GET':
            req = grequests.get
        else:
            req = grequests.post
            request_args['data'] = request_params['data']

        # ignoring empty urls
        if not request_params['url']:
            continue

        requests.append(req(request_params['url'], **request_args))
    grequests.map(requests)
    for engine_name, engine_results in results.items():
        engines[engine_name].stats['search_count'] += 1
        engines[engine_name].stats['result_count'] += len(engine_results)

    results = score_results(results)

    for result in results:
        for res_engine in result['engines']:
            engines[result['engine']].stats['score_count'] += result['score']

    return results, suggestions
def testRead():
    getUrls = []
    for i in range(readNum):
        getUrls.append(grequests.get(readTestUrl))
    start = time.time()
    print grequests.map(getUrls)
    end = time.time()
    print "duration is " + str(end - start) + " seconds."
def update_ratings(beers):
    rs = []
    for beer in beers:
        if beer.url:
            def update_rating(r, beer=beer, **kwargs):
                beer.rating = extract_rating(r.content)
            rs.append(grequests.get(beer.url, hooks={'response': update_rating}))
    grequests.map(rs)
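# Note on the pattern above: the `beer=beer` default argument binds each beer to its
# own response hook; without it every callback would close over the loop variable and
# only the last beer would be updated. Illustrative usage (Beer and extract_rating are
# assumed to exist in the original module):
#
#   beers = [Beer(url='https://example.com/beers/1'), Beer(url=None)]
#   update_ratings(beers)   # beers[0].rating is filled in, beers[1] is skipped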
def _post(self, endpoint="", data=dict()):
    assert isinstance(data, dict), 'Field <data> must be a dict.'
    r = gr.post(self.url + self.get_endpoint(endpoint),
                data=self.dumps(data), headers=self.headers)
    gr.map([r], exception_handler=exception_handler)
    return r.response
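# Illustrative alternative sketch (an assumption, not part of the class above, and
# assuming `gr` is the usual `import grequests as gr` alias): gr.map returns the
# responses as a list, so the mapped result can be used directly instead of reading
# the AsyncRequest's .response attribute afterwards.
def post_once(url, payload, headers=None):
    r = gr.post(url, json=payload, headers=headers)
    responses = gr.map([r])   # one-element list; the entry is None if the request failed
    return responses[0]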
def create(request):
    """
    Create a comment, processing any attachments (including deletion of attachments) and
    redirecting to the single comment form if there are any validation errors.
    """

    # TODO: determine whether the single comment creation form will use this view.
    # Remove the conditional if not.
    if request.method == 'POST':
        form = CommentForm(request.POST)

        # If invalid, load single comment view showing validation errors.
        if not form.is_valid():
            try:
                responses = response_list_to_dict(grequests.map(request.view_requests))
            except APIException as exc:
                return respond_with_error(request, exc)
            view_data = {
                'user': Profile(responses[request.whoami_url], summary=False),
                'site': Site(responses[request.site_url]),
                'form': form,
            }
            return render(request, form_template, view_data)

        # Create comment with API.
        comment_request = Comment.from_create_form(form.cleaned_data)
        try:
            comment = comment_request.create(request.get_host(), access_token=request.access_token)
        except APIException as exc:
            return respond_with_error(request, exc)

        try:
            process_attachments(request, comment)
        except ValidationError:
            try:
                responses = response_list_to_dict(grequests.map(request.view_requests))
            except APIException as exc:
                return respond_with_error(request, exc)
            comment_form = CommentForm(
                initial = {
                    'itemId': comment.item_id,
                    'itemType': comment.item_type,
                    'comment_id': comment.id,
                    'markdown': request.POST['markdown'],
                }
            )
            view_data = {
                'user': Profile(responses[request.whoami_url], summary=False),
                'site': Site(responses[request.site_url]),
                'content': comment,
                'comment_form': comment_form,
                'error': 'Sorry, one of your files was over 5MB. Please try again.',
            }
            return render(request, form_template, view_data)

        # API returns which page in the thread this comment appears in, so redirect there.
        if comment.meta.links.get('commentPage'):
            return HttpResponseRedirect(build_comment_location(comment))
def testWrite():
    postRequest = []
    jdata = json.loads(jsonData)
    for i in range(writeNum):
        postRequest.append(grequests.post(writeTestUrl, json=jdata))
    start = time.time()
    print grequests.map(postRequest)
    end = time.time()
    print "duration is " + str(end - start) + " seconds."
def download_live_logs(args): tasks = tasks_to_check(args) async_requests = [] zipped_files = [] all_logs = [] sys.stderr.write(colored('Finding current live log files', 'cyan') + '\n') for task in tasks: metadata = files_json(args, task) if 'slaveHostname' in metadata: uri = DOWNLOAD_FILE_FORMAT.format(metadata['slaveHostname']) if args.verbose: sys.stderr.write(colored('Finding logs in base directory on {0}'.format(metadata['slaveHostname']), 'magenta') + '\n') for log_file in base_directory_files(args, task, metadata): logfile_name = '{0}-{1}'.format(task, log_file) if not args.logtype or (args.logtype and logfetch_base.log_matches(log_file, args.logtype.replace('logs/', ''))): if should_download(args, logfile_name, task): async_requests.append( grequests.AsyncRequest('GET',uri , callback=generate_callback(uri, args.dest, logfile_name, args.chunk_size, args.verbose), params={'path' : '{0}/{1}/{2}'.format(metadata['fullPathToRoot'], metadata['currentDirectory'], log_file)}, headers=args.headers ) ) if logfile_name.endswith('.gz'): zipped_files.append('{0}/{1}'.format(args.dest, logfile_name)) else: all_logs.append('{0}/{1}'.format(args.dest, logfile_name.replace('.gz', '.log'))) elif args.logtype and args.verbose: sys.stderr.write(colored('Excluding log {0}, doesn\'t match {1}'.format(log_file, args.logtype), 'magenta') + '\n') if args.verbose: sys.stderr.write(colored('Finding logs in logs directory on {0}'.format(metadata['slaveHostname']), 'magenta') + '\n') for log_file in logs_folder_files(args, task): logfile_name = '{0}-{1}'.format(task, log_file) if not args.logtype or (args.logtype and logfetch_base.log_matches(log_file, args.logtype.replace('logs/', ''))): if should_download(args, logfile_name, task): async_requests.append( grequests.AsyncRequest('GET',uri , callback=generate_callback(uri, args.dest, logfile_name, args.chunk_size, args.verbose), params={'path' : '{0}/{1}/logs/{2}'.format(metadata['fullPathToRoot'], metadata['currentDirectory'], log_file)}, headers=args.headers ) ) if logfile_name.endswith('.gz'): zipped_files.append('{0}/{1}'.format(args.dest, logfile_name)) else: all_logs.append('{0}/{1}'.format(args.dest, logfile_name.replace('.gz', '.log'))) elif args.logtype and args.verbose: sys.stderr.write(colored('Excluding log {0}, doesn\'t match {1}'.format(log_file, args.logtype), 'magenta') + '\n') if async_requests: sys.stderr.write(colored('Starting live logs downloads\n', 'cyan')) grequests.map(async_requests, stream=True, size=args.num_parallel_fetches) if zipped_files: sys.stderr.write(colored('\nUnpacking logs\n', 'cyan')) all_logs = all_logs + logfetch_base.unpack_logs(args, zipped_files) return all_logs
def kill_all():
    rs = list()
    for worker in config.workers.values():
        addr = worker.get_address("/kill")
        rs.append(grequests.get(addr))
    grequests.map(rs)
    shutdown_server()
    return "Server is dead my friend"
def square(msg):
    h = {'Content-type': 'application/json', 'Accept': 'text/plain'}
    q.append(msg)
    if len(q) > 100:
        start = time.time()
        r = ((grequests.post("http://localhost/sensor/haystack/", data=d, headers=h)) for d in q)
        grequests.map(r)
        q.clear()
        print time.time() - start
def reap(file_name): config = ConfigParser.SafeConfigParser(allow_no_value=False) cfg_success = config.read('combine.cfg') if not cfg_success: logger.error('Reaper: Could not read combine.cfg.') logger.error('HINT: edit combine-example.cfg and save as combine.cfg.') return inbound_url_file = config.get('Reaper', 'inbound_urls') outbound_url_file = config.get('Reaper', 'outbound_urls') try: with open(inbound_url_file, 'rb') as f: inbound_urls = [url.rstrip('\n') for url in f.readlines()] except EnvironmentError as e: logger.error('Reaper: Error while opening "%s" - %s' % (inbound_url_file, e.strerror)) return try: with open(outbound_url_file, 'rb') as f: outbound_urls = [url.rstrip('\n') for url in f.readlines()] except EnvironmentError as e: logger.error('Reaper: Error while opening "%s" - %s' % (outbound_url_file, e.strerror)) return logger.info('Fetching inbound URLs') inbound_files=[] for url in inbound_urls: if url.startswith('file://'): inbound_files.append(url.partition('://')[2]) inbound_urls.remove(url) headers = {'User-Agent': 'harvest.py'} reqs = [grequests.get(url, headers=headers) for url in inbound_urls] inbound_responses = grequests.map(reqs) inbound_harvest = [(response.url, response.status_code, response.text) for response in inbound_responses if response] for each in inbound_files: with open(each,'rb') as f: inbound_harvest.append(('file://'+each, 200, f.read())) logger.info('Fetching outbound URLs') outbound_files=[] for url in outbound_urls: if url.startswith('file://'): outbound_files.append(url.partition('://')[2]) outbound_urls.remove(url) reqs = [grequests.get(url, headers=headers) for url in outbound_urls] outbound_responses = grequests.map(reqs) outbound_harvest = [(response.url, response.status_code, response.text) for response in outbound_responses if response] for each in outbound_files: with open(each,'rb') as f: outbound_harvest.append(('file://'+each, 200, f.read())) logger.error('Storing raw feeds in %s' % file_name) harvest = {'inbound': inbound_harvest, 'outbound': outbound_harvest} with open(file_name, 'wb') as f: json.dump(harvest, f, indent=2)
def _simulate(self, stream, numhosts, runinterval):
    while True:
        reqs = []
        bufsize = 10
        for i in range(bufsize):
            f, r, c = stream.next()
            reqs += [self.submit_command(
                f['certname'], 'replace_facts', f)]
        grequests.map(reqs)
def download_s3_logs(args): if not args.silent: sys.stderr.write(colored("Checking for S3 log files", "cyan") + "\n") callbacks.progress = 0 logs = logs_for_all_requests(args) async_requests = [] all_logs = [] for log_file in logs: filename = log_file["key"].rsplit("/", 1)[1] if logfetch_base.is_in_date_range(args, int(str(log_file["lastModified"])[0:-3])): if not args.logtype or log_matches(args, filename): logfetch_base.log(colored("Including log {0}".format(filename), "blue") + "\n", args, True) if not already_downloaded(args.dest, filename): async_requests.append( grequests.AsyncRequest( "GET", log_file["getUrl"], callback=callbacks.generate_callback( log_file["getUrl"], args.dest, filename, args.chunk_size, args.verbose, args.silent ), headers=args.headers, ) ) else: logfetch_base.log(colored("Log already downloaded {0}".format(filename), "blue") + "\n", args, True) all_logs.append("{0}/{1}".format(args.dest, filename)) else: logfetch_base.log( colored( "Excluding {0} log does not match logtype argument {1}".format(filename, args.logtype), "magenta", ) + "\n", args, True, ) else: logfetch_base.log( colored("Excluding {0}, not in date range".format(filename), "magenta") + "\n", args, True ) if async_requests: logfetch_base.log( colored( "Starting {0} S3 Downloads with {1} parallel fetches\n".format( len(async_requests), args.num_parallel_fetches ), "cyan", ), args, False, ) callbacks.goal = len(async_requests) grequests.map(async_requests, stream=True, size=args.num_parallel_fetches) else: logfetch_base.log(colored("No S3 logs to download\n", "cyan"), args, False) logfetch_base.log(colored("All S3 logs up to date\n", "cyan"), args, False) return all_logs
def process(school_code, roll_no_range, centre_no, net_choice): if not os.path.exists('./webpages'): os.mkdir('./webpages') database_conn = sqlite3.connect('raw_data.sqlite') cursor = database_conn.cursor() cursor.executescript(''' DROP TABLE IF EXISTS Records; DROP TABLE IF EXISTS Marks; CREATE TABLE IF NOT EXISTS Records ( Roll_Number INTEGER PRIMARY KEY, Name TEXT, Father_Name TEXT, Mother_Name TEXT, Final_Result TEXT, Number_of_subjects INTEGER ); CREATE TABLE IF NOT EXISTS Marks ( Roll_Number INTEGER, Subject_Code TEXT, Subject_Name TEXT, Theory_Marks INTEGER, Practical_Marks INTEGER, Total_Marks INTEGER, Grade TEXT ) ''') roll_no_range = process_range(roll_no_range) if net_choice == 'y': net_choice = True elif net_choice == 'n': net_choice = False else: print('\nIncorrect Network mode chosen, defaulting to non-async\n') net_choice = False count = 0 headers = { 'Referer': 'http://cbseresults.nic.in/class12npy/class12th17reval.htm', 'Upgrade-Insecure-grequests': '1', 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)' ' Chrome/58.0.3029.110 Safari/537.36' } payloads = [{ 'regno': roll_no, 'sch': school_code, 'cno': centre_no, 'B2': 'Submit' } for roll_no in roll_no_range] base_url = 'http://cbseresults.nic.in/class12npy/class12th17reval.asp' print( 'Retrieving data for {} students, may take a few seconds depending on the network\n' .format(len(payloads))) if net_choice: responses = (grequests.post(base_url, headers=headers, data=load) for load in payloads) page_sources = grequests.map(responses) else: page_sources = list() for load in payloads: roll_no = load['regno'] try: page_sources.append( requests.post(base_url, headers=headers, data=load)) except ConnectionError: page_sources.append(None) except Exception as error: page_sources.append(None) print( 'AAAAHHHHHH. Roll No. {} threw an unknown, unexpected error, call the developer.' .format(roll_no)) print('Report this error to him: {}'.format(error)) print( 'Retrieved data for {} records out of {} records asked for.\n'.format( len(page_sources), len(payloads))) for page_source in page_sources: roll_no = roll_no_range[page_sources.index(page_source)] try: if page_source and page_source.status_code == 200: data = parser(page_source.text) cursor.execute( 'INSERT INTO Records (Roll_Number, Name, Father_Name, Mother_Name, Final_Result, ' 'Number_of_subjects) VALUES (?, ?, ?, ?, ?, ?)', ( data['Roll No:'], data['Candidate Name:'], data['Father\'s Name:'], data['Mother\'s Name:'], data['final_result'], len(data['marks']), )) for subject in data['marks']: cursor.execute( 'INSERT INTO Marks (Roll_Number, Subject_Code, Subject_Name, Theory_Marks,' 'Practical_Marks, Total_Marks, Grade) VALUES (?, ?, ?, ?, ?, ?, ?)', ( data['Roll No:'], subject['SUB CODE'], subject['SUB NAME'], subject['THEORY'], subject['PRACTICAL'], subject['MARKS'], subject['GRADE'], )) with open( './webpages/{}-{}.html'.format( data['Roll No:'], data['Candidate Name:']), 'w') as html_page: html_page.write(page_source.text) count += 1 else: print( 'Failed to retrieve data for Roll No. {}'.format(roll_no)) except IndexError: print( 'Result not found for this Roll Number-School Code combination: {}-{}' .format(roll_no, school_code)) except Exception as error: print( 'AAAAHHHHHH. Roll No. {} threw an unknown, unexpected error, call the developer.' .format(roll_no)) print('Report this error to him: {}'.format(error)) database_conn.commit() database_conn.close() print('{} valid records downloaded and saved'.format(count))
print("Новые часы") ip_addr = (address).encode('utf-8') name_w = (name).encode('utf-8') id = hashlib.sha224(ip_addr + name_w).hexdigest() print("For address {addr} watch found".format(addr=address)) print(id) query = "insert into Watchs values ('{id_w}','{ip_w}','{name_w}')".format( id_w=id, ip_w=address, name_w=name) print(query) cursor.execute(query) result = "Add new" async_list = [] for i in range(1, 255): adr = ip.format(addr=i) + "/cgi-bin/getName.py" action_item = grequests.get(adr, hooks={'response': [do_something]}, timeout=1) async_list.append(action_item) n = '' n = grequests.map(async_list) #while (n == ''): # pass #sleep(1.5) db.commit() db.close() print("Content-type: text/html\n") print(json.dumps({'result': result}))
                  json={'chain': result}, headers=headers)
print("\n ////new_chain_length_and_chain\n")
print(r.text)
i_need_the_chain = False
mining = True

if (mining and not i_need_the_chain) or not (len(node.inbound) > len(connections)):
    reqs = [
        grequests.get('http://0.0.0.0:' + port + '/previous'),
        grequests.get('http://0.0.0.0:' + port + '/mine')
    ]
    results = grequests.map(reqs, exception_handler=exception_handler)
    previous_block = results[0].content.decode()
    mined_block = results[1].content.decode()
    print('\n //// next to last block mined\n')
    print(previous_block)
    print('\n //// last block mined\n')
    print(mined_block)
    did_mine_block = True
    mining = False

for con in node:
    con.send_line(port)
def _process_request(self, async_request, register):
    async_result = grequests.map([async_request])
    async_result = async_result[0]
    async_result.raise_for_status()
    result = register.parse_result(async_result.text)
    return result
end = int(sys.argv[2])
results = {
    'desk': 0,
    'deskT': 0,
    'computer': 0,
    'computerT': 0,
    'table': 0,
    'tableT': 0
}
urls = [
    "https://dweet.io/get/latest/dweet/for/openSeat%s" % "%.3d" % i
    for i in xrange(begin, end)
]
requests = (grequests.get(u) for u in urls)
responses = grequests.map(requests)
for r in responses:
    jsondata = r.json()
    wanted = jsondata['with']
    j = wanted[0].values()[0]
    if 'desk' in j and 'open' in j.values()[0]:
        results['desk'] += 1
    elif 'table' in j and 'open' in j.values()[0]:
        results['table'] += 1
    elif 'computer' in j and 'open' in j.values()[0]:
        results['computer'] += 1
    elif 'desk' in j and 'taken' in j.values()[0]:
        results['deskT'] += 1
    elif 'table' in j and 'taken' in j.values()[0]:
        results['tableT'] += 1
    elif 'computer' in j and 'taken' in j.values()[0]:
def get_links(prize_id, prize_name): url = 'http://tools.torebaprizewatcher.com/serverside/get_prize_detail.php' headers = { 'Pragma': 'no-cache', 'DNT': '1', 'Accept-Encoding': 'gzip, deflate', 'Accept-Language': 'en-US,en;q=0.9,th;q=0.8', 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36', 'Accept': 'application/json, text/javascript, */*; q=0.01', 'Cache-Control': 'no-cache', 'X-Requested-With': 'XMLHttpRequest', 'Connection': 'keep-alive', 'Referer': 'http://tools.torebaprizewatcher.com/prize.html', } params = (('item', prize_id), ) data = '[]' response = requests.get(url, headers=headers, params=params) data = response.text data = json.loads(data) if 'data' in data: toreba_urls = [] data = data['data'] cookies = browser_cookie3.chrome() for item in data: headers = { 'Connection': 'keep-alive', 'Pragma': 'no-cache', 'Cache-Control': 'no-cache', 'Upgrade-Insecure-Requests': '1', 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36', 'DNT': '1', 'Accept': 'image/webp,image/apng,image/*,*/*;q=0.8', 'Accept-Encoding': 'gzip, deflate', 'Accept-Language': 'en-US,en;q=0.9,th;q=0.8', 'Referer': item['replay_url'] } toreba_urls.append( grequests.get(item['replay_url'], headers=headers, cookies=cookies)) responses = grequests.map(toreba_urls) # build html response html_response = '<h1 style="text-align:center;">{}: {}</h1>'.format( prize_id, prize_name) html_response += '<div style="display:flex;flex-wrap:wrap;justify-content:center;">' for i, response in enumerate(responses): html = bs4.BeautifulSoup(response.text, 'html.parser') video_elem = html.select_one('video source') video_date = html.select_one('.uploadtime_replay.p8') # extract the time only, remove other characters video_date = re.sub('[^0-9\-\: ]', '', video_date.text) html_response += '<div style="margin:30px;display:flex;flex-direction:row;">' html_response += '<div>' html_response += '<div style="font-size:36px;margin-right:10px;">{}</div>'.format( i + 1) html_response += '<div style="font-size:12px;margin-right:10px;">{}</div>'.format( video_date) html_response += '</div>' html_response += '<video preload="auto" src="{}" controls></video>'.format( video_elem['src']) html_response += '</div>' html_response += '</div>' return html_response else: print('No replays for {} - {}'.format(prize_id, prize_name)) return ''
def get_data(urls):
    """ Get data of urls with grequests. """
    req = (grequests.get(link) for link in urls)
    response = grequests.map(req)
    return response
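# Illustrative usage sketch (placeholder URLs, not from the original code):
# grequests.map keeps the input order and substitutes None for failed requests.
if __name__ == '__main__':
    pages = get_data(['https://example.com/a', 'https://example.com/b'])
    for page in pages:
        if page is not None:
            print(page.status_code, len(page.content))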
log = open('data/raw-games/log.txt', "w") urls = [] for acc in ladder: acc = acc[:-1] urls.append( 'https://tft.iesdev.com/graphql?query=query summonerGames($name: String!, $region: String!, $cursor: String) { summoner(name: $name, region: $region) { id name puuid games(first: 20, after: $cursor) { edges { node { id createdAt length queueId isRanked players } } pageInfo { endCursor hasNextPage } } } } &variables={"name": "' + acc + '","region":"' + region + '"}') batches = 20 urls = [urls[i::batches] for i in range(batches)] for i in range(batches): rs = (grequests.get(url) for url in urls[i]) rs_map = grequests.map(rs) for response in rs_map: if response.status_code == 200: data = response.json()["data"]["summoner"] if data is not None: edges = data["games"]["edges"] for game in edges: info = game["node"] id = info["id"] if id not in mark_game: mark_game.add(id) with open( config.get('setup', 'raw_data_dir') + '/{}/{}.json'.format(region, id), "w") as file: file.write(json.dumps(info))
def run(): symbol2code = {} stock_map = {} for stock in main_session.query(models.DailyBasic).all(): ts_code = stock.ts_code market = ts_code.split('.')[1].lower() symbol = ts_code.split('.')[0] code = '{market}{symbol}'.format(market=market, symbol=symbol) symbol2code[symbol] = code stock_map[code] = {'circ_mv': float(stock.circ_mv)} batch_size = 500 req_list = [] for i in range(0, len(target_symbols), batch_size): keys = [] for symbol in target_symbols[i:i + batch_size]: query_key = symbol2code[symbol] keys.append(query_key) req_list.append( grequests.get('http://hq.sinajs.cn/list={}'.format( ','.join(keys)))) while True: time_a = time.time() try: responses = grequests.map(req_list) print('====== {} ======'.format( time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))) displays = [] for response in responses: res = response.text.strip().split(';\n') for i in res: j = i.split(',') name = j[0].split('="')[1] code = j[0].split('="')[0].split('_')[-1] yesterday_closing_price = float(j[2]) current_price = float(j[3]) # today_max_price = float(j[4]) # buy_one_price = float(j[6]) # buy_one_vol = float(j[10]) # today_limit_price = round(yesterday_closing_price * 1.1, 2) chg = (current_price / yesterday_closing_price - 1) chg_display = '{}%'.format(round(chg * 100, 2)) circ_mv = stock_map[code]['circ_mv'] # if_display = False # type = 1 # if today_max_price == today_limit_price: #摸过板的 # # if_display = True # if buy_one_price < today_limit_price: #开板 # # if_display = True # pass # elif buy_one_price * buy_one_vol < 10000000: #封单小于1kw # # if_display = True # type = 2 # elif chg > 0.05: # if_display = True # if_display = True # if if_display: # if type == 2: # displays.append({ # 'note': '{code}\t{name}\tchg:{chg}\tprice:{price}\tcirc_mv:{circ_mv}亿\t封单:{vol}手'.format(code=code, name=name, chg=chg_display, # price=round(current_price, 2), circ_mv=int(circ_mv), vol=int(buy_one_vol / 100)), # 'chg': chg # }) # else: displays.append({ 'note': '{code}\t{name}\tchg:{chg}\tprice:{price}\tcirc_mv:{circ_mv}亿' .format(code=code, name=name, chg=chg_display, price=round(current_price, 2), circ_mv=int(circ_mv)), 'chg': chg }) displays.sort(key=lambda x: x['chg'], reverse=False) notes = [i['note'] for i in displays] print('\n'.join(notes)) except Exception as e: print(e) continue time_b = time.time() cost = time_b - time_a time.sleep(1 - cost)
def __codeforces(self): urls = { "user_info": { "url": f'https://codeforces.com/api/user.info?handles={self.__username}' }, "user_contests": { "url": f'https://codeforces.com/contests/with/{self.__username}' } } reqs = [ grequests.get(item["url"]) for item in urls.values() if item.get("url") ] responses = grequests.map(reqs) details_api = {} contests = [] for page in responses: if page.status_code != 200: raise UsernameError('User not Found') if page.request.url == urls["user_info"]["url"]: details_api = page.json() elif page.request.url == urls["user_contests"]["url"]: soup = BeautifulSoup(page.text, 'html.parser') table = soup.find('table', attrs={'class': 'user-contests-table'}) table_body = table.find('tbody') rows = table_body.find_all('tr') for row in rows: cols = row.find_all('td') cols = [ele.text.strip() for ele in cols] contests.append({ "Contest": cols[1], "Rank": cols[3], "Solved": cols[4], "Rating Change": cols[5], "New Rating": cols[6] }) if details_api.get('status') != 'OK': raise UsernameError('User not Found') details_api = details_api['result'][0] try: rating = details_api['rating'] max_rating = details_api['maxRating'] rank = details_api['rank'] max_rank = details_api['maxRank'] except KeyError: rating = 'Unrated' max_rating = 'Unrated' rank = 'Unrated' max_rank = 'Unrated' return { 'status': 'Success', 'username': self.__username, 'platform': 'Codeforces', 'rating': rating, 'max rating': max_rating, 'rank': rank, 'max rank': max_rank, 'contests': contests }
def req_all(binary_data): save_image(binary_data) print("Got image of size {}".format(len(binary_data))) binary_data = downscale(binary_data) print("Resized to {}".format(len(binary_data))) b64image = base64.b64encode(binary_data) fpp = req_facepp(b64image) requests = [fpp] if hasattr(config, 'OMC_SERVER'): omc = req_omc(binary_data) requests.append(omc) # req all concurrently results = grequests.map(requests, exception_handler=exception_handler) try: fpp = results[0].json() except: print(results[0].content) fpp = {'faces': []} if hasattr(config, 'OMC_SERVER'): try: omc = results[-1].json() except: print(results[-1]) omc = [] else: omc = [] faces = [] # rescale pixel coordinates for ff in fpp['faces']: for key in ff['face_rectangle']: ff['face_rectangle'][key] *= 1.0 / scale for of in omc: for key in of['face_rectangle']: of['face_rectangle'][key] *= 1.0 / scale for ff in fpp['faces']: ffc = np.array( (ff['face_rectangle']['left'] + ff['face_rectangle']['width'] / 2, ff['face_rectangle']['top'] + ff['face_rectangle']['height'] / 2)) minD = 9999 minOMC = None for of in omc: ofc = np.array((of['face_rectangle']['left'] + of['face_rectangle']['width'] / 2, of['face_rectangle']['top'] + of['face_rectangle']['height'] / 2)) d = np.linalg.norm(ofc - ffc) if d < minD: minD = d minOMC = of attrs = { "kairos": { "topLeftX": ff['face_rectangle']['left'], "topLeftY": ff['face_rectangle']['top'], "width": ff['face_rectangle']['width'], "height": ff['face_rectangle']['height'] }, "fpp": ff, "of": minOMC } attrs['text'] = to_text(attrs) faces.append(attrs) return faces
def main():
    global MAX_EMAILS
    # using argparse to let the user set start and end email ids
    parser = argparse.ArgumentParser(description="GET DNC EMAILS")
    parser.add_argument("--start", dest="start", default=START,
                        help="emailid to start the crawl at")
    parser.add_argument("--end", dest="end", default=MAX_EMAILS,
                        help="emailid to end the crawl at")
    parser.add_argument("--async", dest="async", default=ASYNC,
                        help="1, 0 for use of async request or not respectively. "
                             "Use this if you dont care about the order of the requests")
    parser.add_argument("--data-dir", dest="data_dir", default=DNC_DIR,
                        help="Give the directory of where to store the emails")
    args = parser.parse_args()

    # set user agent
    s = requests.session()
    # being polite: we"ll let wikileaks know who we are and where they can find more info
    s.headers.update({"User-Agent": "Requests-- DNCBotty: A collecting emails"
                      "for a quick project. https://github.com/DNCBotty"})

    # create the directory if it does not exist
    try:
        if not os.path.exists(DNC_DIR):
            os.makedirs(DNC_DIR)
    except OSError:
        print("OSError")
        sys.exit(1)

    # create async requests
    # using map instead of imap so I can match the response to the emailid
    # NOTE ( if async then you can"t (that i know of) recover which requests failed )
    if int(args.end) < MAX_EMAILS:
        MAX_EMAILS = int(args.end)
    rs = (grequests.get(u, session=s)
          for u in (URL + str(i) for i in xrange(START, MAX_EMAILS + 1)))
    # unordered async requests with grequests.imap, if the user wants
    if ASYNC:
        resp = grequests.imap(rs, size=POOL_SIZE)
    else:
        resp = grequests.map(rs, size=POOL_SIZE)

    # loop through responses
    fail_count = 0
    for (i, r) in zip(xrange(1, MAX_EMAILS + 1), resp):
        if r.ok:
            # reset fail_count if responses are "ok"
            fail_count = 0
            json.dump({"data": r.text},
                      open(os.path.join(DNC_DIR, "_" + str(i) + ".json"), "w"))
        else:
            # simply write the emailID of the failed request to failed_emails.txt
            try:
                print("Failed to get email with email id {}".format(i),
                      file=open(os.path.join(DNC_DIR, "failed_emails.txt"), "w"))
            except Exception as e:
                print(e)
                # if fail_count emails fail consecutively, kill the script
                fail_count += 1
                if fail_count > MAX_FAIL:
                    sys.exit("responses failed %s consecutive times..Quitting" % MAX_FAIL)
            finally:
                # back-off for courtesy
                if (i % CHUNK_TO_SLEEP) & (i != 0):
                    time.sleep(SLEEP)


if __name__ == "__main__":
    main()
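# Side note (an illustrative sketch with placeholder URLs, not part of the script
# above): grequests.map preserves input order and returns None for failed requests,
# while grequests.imap yields responses as they complete and skips failures, so the
# input order is lost.
reqs = [grequests.get("https://example.com/%d" % i) for i in range(3)]
ordered = grequests.map(reqs, size=2)          # [resp0, resp1, resp2]; None where a request failed
# for resp in grequests.imap(reqs, size=2):    # alternative: completion order, failures skipped
#     print(resp.url, resp.status_code)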
def port_scan_check(queue_targets, q_targets, args, q_results, tasks_count): """ 检测端口是否开放 no_scripts -> null 无限制 -> 1 目标重复, 优先级是最高的 -> 2 邻居,为节省时间而禁用的 """ while True: try: url, is_neighbor = queue_targets.get_nowait() except Queue.Empty as e: break try: # scheme netloc path if url.find('://') < 0: scheme = 'unknown' netloc = url[:url.find('/')] if url.find('/') > 0 else url path = '' else: scheme, netloc, path, params, query, fragment = urlparse.urlparse( url, 'http') # host port if netloc.find(':') >= 0: _ = netloc.split(':') host = _[0] port = int(_[1]) else: host = netloc port = None if scheme == 'https' and port is None: port = 443 elif scheme == 'http' and port is None: port = 80 if scheme == 'unknown': if port == 80: scheme = 'http' if port == 443: scheme = 'https' ports_open = set() ports_closed = set() # 插件不依赖HTTP连接池 & 仅启用插件扫描, 则不需要检查80/443端口的HTTP服务 if args.scripts_only and args.require_no_http: ports_open, ports_closed = scan_given_ports( ports_open, ports_closed, host, args.require_ports) target = { 'scheme': scheme, 'host': host, 'port': port, 'path': path, 'has_http': False, 'ports_open': ports_open, 'ports_closed': ports_closed } add_target(q_targets, q_results, target, tasks_count, args) continue if port: # 标准端口 或 非标准端口 has_http = is_port_open(host, port) if has_http: ports_open.add(port) else: ports_closed.add(port) if not args.no_scripts: ports_open, ports_closed = scan_given_ports( ports_open, ports_closed, host, args.require_ports) target = { 'scheme': scheme, 'host': host, 'port': port, 'path': path, 'has_http': has_http, 'ports_open': ports_open, 'ports_closed': ports_closed } add_target(q_targets, q_results, target, tasks_count, args) else: port_open_80 = is_port_open(host, 80) port_open_443 = is_port_open(host, 443) if port_open_80: ports_open.add(80) else: ports_closed.add(80) if port_open_443: ports_open.add(80) else: ports_closed.add(80) if not args.no_scripts: ports_open, ports_closed = scan_given_ports( ports_open, ports_closed, host, args.require_ports) if port_open_80 and port_open_443: target = { 'scheme': 'https', 'host': host, 'port': 443, 'path': path, 'has_http': True, 'ports_open': ports_open, 'ports_closed': ports_closed } add_target(q_targets, q_results, target, tasks_count, args, is_neighbor) # 排除 301 http 跳转 https import grequests r = grequests.map([ grequests.get('http://%s' % host, allow_redirects=False, timeout=20) ])[0] if r and not (r.status_code == 301 and r.headers.get( 'Location', '').lower().startswith('https')): target = { 'scheme': 'http', 'host': host, 'port': 80, 'path': path, 'has_http': True, 'no_scripts': 1, 'ports_open': ports_open, 'ports_closed': ports_closed } add_target(q_targets, q_results, target, tasks_count, args) elif port_open_443: target = { 'scheme': 'https', 'host': host, 'port': 443, 'path': path, 'has_http': True, 'ports_open': ports_open, 'ports_closed': ports_closed } # 即使指定的一些目标,允许插件扫描,邻居也不启用,节省扫描时间 add_target(q_targets, q_results, target, tasks_count, args, is_neighbor) elif port_open_80: target = { 'scheme': 'http', 'host': host, 'port': 80, 'path': path, 'has_http': True, 'ports_open': ports_open, 'ports_closed': ports_closed } add_target(q_targets, q_results, target, tasks_count, args, is_neighbor) elif args.no_scripts: # 80 443 端口不开放, 禁用插件扫描 q_results.put('No ports open: %s' % host) elif not is_neighbor or args.scripts_only: # 直接输入目标 或者 对邻居应用插件 target = { 'scheme': 'http', 'host': host, 'port': 80, 'path': path, 'has_http': False, 'ports_open': ports_open, 'ports_closed': ports_closed } add_target(q_targets, 
q_results, target, tasks_count, args) except requests.exceptions.RequestException as e: pass except Exception as e: import traceback q_results.put(traceback.format_exc()) q_results.put('[port_scan_check] %s' % str(e))
def validate_email(email): email_info = { "email": email, "exists": False, "emailrep_limit_reached": False } headers = {'User-Agent': 'Buster - email OSINT tool'} accounts = [] sources = [] reqs = [ grequests.get("https://emailrep.io/" + email), grequests.get("https://myspace.com/search/people?q=" + email), grequests.get("https://api.github.com/search/users?q=" + email + "+in:email"), grequests.get('https://darksearch.io/api/search?query="' + email + '"'), grequests.get("https://haveibeenpwned.com/api/v2/pasteaccount/" + email, headers=headers), grequests.post("https://digibody.avast.com/v1/web/leaks", json={"email": email}) ] response = grequests.map(reqs) if response[0].status_code == 200: data = response[0].json() if (data["details"]["deliverable"] == True): email_info["exists"] = True if (data["details"]["last_seen"] != "never"): email_info["exists"] = True if (data["details"]["profiles"] != []): email_info["exists"] = True email_info["profiles"] = data["details"]["profiles"] if "gravatar" in email_info["profiles"]: gravatar = email2gravatar(email) if gravatar != "": accounts.append(gravatar) if "aboutme" in email_info["profiles"]: aboutme = email2aboutme(email) if aboutme != "": accounts.append(aboutme) if "linkedin" in email_info["profiles"]: accounts.append(email2linkedin(email)) myspace = email2myspace(email, response[1]) if (myspace != []): email_info["exists"] = True accounts.extend(myspace) github = email2github(email, response[2]) if (github != ""): email_info["exists"] = True accounts.append(github) darksearch_sources = dark_search(email, response[3]) if (darksearch_sources != []): email_info["exists"] = True sources.extend(darksearch_sources) #breaches=email_breaches(email,response[4]) #if(breaches != []): #email_info["exists"]=True #email_info["breaches"]=breaches pastes = email_pastes(email, response[4]) if (pastes != []): email_info["exists"] = True email_info["pastes"] = pastes breached_accts = email2breachedaccts(email, response[5]) if (breached_accts["accounts"] != []): email_info["exists"] = True accounts.extend(breached_accts["accounts"]) if (breached_accts["breaches"] != []): email_info["exists"] = True email_info["breaches"] = breached_accts["breaches"] if (email_info["exists"] == True): skype = email2skype(email) if (skype != []): accounts.extend(skype) google_sources = google_search(email) if (google_sources != []): sources.extend(google_sources) twitter_sources = twitter_search(email) if (twitter_sources != []): sources.extend(twitter_sources) domains_registered = email2domains(email) if (domains_registered != []): email_info["domains_registered"] = domains_registered if accounts != []: email_info["accounts"] = accounts if sources != []: email_info["sources"] = sources else: email_info["emailrep_limit_reached"] = True return email_info
    print(response.url, response.content)

# ##### Send the requests #####
# gevent.joinall([
#     gevent.spawn(task, method='get', url='https://www.python.org/', req_kwargs={}),
#     gevent.spawn(task, method='get', url='https://www.yahoo.com/', req_kwargs={}),
#     gevent.spawn(task, method='get', url='https://github.com/', req_kwargs={}),
# ])

# ##### Send the requests (a coroutine pool caps the number of concurrent greenlets) #####
from gevent.pool import Pool
pool = Pool(5)
gevent.joinall([
    pool.spawn(task, method='get', url='https://www.python.org/', req_kwargs={}),
    pool.spawn(task, method='get', url='https://www.yahoo.com/', req_kwargs={}),
    pool.spawn(task, method='get', url='https://www.github.com/', req_kwargs={}),
])
"""

import grequests

request_list = [
    grequests.get('http://httpbin.org/delay/1', timeout=0.001),
    grequests.get('http://fakedomain/'),
    grequests.get('http://httpbin.org/status/500')
]

# ##### Execute and collect the list of responses #####
response_list = grequests.map(request_list, size=5)
print(response_list)
def getMoviesOnWikipedia(movies):
    # Async request for all the movies, based off the key, which is their URL
    response = (grequests.get(movie) for movie in movies)
    return grequests.map(response)
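# Illustrative usage sketch (placeholder article URLs, not from the original code):
# because grequests.map preserves input order, results can be zipped back onto the inputs.
movie_urls = ['https://en.wikipedia.org/wiki/Heat_(1995_film)',
              'https://en.wikipedia.org/wiki/Alien_(film)']
for url, page in zip(movie_urls, getMoviesOnWikipedia(movie_urls)):
    print(url, page.status_code if page is not None else 'failed')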
def get(self, resource, *args, **kwargs): """ Get resources. :param resource: Resource name. """ _resource = resource.lower() resources = [] id_ = kwargs.get("id") query = kwargs.get("query") raw = kwargs.get("raw", False) filters = kwargs.get("filters", {}) size = kwargs.get("size", config.max_api_resource_fetch) since = kwargs.get("since", 1) since = since if since > 0 else 1 if id_: if isinstance(id_, string_types) and id_.isdigit(): id_ = int(id_) id_ = sequencify(id_) if _resource == "model": url = self._build_url("_api", "model", "get", prefix=False) urls = None params = None version = kwargs.get("version") hash_ = kwargs.get("hash") if version: if isinstance(version, string_types) and version.isdigit(): version = int(version) version = sequencify(version) if id_: urls = [ self._build_url(url, str(id), prefix=False) for id in id_ ] if version: params = dict({ "version": str(version) + \ ("&%s" % hash_ if hash_ else "") }) if query: url = self._build_url(url, prefix=False) params = [("search", "species"), ("search", "knowledge"), ("name", query)] if urls: req_map = None if version: assert len(urls) == 1 req_map = (self.request("GET", urls[0], params = dict({ "version": str(v) + \ ("&%s" % hash_ if hash_ else "") }), async_request=True) for v in version) else: req_map = (self.request("GET", u, params=params, async_request=True) for u in urls) response = grequests.map( req_map, exception_handler=lambda req, ex: print("request failed")) content = [] for r in response: try: content.append(r.json()) if r.json().get( 'status') and r.json()['status'] is 400: raise ValueError(r.message) except: raise ValueError(r._content) else: response = self.request("GET", url, params=params) content = response.json() if id_: models = self.get("model", size=sys.maxsize, raw=True) filtered_model = [ model for model in models if model["model"]["id"] in id_ ] resources = content if raw else [ _model_response_to_model(self, m) for m in filtered_model ] else: if filters: if "user" in filters: user = filters["user"] if isinstance(user, int): user = self.get("user", id=user) if not isinstance(user, User): raise TypeError( "Expected type for user is User or ID, type %s found." % type(user)) content = list( filter(lambda x: x["model"]["userId"] == user.id, content)) if "domain" in filters: domain = filters["domain"] if domain not in _ACCEPTED_MODEL_DOMAIN_TYPES: raise TypeError("Not a valid domain type: %s" % domain) else: content = list( filter(lambda x: x["model"]["type"] == domain, content)) from_, to = since - 1, min(len(content), size) content = content[from_:from_ + to] resources = content if raw else \ QueryList([ _model_response_to_model(self, obj) for obj in content ]) elif _resource == "user": if not id_: raise ValueError("id required.") response = self.request("GET", "_api/user/lookupUsers", params=[("id", i) for i in id_]) content = response.json() for user_id, user_data in iteritems(content): user = _user_response_to_user( self, merge_dict({"id": user_id}, user_data)) resources.append(user) return squash(resources)
def put_chains_together(): reqs = [] increment = 0 start_at = 0 connections = get_connections() array = [] for index, item in enumerate(connections): array.append('http://' + item['ip'] + ':' + item['port'] + '/chain_length') chain_length_check(array) connections = sorted(connections, key=by_connection_length_key) if len(connections) > 0: increment = math.ceil(connections[len(connections) - 1]['length'] / len(connections)) increments = [] start_ats = [0] for index, item in enumerate(connections): if increment + start_at > item['length']: new_increment = increment - (increment + start_at - item['length']) increments.append(new_increment) else: increments.append(increment) if index > 0: start_at += increments[index] start_ats.append(start_at) for index, inc in enumerate(increments): reqs.append( grequests.get("http://" + item['ip'] + ":" + item['port'] + "/give_chain", params={ 'previous': json.loads(previous_block)['previous_hash'], 'start_at_index': start_ats[index], 'increment_by': inc })) request_chain_results = grequests.map(reqs, exception_handler=exception_handler) chains_to_add = [show_json(result) for result in request_chain_results] prev_chain = {'chain': []} all_chains = [] for i, chain in enumerate(chains_to_add): if len(prev_chain['chain']) > 0 and len(chain['chain']) > 0: print( '\n //// last block previous chain, first block current chain - check difference' ) pprint(prev_chain['chain'][-1]) pprint(chain['chain'][0]) last_block_prev_chain = prev_chain['chain'][-1] first_block_cur_chain = chain['chain'][0] if first_block_cur_chain[ 'index'] > last_block_prev_chain['index'] + 1: chain_diff = first_block_cur_chain[ 'index'] - last_block_prev_chain['index'] r = requests.get("http://" + connections[-1]['ip'] + ":" + connections[-1]['port'] + "/give_chain", params={ 'previous': last_block_prev_chain['previous_hash'], 'start_at_index': 0, 'increment_by': chain_diff }) missing_chain = json.loads(r.text)['chain'] print('\n //// this is the missing chain') pprint(missing_chain) if len(missing_chain) > 0: #del missing_chain[-(chain_diff + 2):-chain_diff] for block in missing_chain[::-1]: chain['chain'].insert(0, block) prev = {} for index, ch in enumerate(chain['chain']): deleted = False if index > 0 and ch['index'] == prev['index']: if ch['timestamp'] < prev['timestamp']: del chain['chain'][index - 1] else: del chain['chain'][index] deleted = True if not deleted: all_chains.append(ch) prev = ch if len(chain['chain']) > 0: prev_chain = chain else: print('chain["chain"] is empty, moving on, saving previous') all_chains = sorted(all_chains, key=by_index_key) return all_chains
print "Retrieving datas of openinging hours of all places" opening_hours_url = configuration[ "data_source"] + "/sensor/getAll?s=" + configuration["secret"] print " Saving..." save_to_file("sensors/opening_hours.json", urllib2.urlopen(opening_hours_url).read()) json_places = json.loads(places) print print "Fetching measures for each places..." requests = map( lambda place: grequests.get(configuration[ "data_source"] + "/measurements/places?ids=" + str(place["id"]) + "&types=wifi"), json_places) results = grequests.map(requests) print " Saving..." map( lambda (index, result): save_to_file( "sensors/sensor-" + str(json_places[index]["id"]) + "_wifi.json", order_data(result.content)), enumerate(results)) print print "Fetching infos of each places..." places_infos_requests = map( lambda place: grequests.get("http://6element.fr/place/" + str(place["id"]) ), json_places) places_infos = grequests.map(places_infos_requests) print " Saving..." map( lambda (index, result): save_to_file(
import grequests urls = [ 'https://webhook.site/f377a059-1c83-4ebd-80ce-007de6067d25', 'https://webhook.site/f377a059-1c83-4ebd-80ce-007de6067d25', 'https://webhook.site/f377a059-1c83-4ebd-80ce-007de6067d25' ] rs = (grequests.get(u) for u in urls) t = grequests.map(rs) print(t)
continue if len(reqs) < nb_reqs_batch: j = { "song_id": song_id, "song_name": row['name'], "artist_name": row['artist_name'], "song_seconds": int(row['song_length'] / 1000) } u = '%sscrape' % random.choice(API_URLS) reqs.append(grequests.post(u, json=j)) if len(reqs) == nb_reqs_batch or i + 1 == len(S): t0 = time() ress = grequests.map(reqs) req_times.append(time() - t0) for res in ress: if res == None: print(res) elif res.status_code != 200: print(res.content) elif 'mp3_key' in res.content.decode( ) and 'htm_key' in res.content.decode(): nb_success += 1 nb_reqs_total += len(reqs) reqs = [] print('%d reqs total, %d reqs successful, %.3lf success rate' % (nb_reqs_total, nb_success, nb_success / nb_reqs_total))
import grequests
import itertools
import re
import json

# open() never returns a falsy handle, so the original `if file_handle:` check could
# not catch a missing or unreadable session file; use try/except instead.
try:
    with open('session.txt', 'r') as file_handle:
        cookies = json.loads(file_handle.read())
except (IOError, ValueError):
    print('Error logging in.')
    exit()

rs = (grequests.get(
    f'https://mbasic.facebook.com/events/ajax/dashboard/calendar/birthdays/?cursor=2020-{month:02d}-01',
    cookies=cookies) for month in range(1, 13))
birthdays_responses = grequests.map(rs)
# skip requests that failed outright (grequests.map() returns None for those)
birthdays_string = ''.join([response.text for response in birthdays_responses if response is not None])
dates = re.findall('[a-zA-Z]+, [a-zA-Z]+ [0-9]+, [0-9]{4}', birthdays_string)
persons = re.findall(r'alt=\\"(.*?)\\"', birthdays_string)
for (date, person) in itertools.zip_longest(dates, persons):
    print(f'{date} - {person}')
def _test_validator(self, proxy): COLLECTION.update_one({'proxy': proxy}, {'$inc': {'detect_times': 1}}) detect_times_label = 0 http, https = { 'http': proxy.get('http') }, { 'https': proxy.get('https') } reqs = (request(choice(TESTSITES.get('http')), proxy=http, is_map=True), request(choice(TESTSITES.get('https')), proxy=https, is_map=True)) resps = grequests.map(reqs, gtimeout=10, exception_handler=eh) for index, resp in enumerate(resps): if not resp: continue matches = RE_HOST.findall(resp.text) if not matches or self.my_ip in matches: continue p = (https, 'https') if index > 0 else (http, 'http') r = request(p[0], timeout=10) if r: continue self.real_time_q.put(p) self.logger.info('%s, %s', matches, proxy) detect_times_label += 1 if detect_times_label == 2: COLLECTION.update_one({'proxy': proxy}, {'$inc': { 'detect_times': 1 }}) self.count_testsite_ok += 1 cursor = COLLECTION.find_one({'proxy': proxy}) this_id = cursor.get('_id') alive_times = cursor.get('alive_times') + 1 detect_times = cursor.get('detect_times') # [0, 1] score = alive_times * 2 / (alive_times + detect_times) self.logger.info('high anonymity: %s score: %s %s %s', self.count_testsite_ok, score, p[0], resp.url) COLLECTION.update_one({'_id': ObjectId(this_id)}, { '$set': { 'score': score }, '$push': { 'alive_time_base': datetime.now() }, '$addToSet': { 'type': 'high', 'protocol': p[1] }, '$inc': { 'alive_times': 1 } })
def map_requests_for_bill_list(bill_reqs):
    bill_responses = grequests.map(bill_reqs)
    return bill_responses
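# A hypothetical usage sketch for map_requests_for_bill_list() above; the bill URLs
# and the status check are assumptions for illustration, not part of the original.
import grequests

bill_urls = ['https://example.org/api/bills/1', 'https://example.org/api/bills/2']
bill_reqs = [grequests.get(url) for url in bill_urls]
for bill_response in map_requests_for_bill_list(bill_reqs):
    if bill_response is not None and bill_response.status_code == 200:
        print(bill_response.url)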
import grequests
import requests

root = 'https://cdn.hackerrank.com/hackerrank/static/contests/capture-the-flag/secret/'
keys = requests.get(root + 'key.json').json().keys()
# name the batch `reqs` so the requests module is not shadowed by the list
reqs = [grequests.get(root + 'secret_json/' + key + '.json') for key in keys]
secrets = [dict(response.json())['news_title'] for response in grequests.map(reqs)]
for secret in sorted(secrets):
    print(secret)
def data(self, duration='H'): """ Retrieve the hourly data from CDEC. Build a list of the URL's that need to be fetched and use grequests to fetch the data. Then the data retrieval will be significantly sped up. """ self.db.db_connect() # deteremine the client/s for processing client = self.config['client'] self._logger.info('Client for CDEC data collection: {}'.format(client)) # query to get the mesowest stations for the given client client_qry = "SELECT primary_id FROM tbl_stations_view WHERE source='cdec' AND client='{}'" cursor = self.db.cnx.cursor() # get the current local time endTime = utils.get_end_time(self.config['timezone'], self.config['end_time'], self.timezone) # create timezone objects mnt = pytz.timezone(self.config['timezone']) pst = pytz.timezone(self.timezone) # if a start time is specified localize it and convert to UTC if self.config['start_time'] is not None: startTime = pd.to_datetime(self.config['start_time']) startTime = mnt.localize(startTime) startTime = startTime.tz_convert(self.timezone) # go through each client and get the stations req = [] stations = [] for cl in client: self._logger.info('Building URLs for client {}'.format(cl)) cursor.execute(client_qry.format(cl)) sta = cursor.fetchall() sta = [s[0] for s in sta] stations = stations + sta # go through each and get the data for stid in sta: if self.config['start_time'] is None: # determine the last value for the station qry = "SELECT max(date_time) + INTERVAL 1 MINUTE AS d FROM tbl_level0 WHERE station_id='%s'" % stid cursor.execute(qry) startTime = cursor.fetchone()[0] if startTime is not None: startTime = pd.to_datetime(startTime, utc=True) else: # start of the water year, do a PST time wy = utils.water_day(endTime, self.timezone) startTime = pd.to_datetime(datetime(wy - 1, 10, 1), utc=False) startTime = pst.localize(startTime) # determine what sensors to retreive and filter to duration sens = self.single_station_info(stid) sens = sens[sens.DUR_CODE == duration] self._logger.debug( 'Building url for station {} between {} and {}'.format( stid, startTime.strftime('%Y-%m-%d'), endTime.strftime('%Y-%m-%d'))) # build the url's for each sensor for s in self.sensor_metadata.keys(): if sens.SENS_LONG_NAME.str.contains(s).any(): p = {} p['Stations'] = stid p['SensorNums'] = self.sensor_metadata[s]['num'] p['dur_code'] = duration p['Start'] = startTime.strftime('%Y-%m-%d') p['End'] = endTime.strftime('%Y-%m-%d') req.append(grequests.get(self.data_csv_url, params=p)) # close the db connection since the data retrieval might take a while self.db.db_close() # send the requests to CDEC self._logger.info('Sending {} requests to CDEC'.format(len(req))) res = grequests.map(req, size=1) # parse the responses data, av = self.cdec2df(res, stations) # insert into the database for stid in list(data.keys()): if data[stid] is not None: self.db.insert_data( data[stid], 'level0', description='CDEC data for {}'.format(stid)) for stid in list(av.keys()): if av[stid] is not None: self.db.insert_data( av[stid], 'level1', description='CDEC data for {} averaged'.format(stid)) # quality control if self.qc: av[stid] = self.qc.run(av[stid])
def get_batch_FIPS(url_list):
    # The original signature took `ll_list` but then read the undefined names
    # `url_list` and `fips_list`; this version assumes the caller passes the
    # already-built lookup URLs and collects the FIPS codes locally.
    fips_list = []
    rs = [grequests.get(u) for u in url_list]
    for response in grequests.map(rs):
        if response is not None:
            fips_list.append(response.json()['Block']['FIPS'])
    return fips_list
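# A hypothetical usage sketch: the ['Block']['FIPS'] response shape suggests the
# FCC census block lookup service, so the coordinates and URL format below are
# assumptions for illustration rather than something taken from the original code.
coords = [(38.8977, -77.0365), (40.7128, -74.0060)]
urls = ['https://geo.fcc.gov/api/census/block/find?latitude=%s&longitude=%s&format=json' % (lat, lon)
        for lat, lon in coords]
print(get_batch_FIPS(urls))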
def async_req(self): return grequests.map((grequests.get(u) for u in self.urls), exception_handler=self.exception, size=self.size)
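# A minimal sketch of an assumed host class for the async_req method above: the
# attribute names self.urls and self.size and the self.exception handler are
# inferred from the call and are not part of the original snippet.
import grequests

class AsyncFetcher(object):
    def __init__(self, urls, size=5):
        self.urls = urls
        self.size = size

    def exception(self, request, exception):
        # grequests passes the failed (unsent) request and the raised exception
        print('request to %s failed: %s' % (request.url, exception))

    def async_req(self):
        return grequests.map((grequests.get(u) for u in self.urls),
                             exception_handler=self.exception,
                             size=self.size)

print(AsyncFetcher(['http://httpbin.org/get', 'http://httpbin.org/status/404']).async_req())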
import requests
import grequests

urls = [
    'http://www.heroku.com',
    'http://tablib.org',
    'http://httpbin.org',
    'http://python-requests.org',
    'http://www.heroku.com',
    'http://tablib.org',
    'http://httpbin.org',
    'http://python-requests.org',
    'http://www.heroku.com',
    'http://tablib.org',
    'http://httpbin.org',
    'http://python-requests.org',
    'http://kennethreitz.com'
]

# Asynchronous: build unsent requests and let grequests send them concurrently.
rs = (grequests.get(u) for u in urls)
print(grequests.map(rs))

# Legacy variant: in requests < 1.0, return_response=False returned an unsent
# request; the keyword no longer exists in current requests releases.
rs = (requests.get(u, return_response=False) for u in urls)
print(grequests.map(rs))

# Synchronous equivalent with plain requests, for comparison.
print(list(map(lambda u: requests.get(u), urls)))
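# A small sketch, not taken from the example above, of grequests.imap(), which
# yields responses as they complete instead of waiting for the whole batch the
# way grequests.map() does; it reuses the urls list defined above.
rs = (grequests.get(u) for u in urls)
for response in grequests.imap(rs, size=4):
    print(response.url, response.status_code)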
try: import grequests, requests, re, sys except: sys.exit('No gRequests, requests or re') r = requests.get('https://collaboration.skype.com/promotion/', verify=False) if(r.status_code != 200): sys.exit('Error') token = re.search('csrfmiddlewaretoken\\\' value=\\\'(.*?)\\\'', r.content).group(1).strip() # Yolo the "security" print 'Found token:', token def hook(r): if r.status_code == 200: global done done += 1 if(done % 10 == 0): print 'Done:', done else: sys.exit(r.content) for email in emails: reqarr.append(grequests.post('https://collaboration.skype.com/promotion/', cookies = r.cookies, verify=False, hooks={'response': [hook]}, data={'email': email + '@' + ending, 'csrfmiddlewaretoken': token}, headers={'Referer': 'https://collaboration.skype.com/promotion/', 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36'})) r = grequests.map(reqarr, size = per_time)[0] print 'Done'