def get_all_users_infos():
    base_url = 'https://open.kattis.com/ranklist'
    page = REQ.get(base_url)

    users = set()

    def parse_users(page):
        nonlocal users
        matches = re.finditer('<a[^>]*href="/users/(?P<member>[^"/]*)"[^>]*>(?P<name>[^<]*)</a>', page)
        for match in matches:
            member = match.group('member')
            if member in users:
                continue
            users.add(member)
            name = match.group('name').strip()
            yield {'member': member, 'info': {'name': name}}

    yield from parse_users(page)

    urls = re.findall(r'url\s*:\s*(?P<url>"[^"]+")', page)

    def fetch_url(url):
        url = json.loads(url)
        url = urljoin(base_url, url)
        page = REQ.get(url)
        yield from parse_users(page)

    with PoolExecutor(max_workers=10) as executor, tqdm(total=len(urls), desc='urls') as pbar:
        for gen in executor.map(fetch_url, urls):
            yield from gen
            pbar.update()
def task_properties(postcode):
    url = 'https://www.openrent.co.uk/properties-to-rent/?term=%s&bedrooms_max=-1' % postcode
    try:
        r = requests.get(url)
    except Exception as e:
        logging.error('Request failed: %s' % e)
        return (False, (postcode,) + (0,) * 4)

    ids_match = re.search(r"PROPERTYIDS = \[(.*?)\];", r.text, re.MULTILINE | re.DOTALL)
    if not ids_match:
        logging.warning('%s: No property ids found on: %s' % (postcode, url))
        return (False, (postcode,) + (0,) * 4)

    ids_raw = ids_match.group(1).split(',')
    ids = [i.strip() for i in ids_raw if i.strip()]

    properties = []
    chunk_size = 50
    chunks = [ids[i:i + chunk_size] for i in range(0, len(ids), chunk_size)]

    # multi-threaded pagination
    with PoolExecutor(max_workers=ceil(sqrt(CONCURRENCY))) as executor:
        for properties_chunk in executor.map(get_properties, chunks):
            properties += properties_chunk

    stats = get_stats(properties)
    return (True, (postcode,) + stats)
def triggerAutoImports(limit=100, chunksize=10):
    articleCount = w.session.query(w.Post.ID).filter(
        w.Post.post_type == 'post').count()
    maxresults = min(articleCount, limit)
    offset = 0
    chunksize = min(chunksize, limit)
    pbar = tqdm(total=maxresults)
    while True:
        chunk = w.session.query(w.Post.ID).filter(
            w.Post.post_type == 'post').limit(chunksize).offset(offset).all()
        postIds = [id[0] for id in chunk]
        if len(chunk) > 0:
            with PoolExecutor(max_workers=10) as executor:
                for _ in executor.map(triggerAutoImport, postIds):
                    pbar.update(1)
        else:
            logging.info('no articles to trigger')
        offset += chunksize
        if (offset + chunksize) > maxresults:
            chunksize = maxresults - offset
        if len(chunk) < chunksize or offset >= maxresults:
            break
def process(args: argparse.Namespace) -> dict:
    """Process the image files in threads

    :param args: the parsed CLI result
    :return: the AEB images grouped by time
        {
            'ISODATE1': [Image('IMG1'), Image('IMG2'), Image('IMG3')],
            'ISODATE2': [Image('IMG4')],
            # ...
        }
    """
    log.debug("process...")
    result = {}
    with PoolExecutor(max_workers=args.jobs) as executor:
        todos = []
        for image in get_all_image_files(args.dir, with_raw=args.withraw):
            log.debug("Add image %s to future", image)
            future = executor.submit(consume, image)
            todos.append(future)

        for future in as_completed(todos):
            image = future.result()
            if image is None:
                continue
            # use the ISO format of the datetime object so the key is serializable
            result.setdefault(image.date.isoformat(), []).append(image)
    return result
def syncForumPosts(limit=1000, chunksize=100):
    postCount = a.session.query(a.ForumPost.id).order_by(desc(a.ForumPost.modificationDate)).filter(
        a.ForumPost.language == 'de', a.ForumPost.deleted == 0).count()
    maxresults = min(postCount, limit)
    offset = 0
    chunksize = min(chunksize, limit)
    pbar = tqdm(total=maxresults)
    while True:
        chunk = a.session.query(a.ForumPost.id).order_by(desc(a.ForumPost.modificationDate)).filter(
            a.ForumPost.language == 'de', a.ForumPost.deleted == 0).offset(offset).limit(chunksize).all()
        postIds = [id[0] for id in chunk]
        if len(postIds) > 0:
            with PoolExecutor(max_workers=10) as executor:
                for _ in executor.map(handleForumPostThreaded, postIds):
                    pbar.update(1)
        else:
            logging.info('no posts to import')
        offset += chunksize
        if (offset + chunksize) > maxresults:
            chunksize = maxresults - offset
        if len(chunk) < chunksize or offset >= maxresults:
            break
def updateTags():
    uniqueTags = getApitTags()
    wp_tags = getWpPostTags()
    wp_tags = wp_tags.to_records()
    tags_to_create = []
    for tag in uniqueTags:
        found = next(
            (x for x in wp_tags if x.slug == slugify(tag, separator="-")), None)
        if found is None:
            tags_to_create.append(tag)

    def createIt(tagName):
        tag = tagName.capitalize()
        tagSlug = slugify(tag, separator="-")
        try:
            logging.info('creating tag: %s', tagName)
            return api.post('/tags', data={'name': tag, 'slug': tagSlug}).json()
        except Exception as e:
            logging.error('error on creating tag: %s', e)

    if len(tags_to_create) > 0:
        with PoolExecutor(max_workers=64) as executor:
            for _ in executor.map(createIt, tags_to_create):
                pass
    else:
        logging.info("All tags are up to date; no action required")
def main() -> int:
    """Main path of execution: direct to experiments or regular functions.

    :rtype: int, returns 0 if completed without errors
    """
    # Expected arguments: input folder, URL, number of workers
    if len(sys.argv) != 4:
        raise ValueError(
            "Arguments list is wrong. Please use the following format: {} {} {} {}"
            .format("python iWebLens_client.py", "<input_folder>", "<URL>", "<number_of_workers>"))

    if int(sys.argv[3]) == 0:
        # If the client thread count argument is 0, run the experiment suite
        run_experiments()
        return 0

    # Get arguments for inputs and run flags
    input_folder = os.path.join(sys.argv[1], "")
    images = get_images_to_be_processed(input_folder)
    num_images = len(images)
    num_workers = int(sys.argv[3])
    start_time = time.time()

    # Create a worker pool to invoke the requests in parallel
    with PoolExecutor(max_workers=num_workers) as executor:
        for _ in executor.map(call_object_detection_service, images):
            pass

    elapsed_time = time.time() - start_time
    print("Total time spent: {} average response time: {}".format(
        elapsed_time, elapsed_time / num_images))
    return 0
def convert_dir_batch(model, device, args):
    if model.input_size is None and args.batch_size != 1:
        raise ValueError(
            "model.input_size is None. use --tiled-render or --batch-size 1.")
    os.makedirs(args.output, exist_ok=True)
    loader = ImageLoader(directory=args.input, max_queue_size=256)
    with torch.no_grad(), PoolExecutor() as pool:
        output_paths = [None] * args.batch_size
        minibatch = torch.zeros((args.batch_size, model.in_channels,
                                 model.input_size, model.input_size))
        minibatch_index = 0
        for im, meta in tqdm(loader, ncols=60):
            x = im
            if model.in_channels == 1:
                x = TF.to_grayscale(x)
            h = w = model.input_size
            if not (h == x.height and w == x.width):
                x = TF.resize(x, (h, w))
            minibatch[minibatch_index] = TF.to_tensor(x)
            output_filename = path.splitext(path.basename(meta["filename"]))[0] + ".png"
            output_paths[minibatch_index] = path.join(args.output, output_filename)
            minibatch_index += 1
            if minibatch_index == minibatch.shape[0]:
                z = simple_render(minibatch, model, device)
                for i in range(minibatch_index):
                    pool.submit(save_image, TF.to_pil_image(z[i]), output_paths[i])
                minibatch_index = 0
        # flush the remaining partial batch
        if minibatch_index > 0:
            z = simple_render(minibatch[0:minibatch_index], model, device)
            for i in range(minibatch_index):
                pool.submit(save_image, TF.to_pil_image(z[i]), output_paths[i])
def ec2():
    print('Processing EC2 Instances')
    instances = boto3.resource('ec2').instances.all()
    with PoolExecutor(max_workers=4) as executor:
        for _ in executor.map(tag_instance, instances):
            pass
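# tag_instance is not defined in this snippet; a minimal hypothetical
# implementation is sketched below. The tag key/value are placeholders,
# but Instance.create_tags() is the standard boto3 call for this.
def tag_instance(instance):
    # apply one example tag to the EC2 instance resource
    instance.create_tags(Tags=[{'Key': 'Environment', 'Value': 'production'}])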
def videos(session, courses):
    print("finding videos")
    links = [get_videos_links(course) for course in courses]
    links = list(chain.from_iterable(links))
    get_videos = partial(go_to_download, session)
    with PoolExecutor(max_workers=32) as executor:
        # results are never consumed, so exceptions raised in workers are dropped silently
        executor.map(get_videos, links)
def execute(self, func, jobs=(), max_workers=MAX_WORKERS):
    '''
    Runs a function in a worker pool and returns a generator containing results.

    https://dev.to/rhymes/how-to-make-python-code-concurrent-with-3-lines-of-code-2fpe
    '''
    # a tuple default avoids the mutable-default-argument pitfall of jobs=[]
    with PoolExecutor(max_workers=max_workers) as executor:
        for result in executor.map(func, jobs):
            yield result
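# A minimal usage sketch for the execute() helper above. `square` is a
# hypothetical worker; since the helper never touches self, None stands in
# for the host object here. executor.map yields results in submission order.
def square(n):
    return n * n

for value in execute(None, square, jobs=range(10), max_workers=4):
    print(value)  # 0, 1, 4, 9, ... computed concurrently, returned in order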
def syncMatchingArticleComments():
    wp_postmetas = w.PostMeta.q.filter(
        w.PostMeta.meta_key == 'legacy_article_id').all()
    commentIds = []
    for postmeta in wp_postmetas:
        comments = a.ArticleComment.q.filter(
            a.ArticleComment.article_id == postmeta.meta_value).all()
        commentIds += [comment.id for comment in comments]
    maxresults = len(commentIds)
    pbar = tqdm(total=maxresults)

    def handleArticleCommentThreaded(commentId):
        # give each worker thread its own scoped sessions, then release them
        w.session()
        a.session()
        comment = handleArticleComment(commentId)
        w.session.remove()
        a.session.remove()
        return comment

    if len(commentIds) > 0:
        with PoolExecutor(max_workers=10) as executor:
            for _ in executor.map(handleArticleCommentThreaded, commentIds):
                pbar.update(1)
    else:
        logging.info('no comments to import')
    fixCommentsCounter()
    logging.info('import of matching comments done.')
def pull_log_all(client, project_name, logstore_name, from_time, to_time):
    cpu_count = multiprocessing.cpu_count() * 2
    shards = client.list_shards(project_name, logstore_name).get_shards_info()
    current_shards = [str(shard['shardID']) for shard in shards]
    target_shards = current_shards
    worker_size = min(cpu_count, len(target_shards))
    result = []
    with PoolExecutor(max_workers=worker_size) as pool:
        futures = [
            pool.submit(MyMagics.pull_worker, client, project_name,
                        logstore_name, from_time, to_time, shard_id=shard)
            for shard in target_shards
        ]
        try:
            for future in as_completed(futures):
                data = future.result()
                result.extend(data)
            return True, result
        except KeyboardInterrupt:
            clear_output()
            print("Cancelling the current fetch...")
            for future in futures:
                if not future.done():
                    future.cancel()
            return False, result
def get_users_infos(users, pbar=None):
    def fetch_profile(user):
        url = f'http://api.topcoder.com/v2/users/{user}'
        ret = {}
        for _ in range(2):
            try:
                page = REQ.get(url)
                ret = json.loads(page)
                if 'error' in ret:
                    if isinstance(ret['error'], dict) and ret['error'].get('value') == 404:
                        ret = {'handle': user, 'action': 'remove'}
                    else:
                        continue
                break
            except Exception:
                pass
            sleep(1)
        if 'handle' not in ret:
            if not ret:
                ret['delta'] = timedelta(days=30)
            ret['handle'] = user
        return ret

    ret = []
    with PoolExecutor(max_workers=4) as executor:
        for user, data in zip(users, executor.map(fetch_profile, users)):
            data['handle'] = data['handle'].strip()
            assert user.lower() == data['handle'].lower()
            if pbar:
                pbar.update(1)
            ret.append(None if data.get('action') == 'remove' else data)
    return ret
def main():
    options = docopt(__docopt__, help=True)
    cleaned_options = validate_user_arguments(options)
    config_usernames, repos, api_token = get_settings()
    if not api_token:
        api_token = os.environ.get("GITHUB_TOKEN_GALAXY") or os.environ.get("GITHUB_TOKEN")
    if not api_token:
        print(
            "Please provide a GitHub API token via the environment variable "
            "`GITHUB_TOKEN_GALAXY` or via the settings file.",
            file=sys.stderr,
        )
        sys.exit(1)
    if cleaned_options["USERS"]:
        # We got a list of users from the CLI.
        usernames = cleaned_options["USERS"]
    else:
        # Fall back to the users from the config.
        usernames = config_usernames
    get_prs_for_user_with_api_token = partial(get_prs_for_user, api_token=api_token)
    with PoolExecutor(max_workers=8) as executor:
        for data in executor.map(get_prs_for_user_with_api_token, usernames):
            print_prs_detail(data, repos)
def scan_logs(source_path: str) -> list:
    """
    Collects the *.txt files under the given path and processes each one in
    multiple threads via process_log_file.

    :param source_path: path where the log files live
    :return: a list with every line of every file, as dicts
    """
    filenames = glob.glob(source_path + '/*.txt')
    filenames.sort(key=os.path.getmtime)  # oldest files first, by modification time
    all_events = []
    with PoolExecutor(max_workers=4) as executor:
        t0 = get_current_time()
        logging.info(f'start scanning at {datetime.now()}')
        for result in executor.map(process_log_file,
                                   [f for f in filenames if not file_is_old(f)]):
            all_events.extend(result)
    all_events.sort(key=lambda x: x["datetime"])  # sort the merged list by date so clips stay contiguous
    logging.info(
        f'all files scanned and sorted. Total lines: {len(all_events)}, '
        f'total time: {get_current_time() - t0}')
    return all_events
def importArticleComments(limit=100, chunksize=10, lastModificationDate='1970-01-01 0:00'):
    logging.info('start importing comments')
    commentsCount = a.session.query(a.ArticleComment.id).filter(
        a.ArticleComment.modificationDate >= lastModificationDate,
        a.ArticleComment.language == 'de',
        a.ArticleComment.deleted == 0).count()
    maxresults = min(commentsCount, limit)
    offset = 0
    chunksize = min(chunksize, limit)
    pbar = tqdm(total=maxresults)
    while True:
        chunk = a.session.query(a.ArticleComment.id).order_by(
            desc(a.ArticleComment.modificationDate)).filter(
            a.ArticleComment.modificationDate >= lastModificationDate,
            a.ArticleComment.language == 'de',
            a.ArticleComment.deleted == 0).offset(offset).limit(chunksize).all()
        commentIds = [id[0] for id in chunk]
        if len(chunk) > 0:
            with PoolExecutor(max_workers=10) as executor:
                for _ in executor.map(handleArticleCommentThreaded, commentIds):
                    pass
        else:
            logging.info('no comments to import')
        pbar.update(len(chunk))
        offset += chunksize
        if (offset + chunksize) > maxresults:
            chunksize = maxresults - offset
        if len(chunk) < chunksize or offset >= maxresults:
            break
    fixCommentsCounter()
def getAllListings(center, radius):
    response = getOneListings(center, radius, 1)
    total = response["meta"]["total_listings"]
    maxPages = len(range(0, total, PAGE_SIZE))
    listings = response["data"]["listings"]
    logging.info("listings found: %s" % total)
    try:
        with PoolExecutor(max_workers=MAX_WORKERS) as executor:
            for response in executor.map(lambda p: getOneListings(center, radius, p),
                                         range(2, maxPages + 1)):
                listings += response["data"]["listings"]
        logging.info('fetch all listings: done')
    except Exception as e:
        logging.error('fetch all listings (failed): %s' % e)

    # check data integrity
    if len(listings) == total:
        logging.info("listings integrity check passed: %s/%s listings received."
                     % (len(listings), total))
    else:
        logging.warning("listings integrity check failed: %s/%s listings received."
                        % (len(listings), total))
    return listings
def updateCategories():
    uniqueCategories = getApitCategories()
    wp_categories = getWpCategories()
    wp_categories = wp_categories.to_records()
    categories_to_create = []
    for cat in uniqueCategories:
        found = next((x for x in wp_categories
                      if x.slug == slugify(cat, separator="-")), None)
        if found is None:
            categories_to_create.append(cat)

    def createIt(categoryName):
        category = categoryName.capitalize()
        categorySlug = slugify(categoryName, separator="-")
        try:
            logging.info('creating category: %s', category)
            return api.post('/categories', data={'name': category, 'slug': categorySlug}).json()
        except Exception as e:
            logging.error('error on creating category: %s', e)

    if len(categories_to_create) > 0:
        with PoolExecutor(max_workers=64) as executor:
            for _ in executor.map(createIt, categories_to_create):
                pass
    else:
        logging.info("All categories are up to date; no action required")
def ec2(region):
    print('Processing EC2 Instances')
    instances = boto3.resource('ec2', region_name=region).instances.all()
    print('ec2 - {}'.format(region))
    with PoolExecutor(max_workers=4) as executor:
        for _ in executor.map(tag_instance, instances):
            pass
def convert_files(ctx, files, args):
    loader = ImageLoader(files=files, max_queue_size=128)
    os.makedirs(args.output, exist_ok=True)
    with torch.no_grad(), PoolExecutor() as pool:
        for im, meta in tqdm(loader, ncols=60):
            z = ctx.convert(im, meta, args.method, args.noise_level,
                            args.tile_size, args.batch_size, args.tta)
            output_filename = path.splitext(path.basename(meta["filename"]))[0] + ".png"
            pool.submit(save_image, z, meta, path.join(args.output, output_filename))
def updateFeatureImages():
    logging.info('reading data ...')
    featureImages = getApitFeatureImages(limit=1000)
    wp_mediafiles = getWpMediaFiles()
    media_to_create = []
    logging.info('matching ...')
    for img in featureImages:
        print('checking', img.id)
        if not wp_mediafiles.index.contains(f'{img.id}'):
            media_to_create.append(img)
    logging.info('about to import %d files', len(media_to_create))
    maxfiles = len(media_to_create)

    def createIt(userFile):
        try:
            mediaSrc = requests.get(userFile.url)
            headers = {
                'cache-control': 'no-cache',
                "Content-Disposition": f'attachment; filename="{userFile.fileName}"',
                'content-type': 'image/jpeg'
            }
            res = api.post('/media', headers=headers, data=mediaSrc.content)
            mediaResponse = json.loads(res.text)
            mediaId = mediaResponse['id']
            # link the new media item back to its legacy id
            mediaPayload = {"meta": {"legacy_userfile_id": f'{userFile.id}'}}
            api.put(f'/media/{mediaId}', data=json.dumps(mediaPayload),
                    headers={'content-type': 'application/json'})
            pbar.update(1)  # one file created
            return mediaId
        except Exception as e:
            logging.error('error on creating media: %s', e)

    if len(media_to_create) > 0:
        pbar = tqdm(total=maxfiles)
        with PoolExecutor(max_workers=8) as executor:
            for _ in executor.map(createIt, media_to_create):
                pass
    else:
        logging.info("All featureImages are up to date; no action required")
def check_btn_gpio_input(self) -> None:
    combined = self.config.combined
    while True:
        # a fresh pool per tick: leaving the with-block waits for all button checks
        with PoolExecutor() as executor:
            for val in combined:
                executor.submit(self._check_btn, val)
        if not self.restarting_game:
            self.time_for_move -= .01
        time.sleep(.01)
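# A possible restructuring (a sketch, not the project's code): hoisting the
# pool out of the while-loop avoids re-creating worker threads on every tick.
# It assumes self._check_btn tolerates being scheduled from a long-lived pool.
def check_btn_gpio_input_pooled(self) -> None:
    combined = self.config.combined
    with PoolExecutor(max_workers=max(1, len(combined))) as executor:
        while True:
            futures = [executor.submit(self._check_btn, val) for val in combined]
            for f in futures:
                f.result()  # wait for this tick's checks and surface worker errors
            if not self.restarting_game:
                self.time_for_move -= .01
            time.sleep(.01)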
def sending_multiple_requests(url, session, proxies=None):
    urls = [url] * NUM_OF_REQUESTS
    with PoolExecutor(max_workers=NUM_OF_REQUESTS * NUM_OF_URLS_MULTIPLE) as executor:
        args = ((url, session, proxies) for url in urls)
        # the lambda unpacks each (url, session, proxies) tuple into get_it's arguments
        res = executor.map(lambda p: get_it(*p), args)
    return res
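# An equivalent formulation (a sketch): executor.map accepts several iterables
# and zips them, so itertools.repeat can carry the shared session/proxies
# instead of packing tuples and unpacking them in a lambda.
from itertools import repeat

def sending_multiple_requests_v2(url, session, proxies=None):
    urls = [url] * NUM_OF_REQUESTS
    with PoolExecutor(max_workers=NUM_OF_REQUESTS * NUM_OF_URLS_MULTIPLE) as executor:
        # materialize inside the with-block so all requests finish before returning
        return list(executor.map(get_it, urls, repeat(session), repeat(proxies)))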
def run_experiments():
    """
    Run a specific set of worker-pod combinations several times to test web
    service performance and save the results as a CSV.
    """
    import pandas as pd
    from tqdm import tqdm
    global TESTING
    # Disable output for each call but allow error output
    TESTING = True
    # Fetch arguments for input and set run flags manually
    input_folder = os.path.join(sys.argv[1], "")
    images = get_images_to_be_processed(input_folder)
    num_images = len(images)
    # At most max_workers threads execute calls asynchronously
    num_workers = [1, 6, 11, 16, 21, 26, 31]
    pods = [1, 2, 3]  # how many pod replicas are in the active cluster
    n_tests = 3  # how many times to run each configuration for more accuracy
    print("Experiment mode on, running tests for pods {} and workers {}, each a total of {} times."
          .format(pods, num_workers, n_tests))
    # Record the results in a pandas table
    cols = ['pod_count', 'client_threads', 'avg_response']
    df = pd.DataFrame(columns=cols)
    for pod in pods:
        # Initialise and increment the progress bar
        for num_worker in tqdm(num_workers):
            aggregate_average_response = 0  # accumulates the average time over n_tests
            # Repeat each worker-pod combination n_tests times for more representative results
            for i in range(n_tests):
                start_time = time.time()
                with PoolExecutor(max_workers=num_worker) as executor:
                    for _ in executor.map(call_object_detection_service, images):
                        pass
                elapsed_time = time.time() - start_time
                aggregate_average_response += elapsed_time / num_images  # update the running total
            # Create a data row and append it to the results table
            row = pd.DataFrame([[pod, num_worker, aggregate_average_response / n_tests]],
                               columns=cols)
            df = df.append(row)
        # Wait indefinitely for the user to change the deployment
        _ = input("Please increment total pod replicas to {}. Sleeping, press enter to wake."
                  .format(pod + 1))
    # Save the results table as CSV
    df.to_csv('experiments.csv', index=False)
def updateAuthors():
    authors = getApitAuthors()
    wp_users = getWpUsers()
    authors_to_create = []
    for author in authors:
        if not wp_users.index.contains(f'{author.id}'):
            authors_to_create.append(author)

    def createIt(author):
        if author.staffPageDescriptionJson is not None:
            description = json.loads(author.staffPageDescriptionJson)
        else:
            description = {"de": ""}
        if author.emailAddressNew is not None:
            email = author.emailAddressNew
        else:
            email = author.emailAddress
        email = re.sub(r"_DA_\d*$", "", email)  # strip the legacy "_DA_<n>" suffix
        name = author.communityName.split(' ')
        payload = {
            "username": slugify(author.username, separator="_"),
            "name": author.communityName,
            "first_name": name[0],
            "last_name": name[1] if len(name) > 1 else '',
            "roles": ["author"],
            "email": email,
            "description": description.get('de'),
            "locale": "en_US",
            "nickname": "",
            "password": "******",
            "meta": {"legacy_user_id": f'{author.id}'}
        }
        try:
            logging.info('creating author: %s', email)
            res = api.post('/users', data=json.dumps(payload),
                           headers={'content-type': 'application/json'})
            return res
        except Exception as e:
            logging.error('error on creating author: %s (%s)', email, e)

    if len(authors_to_create) > 0:
        with PoolExecutor(max_workers=64) as executor:
            for _ in executor.map(createIt, authors_to_create):
                pass
    else:
        logging.info("All authors are up to date; no action required")
def courses(session, page, callback):
    print("finding courses")
    links = page.find_all(attrs={"data-role": "course-box-link"})
    base = "https://chinesezerotohero.teachable.com/"
    links = [base + link['href'] for link in links][1:]
    get_courses = partial(go_to_link, session)
    with PoolExecutor(max_workers=8) as executor:
        result = list(executor.map(get_courses, links))
    callback(result)
def post(self, request: HttpRequest, *args, **kwargs):
    if request.json_data is not None:
        data = request.json_data['data']
        with PoolExecutor(max_workers=4) as executor:
            threads = [
                executor.submit(self._save_kita_entry, data=d)
                for d in data
            ]
            # collect the results so exceptions raised in workers propagate
            for t in threads:
                t.result()
    return HttpResponse(status=200)
def convert_dir_tiled(model, device, args):
    loader = ImageLoader(directory=args.input, max_queue_size=256)
    os.makedirs(args.output, exist_ok=True)
    with torch.no_grad(), PoolExecutor() as pool:
        for im, meta in tqdm(loader, ncols=60):
            z = convert(im, model, device, args)
            output_filename = path.splitext(path.basename(meta["filename"]))[0] + ".png"
            pool.submit(save_image, TF.to_pil_image(z),
                        path.join(args.output, output_filename))
def Parse():
    # use a name that does not shadow the built-in list
    results = []
    for root, dirs, files in os.walk(directory):
        with PoolExecutor(max_workers=4) as executor:
            for fund in executor.map(ParseInternal, files):
                results.append(fund)
    return results