def clean(self):
    threadPool = ThreadPool(self.threadCount)
    args = []
    for t in range(self.topicNum):
        args.append((self.connectorName, self.topics[t], self.partitionNum))
    threadPool.starmap(self.driver.cleanTableStagePipe, args)
    threadPool.close()
    threadPool.join()
def upload_for_hosts(self, hosts, files, remote_path):
    files_for_hosts = []
    for host in hosts:
        files_for_hosts.append([host, files, remote_path])
    pool = ThreadPool(self.threads_num)
    pool.starmap(self.upload_on_host, files_for_hosts)
    pool.close()
    pool.join()
def main(self, threads):
    from multiprocessing.dummy import Pool
    self.load(base_path)
    self.load_proxies(proxies_path, p_type)
    self.threads = threads
    pool = Pool(self.threads)
    pool.starmap(self.login, zip(self.acc_array, itertools.cycle(self.proxies)))
def start(self):
    pool = ThreadPool(self.PROFILES_LOADER_POOL_SIZE)
    pool.starmap(self._do_search, self._get_search_iterator())
    pool.close()
    pool.join()
def download_media(media_set, session, directory, board_name):
    def download(thread, session, directory):
        directory = thread["download_path"] + "/"
        valid = False
        for post in thread["posts"]:
            name_key = "originalName"
            for media in post["files"]:
                filename = re.sub(r'[\\/*?:"<>|]', '', media[name_key])
                ext = media["ext"]
                alt_name = media["alt_filename"]
                link = "https://bbw-chan.nl" + media["path"]
                download_path = directory + filename
                count_string = len(download_path)
                lp = are_long_paths_enabled()
                if not lp:
                    if count_string > maximum_length:
                        num_sum = count_string - maximum_length
                        name_key = "alt_filename"
                        download_path = directory + post[name_key] + "." + ext
                og_filename = os.path.splitext(filename)[0]
                # Check for dupe here
                r = session.get(link, stream=True)
                if r.status_code != 404:
                    if not os.path.exists(os.path.dirname(download_path)):
                        os.makedirs(os.path.dirname(download_path))
                    with open(download_path, 'wb') as f:
                        for chunk in r.iter_content(chunk_size=1024):
                            if chunk:  # filter out keep-alive new chunks
                                f.write(chunk)
                    logger.info("Link: {}".format(link))
                    logger.info("Path: {}".format(download_path))
                    valid = True
                else:
                    logger.info("Fail (Link): {}".format(link))
                    logger.info("Fail (Path): {}".format(download_path))
        if valid:
            os.makedirs(directory, exist_ok=True)
            with open(directory + 'archive.json', 'w') as outfile:
                json.dump(thread, outfile)
            return thread
        else:
            return

    print("Download Processing")
    print("Name: " + board_name)
    print("Directory: " + directory)
    # print("Downloading "+post_count+" "+location)
    max_threads = len(media_set)
    if multithreading:
        pool = ThreadPool(max_threads)
    else:
        pool = ThreadPool(1)
    session.mount(
        'https://',
        requests.adapters.HTTPAdapter(pool_connections=4, pool_maxsize=max_threads))
    pool.starmap(download, product(media_set, [session], [directory]))
def download_media(media_set, session, directory, username, post_count, location):
    def download(media, session, directory, username):
        count = 0
        while count < 11:
            link = media["link"]
            r = json_request(session, link, "HEAD", True, False)
            if not r:
                return False
            header = r.headers
            content_length = int(header["content-length"])
            date_object = datetime.strptime(
                media["postedAt"], "%d-%m-%Y %H:%M:%S")
            og_filename = media["filename"]
            media["ext"] = os.path.splitext(og_filename)[1]
            media["ext"] = media["ext"].replace(".", "")
            download_path = media["directory"] + media["filename"]
            timestamp = date_object.timestamp()
            if not overwrite_files:
                if check_for_dupe_file(download_path, content_length):
                    return
            r = json_request(session, link, "GET", True, False)
            if not r:
                return False
            delete = False
            try:
                with open(download_path, 'wb') as f:
                    delete = True
                    for chunk in r.iter_content(chunk_size=1024):
                        if chunk:  # filter out keep-alive new chunks
                            f.write(chunk)
            except ConnectionResetError as e:
                if delete:
                    os.unlink(download_path)
                log_error.exception(e)
                count += 1
                continue
            except Exception as e:
                if delete:
                    os.unlink(download_path)
                log_error.exception(str(e) + "\n Tries: " + str(count))
                count += 1
                # input("Enter to continue")
                continue
            format_image(download_path, timestamp)
            log_download.info("Link: {}".format(link))
            log_download.info("Path: {}".format(download_path))
            return True

    print("Download Processing")
    print("Name: " + username + " | Directory: " + directory)
    print("Downloading " + str(len(media_set)) + " " + location + "\n")
    if multithreading:
        pool = ThreadPool()
    else:
        pool = ThreadPool(1)
    pool.starmap(download, product(
        media_set, [session], [directory], [username]))
def generate_statistics(files, output_path_general, output_path_letters_punc):
    """
    Entry point of the utility; builds statistics for all files.

    Parameters:
        files: list of files passed to the utility for further processing
        output_path_general: name of the file that will hold the results for the general characteristics
        output_path_letters_punc: name of the file that will hold the results for individual letters and punctuation marks
    """
    global count
    count = len(files)
    open(output_path_general, 'w').close()
    open(output_path_letters_punc, 'w').close()
    # Iterate over the list of files and write each file's statistics as a row in the output file
    pool = ThreadPool(5)
    results = pool.starmap(generate_statistic, zip(files))
    results_az_punc = pool.starmap(generate_statistic_az_punc, zip(files))
    pool.close()
    pool.join()
    file_result = []
    file_result_az_punc = []
    for result in results_az_punc:
        try:
            file_result_az_punc.append(result)
        except Exception as e:
            print(e)
            print(traceback.format_exc())
    for result in results:
        try:
            result_list = []
            for f in ordered_features:
                result_list.append(str(result[f]))
            file_result.append(result_list)
        except Exception as e:
            print(e)
            print(traceback.format_exc())
    # Define the CSV layout for the general-characteristics statistics
    table_general = pd.DataFrame(
        file_result, index=files,
        columns=["number_of_alphabets", "number_of_characters", "number_of_words",
                 "number_of_sentence", "average_sentence_length_by_character",
                 "average_sentence_length_by_word", "average_word_length"])
    table_general.to_csv(output_path_general, header=True, index=True)
    table_number_of_alphabets_az = pd.DataFrame(
        file_result_az_punc, index=files, columns=ordered_features_az_punc)
    table_number_of_alphabets_az.to_csv(output_path_letters_punc, header=True, index=True)
def loop(self):
    while 1:
        array = []
        for id in self.monitors:
            array.append((id, self.imageFolder))
        pool = ThreadPool(len(self.monitors))
        pool.starmap(self.bgTransition, array)
        time.sleep(self.timeout)
def process_testing_data(self):
    if self.testing_dir:
        pool = ThreadPool(self.NUM_THREADS)
        self.finished_loading = 0
        args = get_pool_args(self.testing_data, self.testing_labels,
                             self.testing_dir, self.num_classes)
        pool.starmap(self.process_symbol_directory, args)
        self.testing_labels = categorize_labels(self.testing_labels, self.num_classes)
        print("Finished loading testing data")
def main():
    input_file = './input.txt'
    output_file = './output.txt'
    pool = Pool(1)
    lock = Lock()
    pool.starmap(process_chunk, [
        (input_file, output_file, chunk_start, chunk_end, lock)
        for chunk_start, chunk_end in chunkify(input_file, 10)
    ])
    pool.close()
    pool.join()
def main(profile=None, dryrun=True):
    # AWS Credentials
    # https://boto3.amazonaws.com/v1/documentation/api/latest/guide/configuration.html
    session = boto3.Session(profile_name=profile)
    ec2 = session.client('ec2', region_name='us-east-1')
    regions = get_regions(ec2)
    if dryrun:
        print("Dryrun, not actually deleting anything")
    pool = ThreadPool(len(regions))
    pool.starmap(delete_everything_in_region,
                 zip(itertools.repeat(ec2), itertools.repeat(session),
                     regions, itertools.repeat(dryrun)))
    pool.close()
    pool.join()  # wait for parallel requests to complete
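# --- Illustrative sketch (not from the snippet above): the zip/itertools.repeat
# broadcast pattern that several of these functions pass to starmap. One argument
# varies per call while the constants are repeated for every worker. All names
# below (delete_stub, the region list, the dryrun flag) are hypothetical.
import itertools
from multiprocessing.dummy import Pool as ThreadPool

def delete_stub(client, region, dryrun):
    # Stand-in for a real per-region cleanup call; just reports what it would do.
    return "%s: would delete in %s (dryrun=%s)" % (client, region, dryrun)

if __name__ == '__main__':
    regions = ["us-east-1", "eu-west-1", "ap-south-1"]
    with ThreadPool(len(regions)) as pool:
        out = pool.starmap(delete_stub,
                           zip(itertools.repeat("ec2"), regions, itertools.repeat(True)))
    print(out)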
def _multiThreadedTest(infiles):
    arg1 = []
    arg2 = home
    for item in infiles:
        arg1.append(item)
    pool = ThreadPool(len(arg1))
    pool.starmap(_csvParse, zip(arg1, repeat(arg2)))
    print("Parsed through %d IP addresses." % (len(set(internal_ips + external_ips))))
    _blackList(hosts=set(internal_ips + external_ips))
    _geolocate(hosts)
def store_raw_images(paths, links):
    global pic_num
    for link, path in zip(links, paths):
        if not os.path.exists(path):
            os.makedirs(path)
        image_urls = str(urllib.request.urlopen(link).read())
        pool = ThreadPool(32)
        pool.starmap(loadImage, zip(itertools.repeat(path),
                                    image_urls.split('\\n'),
                                    itertools.count(pic_num)))
        pool.close()
        pool.join()
def __init__(self, json_path, dataset_root, classes, limit):
    # load the json file path
    # initialize the default dict to hold the annotations
    self.dataset = json.load(open(json_path, 'r'))
    print('Parsing Annotation File')
    self.img_to_annotations = defaultdict(list)
    self.img_id_to_img = {}
    self.categories = {}
    self.dataset_annotations = os.path.join(dataset_root, 'annotations')
    self.dataset_images = os.path.join(dataset_root, 'images')
    # obtain the category ids from the dataset
    category_ids = [cat['id'] for cat in self.dataset['categories'] if cat['name'] in classes]
    # create an individual annotation counter for each class
    self.classes_counter = {}
    for i in range(len(category_ids)):
        self.classes_counter[category_ids[i]] = 0
    for category in self.dataset['categories']:
        self.categories[category['id']] = category
    # loop through the annotations in the dataset
    count = 0
    for annotation in self.dataset['annotations']:
        # filter classes through the annotations
        if annotation['category_id'] in category_ids:
            self.img_to_annotations[annotation['image_id']].append(annotation)
            self.classes_counter[annotation['category_id']] += 1
            if self.classes_counter[annotation['category_id']] >= limit:
                category_ids.remove(annotation['category_id'])
    for image in self.dataset['images']:
        self.img_id_to_img[image['id']] = image
    # for k, v in self.img_to_annotations.items():
    #     if not os.path.isfile(os.path.join(self.dataset_images, str(k) + '.jpg')):
    #         print('Image file does not exist {}'.format(k))
    #     if not os.path.isfile(os.path.join(self.dataset_annotations, str(k) + '.xml')):
    #         print('Annotations file does not exist {}'.format(k))
    print('Finished Parsing Images, \n\tTotal Images -> {}'.format(len(self.img_to_annotations)))
    print('\n\tTotal Annotations:')
    for k, v in self.classes_counter.items():
        print('\n\t\t{} -> {}'.format(self.categories[k]['name'], v))
    input('Press any key to continue')
    print('Generating Pascal VOC XML Format Annotations')
    # initialize multiple threads for processing the parsing
    pool = ThreadPool(parallel_threads)
    pool.starmap(self.downloader, self.img_to_annotations.items())
    pool.close()
    pool.join()
def filterMsgS(args, msgList):
    """
    Filter all dedicated msg entries out into separate dedicated files.

    :param msgList: msg block definitions loaded from the json file
    :return:
    """
    pool = ThreadPool(cpu_count())
    # pool.starmap(filterItem, zip(itertools.repeat(args), msgList))
    pool.starmap(startPoint, zip(itertools.repeat(args), msgList))
    pool.close()
    pool.join()
def send(self):
    threadPool = ThreadPool(self.threadCount)
    for t in range(self.topicNum):
        self.driver.createTopics(self.topics[t], self.partitionNum, 1)
    sleep(5)
    args = []
    for t in range(self.topicNum):
        for p in range(self.partitionNum):
            args.append((t, p))
    threadPool.starmap(self.sendHelper, args)
    threadPool.close()
    threadPool.join()
def home_downloader():
    lnglats = get_lnglat()
    if dir_name not in os.listdir(os.curdir):
        os.mkdir(dir_name)
    lnglats_arg = []
    for lnglat in lnglats:
        lat = "{0:0<8}".format((lnglat[0]).replace('.', ''))[:8]
        lng = "{0:0<9}".format((lnglat[1]).replace('.', ''))[:9]
        lnglats_arg.append((lat, lng))
    pool = ThreadPool(1)
    pool.starmap(download_home, lnglats_arg)
    pool.close()
    pool.join()
def scan_urls(num_workers=8):
    p = "../data/event/"
    event_metadata = load_json(p + "event_metadata.json")
    pool = Pool(num_workers)
    for date_str in event_metadata:
        event_json = load_json(p + date_str + ".json")
        url_list = []
        for cam_id in event_json:
            for view_id in event_json[cam_id]["url"]:
                url_list += event_json[cam_id]["url"][view_id]["url"]
        pool.starmap(url_open_worker, url_list)
    pool.close()
    pool.join()
def cut(self, X, Y, index, depth):
    features = self.data.features
    best_edges = zeros(len(features))
    best_cuts = zeros(len(features))
    stump_cuts = pd.DataFrame(ones(X.shape, dtype=int), columns=features, index=X.index)
    stump_pool = ThreadPool(8)
    stump_input = []
    for i, feature in enumerate(features):
        stump_input.append((X[feature], Y))
    # stump_results = stump_pool.starmap(self.stump, stump_input)
    # stump_pool.close()
    # stump_pool.join()
    # for i, res in enumerate(stump_results):
    #     best_cuts[i], stump_cuts[features[i]], best_edges[i] = res
    for i, feature in enumerate(features):
        if i == 3 and depth == 2:
            print_stump = True
        else:
            print_stump = False
        best_cuts[i], stump_cuts[features[i]], best_edges[i] = self.stump(
            X[feature], Y, print=print_stump)
    idx = argmax(best_edges)
    # if index > 3 and index < 6:
    #     print(idx, features[idx], best_edges)
    counts = freq_count(stump_cuts[features[idx]])  # just to get counts
    if self.method == 'gini':
        p = partitionGini(Y)
    else:
        p = partitionGini(Y)
    if -1 in counts:
        rule = Node('rule', index, p.N, depth=depth + 1, label=p.label,
                    probability=p.probability, feature=features[idx],
                    threshold=best_cuts[idx])
        self.tree[index] = rule
        print(rule)
        # print(X[stump_cuts[features[idx]]==-1][features[idx]], Y[stump_cuts[features[idx]]==-1])
        pool = ThreadPool(2)
        args = [(X[stump_cuts[features[idx]] == -1], Y[stump_cuts[features[idx]] == -1],
                 rule.left, depth + 1),
                (X[stump_cuts[features[idx]] == 1], Y[stump_cuts[features[idx]] == 1],
                 rule.right, depth + 1)]
        pool.starmap(self.cut, args)
        pool.close()
        pool.join()
        # self.cut(X[stump_cuts[features[idx]]==-1], Y[stump_cuts[features[idx]]==-1], rule.left)
        # print(X[stump_cuts[features[idx]] == 1][features[idx]], Y[stump_cuts[features[idx]] == 1])
        # self.cut(X[stump_cuts[features[idx]] == 1], Y[stump_cuts[features[idx]] == 1], rule.right)
    else:
        # no cut took place
        leaf = Node('leaf', index, p.N, depth=depth + 1, label=p.label,
                    probability=p.probability)
        self.leaf_samples += p.N.sum() / self.data.N
        print(leaf, self.leaf_samples)
        self.tree[index] = leaf
    return
def main():
    global prid
    parser = argparse.ArgumentParser(
        "betterclone.py",
        description="copies a folder using service accounts")
    parser.add_argument("-k", "--keyfile", default="key.json", help="keyfile filename")
    parser.add_argument("project", help="id of the project")
    parser.add_argument("source", help="id of the source folder")
    parser.add_argument("destination", help="id of the destination folder")
    args = parser.parse_args()
    print("auth main sa")
    credentials = ServiceAccountCredentials.from_json_keyfile_name(
        args.keyfile,
        [
            "https://www.googleapis.com/auth/iam",
            "https://www.googleapis.com/auth/drive"
        ])
    iam = googleapiclient.discovery.build("iam", "v1", credentials=credentials)
    drive = googleapiclient.discovery.build("drive", "v3", credentials=credentials)
    prid = args.project
    flist = resolve_folder(drive, args.source, args.destination)
    print("processing directories")
    while True:
        c = True
        for i in flist:
            if not i[2]:
                print("process " + i[0])
                flist += resolve_folder(drive, i[0], i[1])
                del flist[flist.index(i)]
                c = False
        if c:
            break
    print("start copy")
    pool = ThreadPool(98)
    pool.starmap(
        copy_dir,
        zip(itertools.repeat(credentials),
            [i[0] for i in flist],
            [i[1] for i in flist]))
    pool.close()
    pool.join()
def load_sources(self, set_dir, dataset="validation", normalize="zscore", store_raw=False):
    # Load sources in dataset with proper id
    # This happens once, upon calling dataset.prepare()
    self.dataset = dataset
    self.out_dir = set_dir
    # load specifications for image Dataset
    # follows load_shapes example
    black = (0, 0, 0)
    height = 512
    width = 512
    # add DES classes
    self.add_class("des", 1, "star")
    self.add_class("des", 2, "galaxy")
    # find number of sets
    num_sets = 0
    for setdir in os.listdir(self.out_dir):
        if 'set_' in setdir:
            # add training image set
            self.add_image("des", image_id=num_sets,
                           path=os.path.join(self.out_dir, set_dir),
                           width=width, height=height, bg_color=black)
            num_sets += 1
    # store data in memory
    self.images = [None] * num_sets
    if store_raw:
        self.raws = [None] * num_sets
    self.masks = [None] * num_sets
    self.class_ids_mem = [None] * num_sets
    threads = np.clip(mp.cpu_count(), 1, num_sets)
    print("Loading images from disk.")
    pool = ThreadPool(threads)
    pool.starmap(self.load_image_disk,
                 [(i, normalize, store_raw) for i in range(num_sets)])
    if dataset == "training" or dataset == "validation":
        print("Loading masks from disk (this may take several minutes).")
        pool.map(self.load_mask_disk, range(num_sets))
    pool.close()
    pool.join()
    return
def __init_C_list(self):
    # somewhat empirical...
    if np.sum(self.__C_computed) <= (self.__n ** 2) / 4:
        for a in range(0, self.__n):
            C = zeros(self.__n, self.__n)
            for b in range(0, self.__n):
                C = C + self.__P_list[b] * self.__vandermonde_inv[a, b]
            self.__C_list[a] = C
    else:
        pool = ThreadPool(4)
        for a in range(0, self.__n):
            for b in range(0, self.__n):
                if self.__C_computed[a, b] == 0:
                    pool.starmap(self.__compute_C_ij,
                                 zip(np.where(self.__C_computed == 0)))
def exec(self, commands, **kwargs):
    """
    :param commands: the list of commands to execute on all hosts, or a dict of command lists indexed by host
    :return: the list of lines
    """
    from functools import partial
    commands_for_hosts = []
    output = []
    if isinstance(commands, list):
        for host in self.hosts:
            commands_for_hosts.append([host, commands])
    elif isinstance(commands, dict):
        for host in commands.keys():
            commands_for_hosts.append([host, commands[host]])
    else:
        for host in self.hosts:
            commands_for_hosts.append([host, [commands]])
    pool = ThreadPool(self.threads_num)
    raw_results = pool.starmap(partial(self.exec_on_host, **kwargs), commands_for_hosts)
    results = {}
    for raw_result in raw_results:
        for host in raw_result.keys():
            results[host] = raw_result[host]
    pool.close()
    pool.join()
    return results
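# --- Illustrative sketch (not part of the class above): binding fixed keyword
# arguments with functools.partial so starmap only has to supply the positional
# (host, commands) pairs, mirroring the exec() method above. run_on_host and the
# host names are hypothetical stand-ins for the real exec_on_host call.
from functools import partial
from multiprocessing.dummy import Pool as ThreadPool

def run_on_host(host, commands, timeout=10):
    # Stand-in for a real SSH call; just echoes what would run and with which timeout.
    return {host: ["%s (timeout=%ss)" % (cmd, timeout) for cmd in commands]}

if __name__ == '__main__':
    work = [("web-1", ["uptime"]), ("web-2", ["uptime", "df -h"])]
    with ThreadPool(2) as pool:
        results = pool.starmap(partial(run_on_host, timeout=5), work)
    merged = {}
    for r in results:
        merged.update(r)
    print(merged)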
def repSimilarity(allReps):
    keySet = list(allReps.keys())
    usedKeys = keySet[:50]
    repsToAnalyze = []
    for repName1 in usedKeys:
        for repName2 in usedKeys:
            if repName1 == repName2:
                continue
            repsToAnalyze.append([allReps, repName1, repName2])
    pool = ThreadPool(16)
    result = pool.starmap(binarySimilarity, repsToAnalyze)
    diffPartyResults = []
    for res in result:
        rep1Name, rep2Name, similarVotes, dissimilarVotes, numSameParty, rep1Party, rep2Party = res
        if numSameParty != 0:
            continue
        if similarVotes + dissimilarVotes == 0:
            continue
        ratioSame = similarVotes / (similarVotes + dissimilarVotes)
        diffPartyResults.append((ratioSame, similarVotes + dissimilarVotes,
                                 rep1Name, rep1Party, rep2Name, rep2Party))
    sortedDiffParty = sorted(diffPartyResults)
    for line in sortedDiffParty:
        print(line)
def PESQ_evalpaths(reference_paths, degraded_paths, fs):
    """Compute the PESQ scores for all wavefiles in a list.

    Walks through a list of degraded wavefile paths and computes all the
    scores against the corresponding reference wavefile paths.

    Parameters
    ----------
    reference_paths : list of str
        The paths to the reference wavfiles.
    degraded_paths : list of str
        The paths to the degraded wavfiles.
    fs : int
        The sample frequency; should be 8000 or 16000.

    Returns
    -------
    mos : 1-d float array
        The mean opinion score. Returns NAN if PESQ failed.
    mos_lqo : 1-d float array
        The mean opinion score rescaled with lqo. Returns NAN if PESQ failed.
    """
    PESQ_fs = partial(PESQ, fs=fs)
    p = Pool(THREADS)
    res = p.starmap(PESQ_fs, zip(reference_paths, degraded_paths))
    p.close()
    mos_list, mos_lqo_list = list(zip(*res))
    # np.float was removed in NumPy 1.24+; the builtin float is equivalent here
    mos_list = np.array(mos_list, dtype=float)
    mos_lqo_list = np.array(mos_lqo_list, dtype=float)
    return mos_list, mos_lqo_list
def extractAllAndCompare(self):
    names = ["times of india", "the hindu", "guardian", "new york times",
             "google news", "cnn", "reddit news", "reddit world news",
             "telegraph", "bbc"]
    outputfiles = ""
    import datetime, os
    today = str(datetime.date.today())
    directory = "./data/allFiles/" + today
    if not os.path.exists(directory):
        os.makedirs(directory)
    storageFile = directory + "/allValueFiles.txt"
    if not os.path.exists(storageFile):
        e = multiprocessing.Event()  # To synchronize the progress bar
        queue = multiprocessing.Queue()  # To get the score file from the threaded process
        from multiprocessing.dummy import Pool as ThreadPool
        from itertools import repeat
        pool = ThreadPool(4)
        results = pool.starmap(extractorRunner.runScrapper, zip(names, repeat(e), repeat(queue)))
        pool.close()
        pool.join()
        for i in range(10):
            outputfiles += " " + queue.get()
        with open(storageFile, "w") as temp:
            temp.write(outputfiles)
    QApplication.processEvents()
    # Show comparison graph
    outputProcess = subprocess.Popen("python -m ui.comparingAll " + storageFile)
    outputProcess.wait()
    QApplication.processEvents()
def extract_bundle(self, request, replica):
    """
    Get the files and actual metadata.

    This is the main method that extracts the contents of the bundle and
    separates it into a tuple of (metadata_files, data_files), where
    metadata_files holds the actual contents of the metadata files and
    data_files holds the metadata describing the files.

    :param request: The contents of the DSS event notification
    :param replica: The replica from which to pull the bundle
    """
    def get_metadata(file_name, _args):
        _metadata = {file_name: self.__get_file(*_args)}
        return _metadata

    bundle_uuid = request['match']['bundle_uuid']
    # Get the metadata and data descriptions
    metadata_files, data_files = self.__get_bundle(bundle_uuid, replica)
    # Create a ThreadPool which will execute the function
    pool = ThreadPool(len(metadata_files))
    # Pool the contents in the right format for the get_metadata function
    args = [(name, (_f['uuid'], replica)) for name, _f in metadata_files.items()]
    results = pool.starmap(get_metadata, args)
    pool.close()
    pool.join()
    # Reassign the metadata files as a single dictionary
    metadata_files = dict(ChainMap(*results))
    return metadata_files, data_files
def _pool_query(self, query, func, attr, callback):
    """Uses :code:`query` to perform :code:`func` with kwargs :code:`attr`
    in parallel against all configured geocoders. Performs the
    :code:`callback` function on the result list of addresses or locations.

    Args:
        query (str): The query component of a reverse or forward geocode.
        func (function): Function to use to obtain an answer.
        attr (dict): Keyword arguments to pass to function for each geocoder.
        callback (func): Function to run over iterable result.

    Returns:
        Output of `callback`.
    """
    pool = ThreadPool()
    results = pool.starmap(func,
                           zip([g.geocoder for g in self.geocoders],
                               repeat(query),
                               [getattr(g, attr) for g in self.geocoders]))
    pool.close()
    pool.join()
    locations = []
    for location in results:
        if isinstance(location, list):
            locations.extend(location)
        else:
            locations.append(location)
    # locations = [item for sublist in results for item in sublist]
    return callback(locations)
def get_frames(df_img_url, dir_p="data/rgb/", num_try=0, num_workers=4):
    print("=" * 100)
    print("=" * 100)
    print("This function has been called %d times." % num_try)
    if num_try > 30:
        print("Terminating the recursive call due to too many errors. Please check manually.")
        return
    num_errors = 0
    arg_list = []
    # Construct the lists of urls and file paths
    for dt, df in df_img_url.groupby("date"):
        img_url_list = list(df["img_url"])
        dir_p_dt = dir_p + dt + "/"
        check_and_create_dir(dir_p)  # need this line to set the permission
        check_and_create_dir(dir_p_dt)
        for i in range(len(img_url_list)):
            arg_list.append((img_url_list[i], dir_p_dt + str(i) + ".zip"))
    # Download the files in parallel
    pool = Pool(num_workers)
    result = pool.starmap(urlretrieve_worker, arg_list)
    pool.close()
    pool.join()
    for r in result:
        if r:
            num_errors += 1
    if num_errors > 0:
        print("=" * 60)
        print("Got %d errors. Need to run again." % num_errors)
        num_try += 1
        get_frames(df_img_url, num_try=num_try)
    else:
        print("DONE")
def run(ipaddress, concurrency, mode, function, write, view):
    ip_list = ip_format(ipaddress)
    if mode == 'thread':
        pool = ThreadPool(concurrency)
    else:
        pool = ProcPool(concurrency)
    t1 = time.time()
    result_list = pool.map(scan, ip_list)
    t2 = time.time()
    available_ip_port = list(filter(None, result_list))
    if view:
        print('ping time: {}'.format(t2 - t1))
    if function == 'tcp':
        ip_port_iter = ((ip, port) for ip in available_ip_port
                        for port in range(PORT_RANGE[0], PORT_RANGE[-1] + 1))
        t3 = time.time()
        result_list = pool.starmap(scan, ip_port_iter)
        t4 = time.time()
        available_ip_port = defaultdict(list)
        for result in result_list:
            if isinstance(result, tuple):
                available_ip_port[result[0]].append(result[1])
        if view:
            print('tcp time: {}'.format(t4 - t3))
    pool.close()
    pool.join()
    print(available_ip_port)
    if write:
        with open(write, 'w+') as f:
            json.dump(available_ip_port, f)
def compute_genome_gen_distances(genomes, gens, genome_gen_poses):
    """
    Compute the edit distance between all genomes in `genomes` based on each
    gene in `gens`, with the help of `genome_gen_poses`.
    :return: a dictionary containing a distance matrix for each gene
    """
    p = Pool(initializer=init_pool, initargs=(genome_gen_poses, genomes))
    # TODO: to actually compute the result, remove the 0 from the [:0] slice;
    # otherwise the precomputed value will be used
    gen_edit_dists = p.starmap(
        genome_gen_distance,
        list(itertools.product(gens, range(len(genomes)), range(len(genomes))))[:0])
    p.close()
    if gen_edit_dists:
        # check whether we computed a new value or should fall back to the precomputed one
        print(gen_edit_dists)
    else:
        gen_edit_dists = compute_genome_gen_distances_pre_computed
    result = {}
    # convert the list-like result into a dict of matrices
    for gen_edit_dist in gen_edit_dists:
        if gen_edit_dist is None:
            continue
        if gen_edit_dist[0] not in result:
            result[gen_edit_dist[0]] = np.full([len(genomes), len(genomes)], 0)
        result[gen_edit_dist[0]][gen_edit_dist[1]][gen_edit_dist[2]] = gen_edit_dist[3]
        result[gen_edit_dist[0]][gen_edit_dist[2]][gen_edit_dist[1]] = gen_edit_dist[3]
    return result
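# --- Illustrative sketch (separate from the function above): the Pool
# initializer/initargs pattern, where shared read-only data is handed to each
# worker process once at start-up instead of being shipped along with every
# starmap call. init_pool, pair_distance and the toy data here are hypothetical.
from itertools import product
from multiprocessing import Pool

_genomes = None

def init_pool(genomes):
    # Runs once in every worker process; stashes the shared data in a module global.
    global _genomes
    _genomes = genomes

def pair_distance(i, j):
    # Toy "distance": absolute difference of sequence lengths.
    return (i, j, abs(len(_genomes[i]) - len(_genomes[j])))

if __name__ == '__main__':
    data = ["ACGT", "ACGTTT", "AC"]
    with Pool(initializer=init_pool, initargs=(data,)) as pool:
        dists = pool.starmap(pair_distance, product(range(len(data)), range(len(data))))
    print(dists)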
def docc(folder_name, nt, dt, finalcut, reftime, f2, f3, node):
    global fft_all, outpath
    pool = ThreadPool(node)
    outpath = join(folder_name, "%sto%s_COR" % (str(f2), str(f3)))
    if not os.path.exists(outpath):
        os.makedirs(outpath)
    ns = len(fft_all)
    nts = fft_all[0].stats.npts
    lag = int(finalcut / dt)
    mid_pos = int(nts / 2)
    # tcorr = np.arange(-nts + 1, nts)
    # dn = np.where(np.abs(tcorr) <= lag)[0]
    cor = fft_all[0].copy()
    cor.stats.delta = dt
    cor.stats.starttime = reftime
    sta_pair = []
    idx_lst = []
    for i in np.arange(ns - 1):
        for j in np.arange(i + 1, ns):
            if fft_all[i].stats.station == fft_all[j].stats.station:
                continue
            sta_pair.append("%s.%s_%s.%s" % (fft_all[i].stats.station, fft_all[i].stats.channel,
                                             fft_all[j].stats.station, fft_all[j].stats.channel))
            idx_lst.append([i, j])
    t = time.perf_counter()  # time.clock() was removed in Python 3.8
    results = pool.starmap(compute_cc, zip(idx_lst, repeat(nts), repeat(mid_pos),
                                           repeat(lag), repeat(cor)))
    print("%d station pairs using %d nodes:" % (len(sta_pair), node),
          time.perf_counter() - t, "s")
    pool.close()
    pool.join()
    return sta_pair
async def scrawl(self, threads=5):
    logger.log('Scrawling Trackemon..', 'green')
    await self.client.wait_until_ready()
    # collect the ids of the channels that should receive the post
    shout_out_channels = []
    for server in self.client.servers:
        for channel in server.channels:
            if channel.name in self.config.get('scrawl_channels', []):
                shout_out_channels.append(discord.Object(channel.id))
    if len(shout_out_channels) == 0:
        raise Exception("No channel to shout out!")
    while not self.client.is_closed:
        logger.log('Scrawling Trackemon..', 'green')
        self._retrieve_session_id()
        # use multiprocessing
        if 'pokemons' in self.config.get('scrawl_trackemon'):
            pokemon_names = self.config.get('scrawl_trackemon')['pokemons']
            pool = ThreadPool(threads)
            messages = pool.starmap(self.scrawl_trackemon,
                                    zip(pokemon_names, itertools.repeat(self.session_id)))
            for message in messages:
                if len(message):
                    for channel in shout_out_channels:
                        await self.client.send_message(channel, message)
        # increase delay to let the task finish
        await asyncio.sleep(self.config.get('delay_scrawl', 300))
def media_scraper(session, link, location, directory, post_count, username):
    print("Scraping " + location + ". Should take less than a minute.")
    pool = ThreadPool(max_threads)
    floor = math.floor(post_count / 100)
    if floor == 0:
        floor = 1
    a = list(range(floor))
    offset_array = []
    for b in a:
        b = b * 100
        offset_array.append(link.replace("offset=0", "offset=" + str(b)))
    media_set = pool.starmap(scrape_array, product(offset_array, [session]))
    media_set = [x for x in media_set if x is not None]
    media_set = list(chain.from_iterable(media_set))
    if "/users/" == directory:
        directory = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) \
            + "/users/onlyfans/" + username + "/" + location + "/"
    else:
        directory = directory + username + "/" + location + "/"
    print("DIRECTORY - " + directory)
    if not os.path.exists(directory):
        os.makedirs(directory)
    with open(directory + 'links.json', 'w') as outfile:
        json.dump(media_set, outfile)
    return [media_set, directory]
def readAllSonars(TRIG, ECHO):
    from multiprocessing.dummy import Pool as ThreadPool
    pool = ThreadPool(len(ECHO))
    distances = pool.starmap(readSonar, zip(TRIG, ECHO))
    pool.close()
    pool.join()
    return distances[0], distances[1], distances[2], distances[3]
def main(genre):
    pool_of_threads = Pool(multiprocessing.cpu_count() - 1)  # let's hope you have more than 1 cpu core...
    numbers = list(range(1, pages))
    old_results = pool_of_threads.starmap(get_painting_list,
                                          zip(numbers, itertools.repeat(genre)))
    pool_of_threads.close()
    pool_of_threads.join()
    results = []
    for item in old_results:
        if item:
            for x in item:
                results.append(x)
    pool_of_threads = Pool(multiprocessing.cpu_count() - 1)
    pool_of_threads.starmap(downloader, zip(enumerate(results), itertools.repeat(genre)))
    pool_of_threads.close()
    pool_of_threads.join()
def topN_translates(self, bot, update, number):
    chat_id = update.message.chat_id
    bot.sendChatAction(chat_id, ChatAction.TYPING)
    languages = defaultdict(int)
    pool = ThreadPool(4)
    args = [('https://launchpad.net/{}/+translations'.format(project), languages)
            for project in self.projects]
    pool.starmap(self.check_project_untranslated, args)
    pool.close()
    pool.join()
    text = ['Language - Number of translated strings']
    top = sorted(languages.items(), key=lambda x: (x[1], x[0]), reverse=True)[:number]
    for index, (lang, translated) in enumerate(top):
        text.append('{0}) {1} - {2}'.format(index + 1, lang, translated))
    bot.sendMessage(chat_id, text='\n'.join(text))
def pool_filter(
        candidates: List[Tuple[str, str]],
        compare_images: Callable[[str, str, float, float], bool],
        aspect_fuzziness: float,
        rms_error: float,
        chunk_size: float
) -> List[Tuple[str, str]]:
    pool = Pool(None)
    return [
        c for c, keep in zip(
            candidates,
            pool.starmap(
                partial(compare_images, aspect_fuzziness=aspect_fuzziness, rms_error=rms_error),
                candidates,
                chunksize=chunk_size))
        if keep
    ]
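# --- Illustrative sketch (independent of pool_filter above): feeding starmap a
# list of argument tuples with an explicit chunksize, then zipping the boolean
# results back against the inputs to filter them. roughly_equal and the sample
# pairs are hypothetical.
from multiprocessing.dummy import Pool as ThreadPool

def roughly_equal(a, b, tolerance=0.5):
    return abs(a - b) <= tolerance

if __name__ == '__main__':
    pairs = [(1.0, 1.2), (3.0, 5.0), (2.0, 2.4)]
    with ThreadPool() as pool:
        keep_flags = pool.starmap(roughly_equal, pairs, chunksize=2)
    print([p for p, keep in zip(pairs, keep_flags) if keep])  # [(1.0, 1.2), (2.0, 2.4)]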
def main(argv):
    parser = argparse.ArgumentParser()
    parser.add_argument('exepath', help='path to the trace dump executable', type=file_type)
    parser.add_argument('files', nargs='*')
    parser.add_argument('-j', type=parallel_arg_type, help='number of parallel processes', default=1)
    args = parser.parse_args(argv[1:])
    exepath = args.exepath
    files = []
    for wildcard in args.files:
        files.extend(glob.glob(wildcard))
    if len(files) == 0:
        print("No input files found!")
        return 1
    # Test the executable first.
    valid = False
    try:
        p = subprocess.Popen([
            exepath,
            '--log_file=stdout',
        ], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        p.wait()
        if p.returncode == 5:
            # code 5 = invalid trace file / trace file unspecified
            valid = True
    except OSError:
        pass
    if not valid:
        print("Trace executable invalid!")
        return 1
    print("Processing...")
    pool = Pool(args.j)
    start_time = time.perf_counter()
    results = pool.starmap(run_dumper, zip(itertools.repeat(exepath), files))
    pool.close()
    pool.join()
    elapsed_time = time.perf_counter() - start_time
    print("entire runtime took %.3f seconds" % elapsed_time)
    return 0
def backup_all_servers():
    # Get a list of up and down servers
    hosts_up, hosts_down = check_alive_hosts(ssh_test_cmd)
    # Prepare the list for starmap's two arguments.
    hosts_up_two_args = []
    # Add the argument to the hosts list and append to a separate list.
    # Output should be something like [('a', 1), ('b', 1), ('c', 1)]
    for i in zip(hosts_up, repeat(rsync_stdout)):
        hosts_up_two_args.append(i)
    # Make the Pool of workers
    pool = ThreadPool(number_of_threads)
    results = pool.starmap(rsync_start, hosts_up_two_args)
    pool.close()
    pool.join()
def backup_one_server(server_ip):
    # This function needs the parameter as a list
    # Prepare the list for starmap's two arguments.
    hosts_up = []
    hosts_up_two_args = []
    hosts_up.append(server_ip)
    # Add the argument to the hosts list and append to a separate list.
    # Output should be something like [('a', 1), ('b', 1), ('c', 1)]
    # Always run the single-server backup in the background (append 0)
    for i in zip(hosts_up, repeat(0)):
        hosts_up_two_args.append(i)
    # Make the Pool of workers
    pool = ThreadPool(number_of_threads)
    results = pool.starmap(rsync_start, hosts_up_two_args)
    pool.close()
    pool.join()
def spider_kaiyuan(start, end, password):
    file_name = 'accounts_{0}-{1}.txt'.format(start, end)
    with open(file_name, 'a+') as f:
        for account in range(int(start), int(end)):
            if auth_account(account=account, password=password):
                f.write('account: {} password: {}'.format(account, password))
                f.flush()


def generate_func_args(start, end, password):
    return (start, end, password)


if __name__ == '__main__':
    pool = ThreadPool(5)
    args = []
    account_start = 888800000000
    password = input("Enter the test password; the test range is {} - {}\n".format(
        account_start + 5 * 10000, account_start + 9 * 10000 + 9999))
    for i in range(5, 9):
        args_map = generate_func_args(account_start + i * 10000,
                                      account_start + i * 10000 + 9999, password)
        args.append(args_map)
    pool.starmap(spider_kaiyuan, args)
    return paramList


def processSpider(url, filePath, threadName):
    sTime = time.time()
    session = requests.session()
    session.headers['User-Agent'] = random.choice(UserAgent)
    response = session.get(url)
    # todo add message if response code != 200
    print(response.status_code)
    if response.status_code == 200:
        f = open(filePath, 'w+', encoding='utf-8')
        f.write(response.text)
        print(threadName + ": " + str(time.time() - sTime))
        f.close()


def getSession():
    session = requests.session()
    session.headers['User-Agent'] = random.choice(UserAgent)
    return session


# main()
if __name__ == '__main__':
    pool = ThreadPool(20)
    paramList = getParam()
    print(paramList)
    pool.starmap(processSpider, paramList)
Rseq.print_line()

# Unzipping gzipped files and adding the correct extension
if ext == 'gz':
    os.system('mkdir gzipped_reads')
    print('\nYour files are compressed. They will be decompressed.')
    if options.extension == 'gz':  # checks whether the command-line tool is used
        ext = options.ext_unzip
    else:  # or the interactive dialogue
        ext = input('Please specify the file extension of the decompressed file [fasta, fastq]: ')
    # Parallelized extraction and copying
    print('Extracting files')
    pool = ThreadPool(int(thread_no))
    pool.starmap(Rseq.gz_process, zip(files, itertools.repeat(ext)))
    pool.close()
    pool.join()

# Executing the FastQC algorithm
adap_set = adap_max
Rseq.print_line()
if exec_adapters in ['y', 'Y', 'yes']:
    # Analyzing the data with FastQC
    print('\nFastQC data analysis\n')
    Rseq.fqc([file_name + '.' + ext for file_name in fnames], thread_no)
    print('\nFastQC finished\n')
    # Generating the adapter list
    for fname in fnames:
        query += ' -query ' + fasta_file
        query += ' -out ' + blast_output_file + ' -outfmt 6'
        os.system(query)
        with open(blast_output_file, 'r', encoding='utf-8') as f2:
            while True:
                line = f2.readline().split('\t')
                if float(line[3]) > 0.9 * len(seq):
                    ofile.write(name + '\t' + line[3] + '\t' + line[4])
                    ofile.write('\t' + line[5] + '\t' + line[6] + '\t' + line[7])
                    ofile.write('\t' + line[8] + '\t' + line[9] + '\t' + line[10])
                else:
                    break
        os.remove(fasta_file)
        os.remove(blast_output_file)
        return out
    else:
        out = -1
        return out


with open(completeGenome, 'r') as f:
    t = 0
    o = 'output.txt'
    while True:
        t = oneSequenceBlast(f, t, o)
        if t == -1:
            break

pool = Pool()
results = pool.starmap(oneSequenceBlast, mailsList)
        os.system('scrapy crawl {0} -o {1}/{0}_items.json -t json'.format(
            spider_name, scraped_data_dir))
    else:
        raise FileNotFoundError(
            'Directory not found! Please check Scrapy project root and output directory paths '
            'passed in as arguments.')
    return


def arg_parser():
    parser = argparse.ArgumentParser(
        description='Script designed to run a series of spider scrapers to scrape data from '
                    'remote sites.')
    parser.add_argument(
        '-p', dest='scrapy_project', required=True, nargs='?', type=str,
        help='location to scrapy project root directory.')
    parser.add_argument(
        '-o', dest='output_dir', required=True, nargs='?', type=str,
        help='location to output directory.')
    return vars(parser.parse_args())


if __name__ == '__main__':
    params = arg_parser()
    output_dir = params['output_dir']
    project_dir = params['scrapy_project']
    pool = ThreadPool(len(SPIDERS))
    pool.starmap(
        crawl,
        zip(itertools.repeat(project_dir), itertools.repeat(output_dir), SPIDERS))
    pool.close()
hardcordeMode = True  # Use this if you don't want words of length 2

# Setting working directory to path of current file
os.path.dirname(os.path.abspath(__file__))

# Checking if thesaurus exists and is correctly formatted
# If not, create it
if hardcordeMode:
    print("You are in hardcore mode")
    if not checkThesaurus(formattedThesaurusPath, tSizeHardcore):
        createThesaurusHardcore(originalThesaurusPath, formattedThesaurusPath)
else:
    if not checkThesaurus(formattedThesaurusPath, tSize):
        createThesaurus(originalThesaurusPath, formattedThesaurusPath)

# Open the new thesaurus and store it in memory
f = open(formattedThesaurusPath, encoding="utf-8")
thesaurus = list(f)
f.close()

# Creates the list of arguments for the parallel function
mailsList = []
for root, dirs, files in os.walk(mailsPath, topdown=False):
    for f in files:
        temp = [os.path.join(root, f), thesaurus, outputPath]
        mailsList.append(temp)

# Parallel baby
pool = ThreadPool()
results = pool.starmap(parallelTagging, mailsList)
    eTime = time.time()
    logging.info(fileName + " takes time:" + str(eTime - sTime) + ", url:" + url)


def getDownloadParam(folder, suffix, list):
    paramList = []
    # for i in range(1, 1192):
    for i in list:
        index = str(i)
        url = mainUrl + index + endUrl
        fileName = 'file-' + index + '.' + suffix
        param = (url, folder, fileName)
        paramList.append(param)
    return paramList


if __name__ == '__main__':
    # Backslashes doubled so the Windows path is not mangled by escape sequences
    logging.basicConfig(filename="F:\\Workspace\\_data\\log\\resumeSpider\\logging-11-27-2.log",
                        level=logging.INFO)
    folder = "H:\\resume4"
    startTime = time.time()
    pool = ThreadPool(20)
    errorRange = getNumbersFromErrLog()
    # normalRange = range(1, 1200)
    paramList = getDownloadParam(folder, "zip", errorRange)
    pool.starmap(downloadResume, paramList)
    endTime = time.time()
    logging.info('take time:' + str(endTime - startTime))
class MySteamFriends(object):
    def __init__(self, api_key: str, steam_username: str = None, steam_id: str = None,
                 debugging: bool = False, concurrent_api: int = 4):
        """Initialises a connection to the Steam Web API and populates a list of friends.

        Args:
            api_key (str): API key from https://steamcommunity.com/dev/apikey
            steam_username (Optional[str]): steam username to base friends list from
            steam_id (Optional[str]): steam ID to base friends list from (alternative to steam_username)
            debugging (Optional[bool]): Enable debugging info. Defaults to off
            concurrent_api (Optional[int]): How many concurrent Steam API subprocesses to run
        """
        if api_key == "":
            raise NameError("You don't have an api_key set!")
        if debugging is True:
            basicConfig(stream=sys.stdout, level=DEBUG)

        self.steam_api = WebAPI(key=api_key)

        if steam_id is None and steam_username is None:
            raise NameError("You didn't call MySteamFriends with steam_username or steam_id (either is required).")

        self.my_steam_id = steam_id
        if self.my_steam_id is None:
            self.my_steam_id = self.__get_my_steam_id(steam_username)

        self.friends_list = self.__get_my_friends_list()
        self.my_games_list = self.get_users_games(self.my_steam_id)
        self.total_gametime = self.get_my_total_playtime()
        self.api_pool = ThreadPool(concurrent_api)

        debug("api_key: %s, steam_id: %s, my_steam_id: %s" % (api_key, self.my_steam_id, self.my_steam_id))

    def __get_my_steam_id(self, steam_user: str) -> str:
        try:
            return self.steam_api.ISteamUser.ResolveVanityURL(vanityurl=steam_user, url_type=1)['response']['steamid']
        except KeyError:
            raise NameError("That steam username doesn't exist!")

    def __get_my_friends_list(self) -> dict:
        friends = self.steam_api.ISteamUser.GetFriendList(steamid=self.my_steam_id)['friendslist']['friends']
        friends_list = [f['steamid'] for f in friends]
        friends_list.append(self.my_steam_id)
        return friends_list

    def __populate_my_friends_list(self):
        friends_list_detailed = self.api_pool.map(self.get_steam_user_dict, self.friends_list)
        return friends_list_detailed

    def _get_game_user_info_dict(self, uid: str, appid: str) -> dict:
        gameinfo = self.get_game_user_info(uid, appid)
        if gameinfo:
            return {uid: gameinfo}

    def get_my_total_playtime(self) -> int:
        total = 0
        for game in self.my_games_list:
            total += int(game['playtime_forever'])
        return total

    def get_game_name(self, appid: str) -> str:
        return [game['name'] for game in self.my_games_list if str(game['appid']) == appid][0]

    def get_steam_user(self, sid: str) -> dict:
        return self.steam_api.ISteamUser.GetPlayerSummaries(steamids=sid)['response']['players'][0]

    def get_steam_username(self, sid: str) -> str:
        return self.get_steam_user(sid)['personaname']

    def get_steam_user_dict(self, sid: str) -> dict:
        result = self.get_steam_user(sid)
        if result:
            return {sid: result}

    def get_users_games(self, sid: str) -> dict:
        result = self.steam_api.IPlayerService.GetOwnedGames(steamid=sid, include_played_free_games=1,
                                                             include_appinfo=1, appids_filter=0)['response']
        if 'games' in result:
            return result['games']

    def get_game_user_info(self, uid: str, appid: str) -> dict:
        games = self.get_users_games(uid)
        if games:
            return [game for game in games if str(game['appid']) == appid]

    def get_everyones_gamestats(self, appid: str) -> dict:
        # The Steam API is slow; use threading to submit concurrent requests
        results = self.api_pool.starmap(self._get_game_user_info_dict,
                                        zip(self.friends_list, itertools.repeat(appid)))
        # transform the results into a single dict keyed by steam id
        result_dict = {}
        for result in list(filter(None.__ne__, results)):
            for key, value in result.items():
                result_dict[key] = value[0]
        return result_dict

    def get_game_stats_detailed(self, gamestats: dict) -> list:
        # The Steam API is slow; use threading to submit concurrent requests
        return self.api_pool.map(self._combine_steam_user_game_stats, gamestats.items())

    def _combine_steam_user_game_stats(self, data: tuple) -> dict:
        for sid, game in [data]:
            return ({
                "steam_user": self.get_steam_user(sid),
                "game_stats": game
            })