def __init__(self, metric, mode):
    """Validate the evaluation settings and build the two progress bars.

    Args:
        metric: Distance metric name; only ``'hamming'`` is accepted.
        mode: Which pairs are evaluated; one of ``'inter-camera'``,
            ``'intra-camera'`` or ``'all'``.

    Raises:
        AssertionError: If ``metric`` or ``mode`` is not an accepted value.
    """
    assert metric in ['hamming'], \
        'expect hamming, but got {}'.format(metric)
    # Report the offending *mode*: the previous message had no placeholder
    # and formatted ``metric``, so the rejected value was never shown.
    assert mode in ['inter-camera', 'intra-camera', 'all'], \
        'expect inter-camera/intra-camera and all, but got {}'.format(mode)
    self.metric = metric
    self.mode = mode

    def _build_bar(label):
        # Both bars share one layout and differ only in the leading label.
        return progressbar.ProgressBar(widgets=[
            label,
            progressbar.Percentage(),
            progressbar.Bar(),
            progressbar.SimpleProgress(
                format='%(value_s)s/%(max_value_s)s'),
            ' [',
            progressbar.Timer(),
            ',',
            FileTransferFrequency(format='%(scaled)5.1f ms/query'),
            '] ',
        ])

    # please kindly install progressbar library with command:
    # ```pip install progressbar2```
    self.bar_rank = _build_bar(
        'Ranking (Compute Hamming Dist and Counting Sort): ')
    self.bar_evaluate = _build_bar('Evaluating (Compute mAP and CMC): ')
def run(self):
    """Match every file under ``self.target`` against the custom YARA rule.

    ``self.custom_rule`` is compiled as a rule file when it names an existing
    file, otherwise as rule source when it is a string. The tree is walked
    once to count files (to size the progress bar) and a second time to scan
    them. Files that cannot be scanned are skipped.
    """
    if os.path.isfile(self.custom_rule):
        rules = yara.compile(self.custom_rule)
    elif isinstance(self.custom_rule, str):
        rules = yara.compile(source=self.custom_rule)
    # NOTE(review): ``rules`` stays unbound if neither branch applies.

    matches = []
    # First pass: only count the files so the bar knows its maximum.
    count = sum(len(files) for _, _, files in os.walk(self.target))
    pbar = progressbar.ProgressBar(
        widgets=[progressbar.Bar('+'), ' ', progressbar.Percentage(), ' | ',
                 progressbar.ETA(), ' | ', progressbar.SimpleProgress()],
        maxval=count).start()

    scanned = 0
    # Second pass: the trailing separator is kept from the original code so
    # the reported paths are unchanged.
    for root, dirs, files in os.walk(self.target + '\\'):
        for entry in files:
            scanned += 1
            pbar.update(scanned)
            path = os.path.join(root, entry)
            try:
                found = rules.match(path)
                if found:
                    matches.append({'match': found, 'file': path})
            except Exception:
                # Best effort: unreadable or unscannable files are skipped.
                # ``except Exception, err`` was Python-2-only syntax; this
                # form is valid on both Python 2.6+ and Python 3.
                pass
    # NOTE(review): ``matches`` is collected but not returned or stored in
    # the code visible here -- confirm how callers obtain the results.
def test_all_widgets_large_values(max_value):
    """Render one of each widget type while the value grows large."""
    bar = progressbar.ProgressBar(
        widgets=[
            progressbar.Timer(),
            progressbar.ETA(),
            progressbar.AdaptiveETA(),
            progressbar.AbsoluteETA(),
            progressbar.DataSize(),
            progressbar.FileTransferSpeed(),
            progressbar.AdaptiveTransferSpeed(),
            progressbar.AnimatedMarker(),
            progressbar.Counter(),
            progressbar.Percentage(),
            progressbar.FormatLabel('%(value)d/%(max_value)d'),
            progressbar.SimpleProgress(),
            progressbar.Bar(fill=lambda progress, data, width: '#'),
            progressbar.ReverseBar(),
            progressbar.BouncingBar(),
            progressbar.FormatCustomText('Custom %(text)s', dict(text='text')),
        ],
        max_value=max_value,
    )

    # Two value-less updates separated by a pause, then the climbing values.
    bar.update()
    time.sleep(1)
    bar.update()

    step = 10**4
    for value in range(0, 10**6, step):
        time.sleep(1)
        bar.update(value)
def test_all_widgets_max_width(max_width, term_width):
    """Check that widgets render empty exactly when ``max_width`` is set and
    smaller than ``term_width``."""
    limit = dict(max_width=max_width)
    bar = progressbar.ProgressBar(
        widgets=[
            progressbar.Timer(**limit),
            progressbar.ETA(**limit),
            progressbar.AdaptiveETA(**limit),
            progressbar.AbsoluteETA(**limit),
            progressbar.DataSize(**limit),
            progressbar.FileTransferSpeed(**limit),
            progressbar.AdaptiveTransferSpeed(**limit),
            progressbar.AnimatedMarker(**limit),
            progressbar.Counter(**limit),
            progressbar.Percentage(**limit),
            progressbar.FormatLabel('%(value)d', **limit),
            progressbar.SimpleProgress(**limit),
            progressbar.Bar(**limit),
            progressbar.ReverseBar(**limit),
            progressbar.BouncingBar(**limit),
            progressbar.FormatCustomText('Custom %(text)s',
                                         dict(text='text'), **limit),
            progressbar.DynamicMessage('custom', **limit),
            progressbar.CurrentTime(**limit),
        ],
        term_width=term_width,
    )
    bar.update(0)
    bar.update()

    for rendered in bar._format_widgets():
        if not (max_width and max_width < term_width):
            assert rendered != ''
        else:
            assert rendered == ''
def main(args):
    """Find candidate "twin" users for every user and pickle the result.

    Reads ``args.elasticsearch``, ``args.max_users``, ``args.source_users``,
    ``args.user_index`` and ``args.output``. Sets the module globals ``es``
    and ``MAX_USERS`` as a side effect.
    """
    global es, MAX_USERS

    es = Elasticsearch(args.elasticsearch)
    MAX_USERS = args.max_users

    # Extract the users from the first index.
    print("Recuperando usuarios de /r/lonely...")
    users = get_users(args.source_users)

    # Look for possible "twin" users.
    print("Obteniendo posibles gemelos...")

    # A progress bar that shows the last processed user in addition to the
    # usual information.
    widgets = [
        pb.Percentage(),
        " (", pb.SimpleProgress(), ") ",
        pb.Bar(), " ",
        pb.FormatLabel(""), " ",
        pb.Timer(), " ",
        pb.ETA(), " ",
    ]
    bar = pb.ProgressBar(max_value=len(users), widgets=widgets)
    for username in bar(users):
        # Slot 6 is the (initially empty) label widget. The original
        # expression here was garbled ("User: "******" "), which is a syntax
        # error; it is restored as the user name surrounded by the label text.
        widgets[6] = pb.FormatLabel("User: " + username + " ")
        find_twins(username, users[username], args.user_index)

    print("Filtrando usuarios que hayan posteado en el subreddit...")
    filter_subreddit_posters(users)

    # Store the resulting dictionary in a .pickle file (Python's
    # serialization format).
    print("Serializando los resultados...")
    with open(args.output, "wb") as f:
        pickle.dump(users, f)
def maybeProgressBar(show, maxValue, prefix):
    """
    Yield either a real progress bar or a do-nothing stand-in.

    @param show: If C{True} (and file descriptor 2 is a terminal), yield a
        progress bar, else yield a class whose C{update} method does nothing.
    @param maxValue: The C{int} number of tasks to show progress for.
    @param prefix: A C{str} prefix, to appear at the start of the progress
        bar.
    """
    if not (show and os.isatty(2)):
        # Silent fallback: same ``update`` call shape, no output.
        class Bar:
            update = staticmethod(lambda _: None)

        yield Bar
        return

    counter = progressbar.SimpleProgress(format='%(value_s)s/%(max_value_s)s')
    percent = progressbar.Percentage(format=' %(percentage)3d%%')
    gauge = progressbar.Bar(marker='\x1b[33m#\x1b[39m')
    elapsed = progressbar.Timer(format='Elapsed: %(elapsed)s')
    remaining = progressbar.ETA(format='ETA: %(eta)8s')
    layout = [counter, percent, ' ', gauge, ' ', elapsed, ' ', remaining]

    with progressbar.ProgressBar(max_value=maxValue, widgets=layout,
                                 prefix=prefix) as bar:
        yield bar
def get_forums():
    """Collect every forum listed in the admin "select" box into ``save.forums``.

    For each ``<option>`` whose value is not ``"-1"`` the title and nesting
    level are parsed from the option text, the forum's edit page is fetched
    to read its description, and a dict describing the forum is appended to
    ``save.forums``.
    """
    logging.info('Récupération des forums')
    if config.debug:
        progress = progressbar.NoProgressBar()
    else:
        progress = progressbar.ProgressBar(widgets=[
            progressbar.SimpleProgress('/'), ' ',
            progressbar.Bar("#", "[", "]"),
            progressbar.Percentage()])

    d = PyQuery(url=config.rooturl + '/a-f1/', opener=fa_opener)

    save.forums = []
    levels = {}  # nesting level -> new id of the last forum seen at that level

    # Raw strings: the previous plain literals contained the invalid escape
    # sequence "\|". Compiled once instead of on every iteration.
    title_re = re.compile(r'(((\||\xa0)(\xa0\xa0\xa0))*)\|--([^<]+)')
    indent_re = re.compile(r'(\||\xa0)\xa0\xa0\xa0')

    options = [opt for opt in d.find("select option")
               if opt.get("value", "-1") != "-1"]

    n = 1
    for option in progress(options):
        forum_id = option.get("value", "-1")
        logging.debug('Récupération: forum %s', forum_id)

        title = title_re.search(option.text).group(5)
        level = len(indent_re.findall(option.text))

        if level <= 0:
            parent = 0
        else:
            parent = levels[level - 1]
        levels[level] = n

        edit_page = PyQuery(
            url=config.rooturl
            + '/admin/index.forum?part=general&sub=general&mode=edit&fid='
            + forum_id + '&extended_admin=1&' + tid,
            opener=fa_opener)
        try:
            description = edit_page("textarea").text()
        except Exception:
            # Best effort: a missing/unparsable description becomes empty.
            # Narrowed from a bare ``except`` so Ctrl-C still interrupts.
            description = ""

        save.forums.append({'id': int(forum_id[1:]), 'newid': n,
                            'type': forum_id[0], 'parent': parent,
                            'title': title, 'description': description,
                            'parsed': False})
        n += 1
def create_progressbar(max_val: int):
    """Build (without starting) the progress bar shown while fetching messages.

    Args:
        max_val: Value at which the bar is complete.
    """
    layout = [
        'Fetching messages...',  # static text
        progressbar.Bar(left='[', marker='=', right=']'),  # the bar itself
        progressbar.SimpleProgress(),  # "6 of 10"-style counter
    ]
    return progressbar.ProgressBar(maxval=max_val, widgets=layout)
def run(agent):
    """Play the configured number of episodes and return their scores.

    Reads ``episodes`` (default 1) and ``train_mode`` (default True) from
    ``agent.config``. A progress bar is shown only in train mode.
    """
    settings = agent.config
    episodes = settings.get('episodes', 1)
    train_mode = settings.get('train_mode', True)

    if train_mode:
        layout = [
            'Episode: ', pb.SimpleProgress(), ' ',
            pb.Variable('Score'), ' ',
            pb.AdaptiveETA(),
        ]
        timer = pb.ProgressBar(widgets=layout, maxval=episodes).start()

    scores = []
    for number in range(1, episodes + 1):
        latest = episode(agent)
        scores.append(latest)
        if not train_mode:
            continue
        timer.update(number, Score=latest)

    if train_mode:
        timer.finish()
    return scores
def getImages(self):
    """Hash every image in ``self.path`` and group entries with equal hashes.

    Only entries whose path ends in ``.png``, ``.jpg`` or ``.jpeg`` are
    hashed (average hash).

    Returns:
        A list of groups; each group is a list of ``(dir_entry, size)``
        tuples for images whose hash distance is 0. As before, a group is
        appended once per member, so a group of k images appears k times.
    """
    widgets = [
        'Getting images: [', progressbar.SimpleProgress(), '] ',
        progressbar.Bar(), ' (', progressbar.Timer(), ') ',
    ]
    entries = progressbar.progressbar(
        os.scandir(self.path), widgets=widgets,
        max_value=len(os.listdir(self.path)))

    hashed = []
    for image in entries:
        if image.path.endswith(('.png', '.jpg', '.jpeg')):
            # Close the image file as soon as it is hashed; previously the
            # handle was left open for every image in the directory.
            with Image.open(image.path) as opened:
                hashed.append((image, imagehash.average_hash(opened)))

    duplicates = []
    for _, reference in hashed:
        group = [(entry, os.stat(entry.path).st_size)
                 for entry, candidate in hashed
                 if reference - candidate == 0]
        # Every image matches itself, so a real duplicate needs > 1 member.
        if len(group) > 1:
            duplicates.append(group)
    return duplicates
def import_placecat():
    """Import top-level rows of ``jos_places_tags`` as ``PlaceCategory`` objects.

    For every row with ``parent_id=0`` a ``PlaceCategory`` is saved with the
    row's tag as main tag, and the tags of its child rows are attached.
    """
    fields = ['id', 'teg_id', 'ordering', 'second_name', 'published']
    cursor = db.cursor()
    cursor.execute("SELECT %s FROM jos_places_tags WHERE parent_id=0" % (
        ', '.join(fields)))
    all_cats = cursor.fetchall()

    bar = progressbar.ProgressBar(
        maxval=len(all_cats),
        widgets=['import place category: ', progressbar.SimpleProgress()],
    ).start()

    for done, row in enumerate(all_cats, 1):
        main_tag = Tag.objects.get(pk=int(row[1]))
        # Non-positive orderings fall back to 1.
        mean = int(row[2]) if row[2] > 0 else 1
        place_category = PlaceCategory(
            pk=int(row[0]),
            main_tag=main_tag,
            name=str(row[3]),
            category_mean=mean,
            is_published=bool(row[4]),
        )
        place_category.save()

        child_cursor = db.cursor()
        child_cursor.execute(
            "SELECT teg_id FROM jos_places_tags WHERE parent_id=%s" % (
                place_category.pk))
        for child in child_cursor.fetchall():
            place_category.tagging.add(Tag.objects.get(pk=child[0]))
            place_category.save()

        bar.update(done)
    bar.finish()
def import_message():
    """Import rows of ``jos_uddeim_in`` as one-message ``Chain`` objects.

    For every row a ``Chain`` is created with sender and recipient as members
    (both marked as having read it) and a ``Message`` is attached to it. Rows
    whose sender or recipient cannot be loaded are skipped.
    """
    fields = ['fromid', 'toid', 'toread', 'im_date', 'message']
    sql = "SELECT %s FROM jos_uddeim_in" % (', '.join(fields))
    cursor = db.cursor()
    cursor.execute(sql)
    all_messages = cursor.fetchall()

    bar = progressbar.ProgressBar(maxval=len(all_messages), widgets=[
        'import message: ',
        progressbar.SimpleProgress(),
    ]).start()

    # The bar counts processed rows, so it also advances for skipped rows and
    # reaches its maximum (previously skipped rows were never counted).
    for processed, row in enumerate(all_messages, 1):
        sent_at = datetime.fromtimestamp(row[3])
        try:
            from_user = User.objects.get(pk=row[0])
            to_user = User.objects.get(pk=row[1])
        except Exception:
            # Best effort: skip messages whose users cannot be loaded.
            # Narrowed from a bare ``except`` so KeyboardInterrupt and
            # SystemExit are no longer swallowed.
            bar.update(processed)
            continue

        members = [from_user, to_user]
        chain = Chain.objects.create()
        chain.members.add(*members)
        chain.have_read.add(*members)
        chain.save()

        msg = Message(sender=from_user, sent_at=sent_at, body=row[4])
        msg.chain = chain
        msg.save()

        bar.update(processed)
    bar.finish()
def execute(self, sigmond_inputs):
    """Run ``self.sigmond_batch`` once per input and show a progress bar.

    Does nothing except log when ``sigmond_inputs`` is empty. Jobs are only
    launched when ``self.mode`` is ``"local"``; they run in a thread pool of
    ``self.simultaneous_jobs`` workers, and the call returns once all are
    done.
    """
    job_count = len(sigmond_inputs)
    if job_count == 0:
        logging.info("No sigmond inputs passed to executor...")
        return

    logging.info(f"Executing {job_count} job(s)...")
    if self.mode != "local":
        return

    pool = concurrent.futures.ThreadPoolExecutor(
        max_workers=self.simultaneous_jobs)
    futures = [
        pool.submit(subprocess.run, [self.sigmond_batch, sig_input.filename])
        for sig_input in sigmond_inputs
    ]
    # Do not block here; completion is tracked through the futures below.
    pool.shutdown(False)

    first_completed = concurrent.futures.FIRST_COMPLETED
    status = concurrent.futures.wait(
        futures, return_when=first_completed, timeout=0.1)

    layout = [
        progressbar.SimpleProgress(), ' job(s) finished ',
        progressbar.Percentage(), ' ', progressbar.Bar(),
    ]
    pbar = progressbar.ProgressBar(widgets=layout,
                                   maxval=len(futures)).start()
    while status.not_done:
        pbar.update(len(status.done))
        status = concurrent.futures.wait(futures,
                                         return_when=first_completed)
    pbar.finish()
def cigntool_check_files(files):
    """Verify each file's digital signature with ``signtool verify /pa``.

    Args:
        files: Sized iterable of file paths to check.

    Returns:
        The list of files for which ``signtool`` exited with code 1.
    """
    print('--== PHASE 1 ==--')
    print("Checking files for digital signature")

    unver_files = []
    verified = 0
    pb = progressbar.ProgressBar(maxval=len(files), widgets=[
        progressbar.Bar('=', '[', ']'), ' ',
        progressbar.SimpleProgress()
    ])
    pb.start()

    for checked, file in enumerate(files, 1):
        # The argument list is passed directly (no ``shell=True``) so file
        # names containing shell metacharacters cannot be interpreted by the
        # shell.
        process = subprocess.run(['signtool', 'verify', '/pa', file],
                                 stderr=subprocess.PIPE,
                                 stdout=subprocess.PIPE)
        if process.returncode == 1:
            unver_files.append(file)
        else:
            verified += 1
        pb.update(checked)
    pb.finish()

    print("Successfully verified {} files. Remaining {}".format(
        verified, len(unver_files)))
    return unver_files
def eval_pop_random(pop, n, showbar=False):
    """Evaluate every net of a population with ``test_net_random``.

    pop: population (list of nets)
    n: number of test games to be played FOR EACH net
    showbar: when true, a shared progress bar sized ``len(pop) * n`` is
        stored in the global ``BAR`` for the duration of the evaluation

    returns -> (population avg winrate, [list of each net's individual
    winrate in the test])
    """
    global BAR_V
    global BAR

    if showbar:
        layout = [progressbar.Bar('=', '[', ']'), ' ',
                  progressbar.SimpleProgress()]
        BAR = progressbar.ProgressBar(maxval=len(pop) * n, widgets=layout)
        BAR.start()

    res = [test_net_random(net, n, showbar=showbar) for net in pop]
    avg_s = sum(res) / len(pop)

    if showbar:
        BAR.finish()
    return avg_s, res
def __init__(self, config):
    """Set up dataset, loader, progress bar, log file and model for testing.

    The dataset and model classes are looked up by ``config.data.protocol``
    and ``config.model.protocol``. Checkpoints are loaded and the model is
    switched to eval mode; it is moved to device 0 when
    ``config.framework.num_gpu`` is positive.
    """
    ### data
    self.cfg = config
    dataset_cls = dataset_protocol[config.data.protocol]
    self.dataset = dataset_cls(config)
    self.dataloader = torch.utils.data.DataLoader(
        self.dataset,
        batch_size=config.data.batch_size,
        num_workers=config.framework.num_thread,
    )

    ### progress bar
    layout = [
        "Testing phase [", progressbar.SimpleProgress(), "] [",
        progressbar.Percentage(), "] ",
        progressbar.Bar(marker="█"),
        " (", progressbar.Timer(), " ", progressbar.ETA(), ") ",
    ]
    self.bar = progressbar.ProgressBar(
        widgets=layout,
        max_value=config.data.batch_size,
        term_width=100,
    )

    ### logging (the file object is kept open on the instance)
    log_path = "{}/{}.json".format(config.base_dir, "test")
    self.logger = open(log_path, "w")

    ### model
    model_cls = model_protocol[config.model.protocol]
    self.model = model_cls(config)
    self.load_checkpoints()
    use_gpu = config.framework.num_gpu > 0
    if use_gpu:
        self.model.to(device=0)
    self.model.eval()
def GroupDetailsScrap(keyword):
    """Visit every group link, read its name and type, and pass them to
    ``joinGroup`` together with ``keyword``.

    Groups whose page lacks the expected elements are skipped; the progress
    counter only advances for groups that were handled.
    """
    name_selector = '#seo_h1_tag > a'
    type_selector = (
        '#pagelet_group_about > div:nth-child(1) > div._j1y > div > div > div._3-8n._3qn7._61-0._2fyi._3qng > div:nth-child(2) > div._2ieo'
    )
    fallback_selector = '#groupsDescriptionBox > div > div._3-8x > span > span'

    group_links = GroupLinkScrap()
    bar = progressbar.ProgressBar(
        widgets=[progressbar.SimpleProgress()],
        max_value=len(group_links),
    ).start()

    handled = 0
    for link in group_links:
        try:
            driver.get(link)
            time.sleep(2)  # give the page time to load
            group_name = driver.find_element_by_css_selector(
                name_selector).text
            candidates = driver.find_elements_by_css_selector(type_selector)
            # Fall back to the description box when the "about" pagelet has
            # no type element.
            group_type_name = (
                candidates[0].text if len(candidates) > 0
                else driver.find_element_by_css_selector(
                    fallback_selector).text
            )
            joinGroup(link, group_name, group_type_name, keyword)
            handled += 1
            time.sleep(0.1)
            bar.update(handled)
        except NoSuchElementException:
            # Best effort: leave this group out and move on.
            pass
    bar.finish()
def __init__(self, max_epoch, batch_size, N, custom_text_dict):
    """Create and start a training progress bar with custom text fields.

    Args:
        max_epoch: Number of epochs.
        batch_size: Samples per batch.
        N: Number of samples; ``ceil(N / batch_size)`` batches per epoch.
        custom_text_dict: Maps a format key to the label shown for it; each
            entry is rendered as ``"<label> = <value to 6 decimals>, "`` and
            starts at -1.
    """
    self.current_epoch = 0
    self.current_batch = 0
    self.max_batch = int(math.ceil(N / batch_size))

    # Fixed "(Epoch: ..., Batch: ...)" part followed by one field per entry.
    text_parts = [
        '(Epoch: [%(epoch)d/%(max_epoch)d], Batch: [%(batch)d/%(max_batch)d]) '
    ]
    text_parts.extend(
        "{} = %({}).6f, ".format(label, key)
        for key, label in custom_text_dict.items()
    )

    mapping = dict(epoch=1, max_epoch=max_epoch, batch=1,
                   max_batch=self.max_batch)
    mapping.update(dict.fromkeys(custom_text_dict, -1))

    self.format_custom_text = progressbar.FormatCustomText(
        ''.join(text_parts),
        mapping,
    )

    layout = [
        progressbar.Percentage(), ' ',
        progressbar.AnimatedMarker(), ' ',
        progressbar.Bar(marker='█'), ' ',
        progressbar.SimpleProgress(), ' ',
        self.format_custom_text, ' ',
        progressbar.ETA(),
    ]
    self.bar = progressbar.ProgressBar(
        max_value=max_epoch * self.max_batch, widgets=layout)
    self.bar.start()
def __init__(self, filename):
    """Load functions and labels from ``self.CODE_DIR + filename``.

    The file is read in groups of three lines: a signature, the rest of the
    function (appended to the stripped signature), and an integer label.
    Every function string is converted with ``self.line_to_tensor``.

    Raises:
        ValueError: If the file contains no function (``max`` of an empty
            sequence) or a label line is not an integer.
    """
    self.data = []
    self.labels = []
    functions = []
    signature = ""

    # ``with`` closes the file; previously the handle was never closed.
    with open(self.CODE_DIR + filename, "r", encoding="utf-8") as source:
        for ind, elem in enumerate(source):
            position = ind % 3
            if position == 0:
                signature = elem
            elif position == 1:
                functions.append(signature.strip() + elem.strip())
            else:
                self.labels.append(torch.tensor(int(elem)))

    self.length = min(len(functions), len(self.labels))
    self.max_chars = max(len(function) for function in functions)

    print("Before one hot encoding")
    widgets = [
        progressbar.SimpleProgress(), ' ',
        progressbar.Bar(), ' ',
        progressbar.ETA()
    ]
    bar = progressbar.ProgressBar(widgets=widgets,
                                  maxval=len(functions)).start()
    for done, function in enumerate(functions, 1):
        self.data.append(self.line_to_tensor(function))
        # Report the number of functions encoded so far (1-based) so the bar
        # reaches its maximum on the last item.
        bar.update(done)
    bar.finish()
def __init__(self, obj: object):
    """Initialise the parent with ``obj`` and reset all transfer state.

    The progress-bar widget layout is prepared here; the bar itself
    (``self.bar``) and the payload (``self.data``) start as ``None`` and
    every counter, address and timestamp starts at 0.
    """
    super().__init__(obj)

    self.widgets = [
        progressbar.Bar(marker="#", left="[", right="]"),
        progressbar.Percentage(),
        " | ",
        progressbar.FileTransferSpeed(),
        " | ",
        progressbar.SimpleProgress(),
        " | ",
        progressbar.ETA(),
    ]
    self.bar = None

    # Sizes, counters and positions.
    self.block_size = self.packets_sent = self.pages_sent = 0
    self.packet_count = self.page_count = self.flash_size = 0
    self.current_page = self.data_start = self.data_end = 0

    self.data = None

    # Addressing, checksum and timing.
    self.page_address = self.crc = 0
    self.start_time = self.end_time = self.duration = 0
def create_progress_bar(dynamic_msg=None):
    """
    Build a simple progress bar for monitoring a training procedure.

    Usage:
        bar = create_progress_bar('loss')
        L = []
        for i in bar(iterable):
            ...
            L.append(...)
            bar.dynamic_messages['loss'] = np.mean(L)

    :param dynamic_msg: name of the measure being monitored (loss, accuracy,
        etc.); when given, a ``DynamicMessage`` widget with that name is
        appended to the bar.
    :return: a ProgressBar() object.
    """
    layout = [
        '[batch ', progressbar.SimpleProgress(), '] ',
        progressbar.Bar(),
        ' (', progressbar.ETA(), ') ',
    ]
    extra = ([] if dynamic_msg is None
             else [progressbar.DynamicMessage(dynamic_msg)])
    return progressbar.ProgressBar(widgets=layout + extra)
def init_engine():
    """Reset the shared engine state ``th`` from ``conf`` and create the
    global progress bar ``pbar``."""
    global pbar

    # Control parameters.
    th.result = []
    th.thread_mode = conf.engine_mode == "multi_threaded"
    th.tasks = conf.task_queue
    th.tasks_num = conf.task_queue.qsize()
    th.output_path = conf.output_path
    th.scan_count = 0
    th.found_count = 0
    th.is_continue = True
    th.console_width = getTerminalSize()[0] - 2

    # Concurrency: never more workers than queued tasks.
    queued = th.tasks.qsize()
    workers = queued if queued < conf.concurrent_num else conf.concurrent_num
    th.concurrent_count = workers
    th.concurrent_num = workers

    # Progress bar.
    layout = ['[', progressbar.SimpleProgress(), ']',
              '[', progressbar.Timer(), ']']
    pbar = progressbar.ProgressBar(redirect_stdout=True, widgets=layout)

    th.start_time = time.time()
def __init__(self, length):
    """Create a progress bar of ``length`` steps with a free-text message.

    The bar is stored in ``self.bar`` and ``length`` in ``self.length``. The
    trailing widget shows the dynamic message named ``'msg'`` verbatim, or
    nothing while that message is ``None``.
    """
    import progressbar as p2

    class modifiedDynamicMessage(p2.DynamicMessage):
        """DynamicMessage that renders the raw message without its name."""

        def __call__(self, progress, data):
            string = data['dynamic_messages'][self.name]
            # The former trailing ``return self.name + ...`` was unreachable
            # (both branches already returned) and has been removed.
            return string if string is not None else ''

    bar = p2.ProgressBar(
        max_value=length,
        widgets=[
            p2.Percentage(), ' ',
            p2.SimpleProgress(format='%(value)s/%(max_value)s'),
            p2.Bar('>'),
            p2.Timer(format='%(elapsed)s'), ' ',
            p2.ETA(format='ETA %(eta)s',
                   format_not_started='ETA --:--:--',
                   format_finished='TIM %(elapsed)s'), ' ',
            modifiedDynamicMessage('msg')
        ])
    self.bar = bar
    self.length = length
def run(self, seed_img, critics):
    """Run the optimizer on the image according to the loss returned by the
    critics.

    This is a generator: after every optimizer step it yields
    ``(loss, image)``, where ``image`` has been clamped in place to
    ``[0.0, 1.0]``. The progress bar is always finished, even if the caller
    stops iterating early or an error occurs.
    """
    image = seed_img.to(self.device).requires_grad_(True)

    obj = MultiCriticObjective(self.encoder, critics)
    opt = SolverLBFGS(obj, image, lr=self.lr)

    widgets = [
        progressbar.SimpleProgress(),
        " | ",
        progressbar.Variable("loss", format="{name}: {value:0.3e}"),
        " ",
        progressbar.Bar(marker="■", fill="·"),
        " ",
        progressbar.ETA(),
    ]
    progress = progressbar.ProgressBar(
        max_value=self.max_iter, widgets=widgets,
        variables={"loss": float("+inf")}
    )

    last_step = None
    try:
        for i, loss in self._iterate(opt):
            last_step = i
            # Update the progress bar with the result!
            progress.update(i, loss=loss)
            # Constrain the image to the valid color range.
            image.data.clamp_(0.0, 1.0)
            # Return back to the user...
            yield loss, image

        # Shrink the bar to the step actually reached. Guarded because the
        # loop variable is unbound when the optimizer yields nothing, which
        # previously raised UnboundLocalError here.
        if last_step is not None:
            progress.max_value = last_step
    finally:
        progress.finish()
def widget(what=""):
    """Return the widget list for a progress bar.

    Args:
        what: Optional trailing label. When non-empty it is appended after a
            single space; when empty, two empty strings are appended so the
            list length stays the same.
    """
    # The unused local ``padding = 30`` was removed.
    return [
        progressbar.ETA(), ' ',
        progressbar.Bar('='), ' ',
        progressbar.SimpleProgress(),
        ' ' if what else "",
        what
    ]
def test_all_widgets_small_values(max_value):
    """Render one of each widget type while stepping through values 1..10."""
    bar = progressbar.ProgressBar(
        widgets=[
            progressbar.Timer(),
            progressbar.ETA(),
            progressbar.AdaptiveETA(),
            progressbar.AbsoluteETA(),
            progressbar.DataSize(),
            progressbar.FileTransferSpeed(),
            progressbar.AdaptiveTransferSpeed(),
            progressbar.AnimatedMarker(),
            progressbar.Counter(),
            progressbar.Percentage(),
            progressbar.FormatLabel('%(value)d'),
            progressbar.SimpleProgress(),
            progressbar.Bar(),
            progressbar.ReverseBar(),
            progressbar.BouncingBar(),
            progressbar.CurrentTime(),
            progressbar.CurrentTime(microseconds=False),
            progressbar.CurrentTime(microseconds=True),
        ],
        max_value=max_value,
    )

    for value in range(1, 11):
        time.sleep(1)
        bar.update(value)
    bar.finish()
def load_video2images(capture, length, height, width, channels, mean_image):
    """Read ``length`` frames from ``capture`` into one float32 array.

    Each frame is obtained with ``load_convert_image`` (mean subtraction,
    scaling and transposition enabled). When a frame cannot be read, the
    previous slot is copied instead and a message is printed. ``capture`` is
    released before returning.

    Returns:
        Array of shape ``(length, 1, channels, height, width)``.
    """
    # progress bar
    bar = progressbar.ProgressBar(maxval=length, widgets=[
        progressbar.Bar('>', '[', ']'), ' ',
        progressbar.SimpleProgress(), ' ',
        progressbar.Percentage(), ' ',
        progressbar.ETA()
    ]).start()

    frame_volume = np.zeros((length, 1, channels, height, width),
                            dtype=np.float32)
    for i in range(0, length):
        retval, frame = load_convert_image(capture, mean_image, height,
                                           width, channels,
                                           is_sub_mean=True, is_scale=True,
                                           is_transpose=True)
        if retval:
            frame_volume[i, 0, ...] = frame
        else:
            # For i == 0 this copies the last (still all-zero) slot.
            frame_volume[i] = frame_volume[i - 1]
            print('missing frame {}'.format(i))
        # The bar was started but never advanced or finished; report the
        # number of frames handled so far.
        bar.update(i + 1)
    bar.finish()

    capture.release()
    return frame_volume
def test(self):
    """Evaluate every ``.pkl`` weights file in ``self.test_root``.

    The epoch number is taken from the file name (the part after the last
    ``_`` and before the first following ``.``). Each file is loaded into
    ``self.net`` and evaluated with ``self._test``; the epoch with the fewest
    errors is printed at the end.
    """
    weightses = [name for name in os.listdir(self.test_root)
                 if os.path.splitext(name)[-1] == '.pkl']
    if not weightses:
        # Without this guard ``total`` was never bound and the final print
        # raised NameError.
        print(f'no .pkl weights found in {self.test_root}')
        return

    pb = progressbar.ProgressBar(widgets=[
        progressbar.Percentage(), '(',
        progressbar.SimpleProgress(), ')',
        progressbar.Bar(),
        progressbar.ETA(), ' ',
        progressbar.Timer()
    ])
    pb.start(max_value=len(weightses))

    min_err = 10000
    best_epoch = 0
    for done, weights_file in enumerate(weightses, 1):
        # weights config
        weights_path = os.path.join(self.test_root, weights_file)
        epoch = int(weights_file.split('_')[-1].split('.')[0])
        # load weights
        self.net.load_state_dict(torch.load(weights_path))
        total, errors = self._test(epoch=epoch)
        if errors < min_err:
            min_err = errors
            best_epoch = epoch
        # Advance after the file has been evaluated so the bar reflects
        # completed work.
        pb.update(done)
    pb.finish()

    # NOTE: ``total`` is the value returned for the last evaluated file.
    print(f'best epoch: epoch {best_epoch}, errors: {min_err} / {total}')
async def main():
    """Process every spreadsheet row with ``handle_document``.

    Reads ``Docs2CollectionItems.xlsx`` into a dataframe, opens a
    ``Treillage`` session using ``creds.yml`` and awaits ``handle_document``
    once per row while a console progress bar tracks the row index.
    """
    # Input locations: credentials file and the spreadsheet driving the run.
    credentials_file = "creds.yml"
    filename = "Docs2CollectionItems.xlsx"

    # Load the spreadsheet into a dataframe (empty cells stay as-is).
    batch = pandas.read_excel(filename, na_filter=False)

    # Console progress bar layout.
    layout = [
        ' (', progressbar.SimpleProgress(), ' ',
        progressbar.Percentage(), ') ',
        progressbar.Bar(),
        ' [', progressbar.Timer(), '] ',
    ]

    with progressbar.ProgressBar(maxval=len(batch), widgets=layout) as bar:
        async with Treillage(credentials_file, requests_per_second=8) as tr:
            for idx, row in batch.iterrows():
                # Pull the row's fields, then hand them to the worker.
                project_id = row["__ProjectID"]
                section = row["SectionSelector"]
                collection_id = row["__CollectionItemGuid"]
                field = row["FieldSelector"]
                doc_ids = json.loads(row["JSON__DocIDs"])
                await handle_document(tr=tr,
                                      projectid=project_id,
                                      sectionselector=section,
                                      collectionid=collection_id,
                                      fieldselector=field,
                                      docids=doc_ids)
                # Move the bar to the row just processed.
                bar.update(idx)
def build_voc(vocab_path, pre_vocab_size, word_d=300, name=None,
              chunksize=120000):
    """Build (or load a cached) vocabulary and embedding matrix.

    The first member of the zip archive at ``vocab_path`` is read as a
    space-separated table whose first column is the word and whose remaining
    columns are the vector. A final row holding the mean vector is appended
    for the ``'<unk>'`` token. The result is stored with ``save_embedding``.

    Args:
        vocab_path: Path to the zipped embedding file.
        pre_vocab_size: Approximate number of words; used as the progress
            bar maximum.
        word_d: Embedding dimension.
        name: Cache name; defaults to the archive's base name without
            extension.
        chunksize: Number of rows read per pandas chunk.

    Returns:
        ``(word_dict, embedding)``: word -> row index, and the array of
        vectors (one row per line read, plus the ``'<unk>'`` row).
    """
    if name is None:
        name = os.path.splitext(os.path.basename(vocab_path))[0]
    vocab_dir = os.path.dirname(vocab_path)

    word_dict, embedding = load_embedding(vocab_dir, name)
    if word_dict is not None and embedding is not None:
        return word_dict, embedding

    word_dict = {}
    # Chunks are collected and stacked once at the end; stacking inside the
    # loop copied the whole matrix for every chunk.
    blocks = [np.empty((0, word_d))]

    wdgts = [
        pgb.SimpleProgress(), ' ',
        pgb.Bar(marker='∎', left='|', right='|'), ' ',
        pgb.Timer(), ' ',
        pgb.ETA()
    ]

    # Both the archive and the member are closed when reading is done.
    with zipfile.ZipFile(vocab_path) as em_zipfile, \
            em_zipfile.open(em_zipfile.namelist()[0], 'r') as member:
        em_df = pd.read_csv(member, sep=' ', header=None, quoting=3,
                            encoding='utf-8', keep_default_na=False,
                            iterator=True, chunksize=chunksize)
        print('Approximate', pre_vocab_size, 'words')

        # Row position in the final matrix. Counted explicitly instead of
        # being derived from the chunk's index labels, so it is correct
        # however pandas numbers the rows of later chunks.
        position = 0
        with pgb.ProgressBar(widgets=wdgts, maxval=pre_vocab_size) as p:
            for chuck in em_df:
                for word in chuck.iloc[:, 0]:
                    if word in word_dict:
                        print('Ignore some unknown words')
                    else:
                        word_dict[word] = position
                        # ``pre_vocab_size`` is only approximate; clamp so
                        # the bar never receives a value above its maximum.
                        p.update(min(position, pre_vocab_size))
                    position += 1
                blocks.append(np.asarray(chuck.iloc[:, 1:]))

    embedding = np.vstack(blocks)
    embedding = np.vstack((embedding, embedding.mean(axis=0)))
    # '<unk>' maps to the mean row just appended. ``len(word_dict)`` pointed
    # at a real word's row whenever duplicate words had been skipped.
    word_dict['<unk>'] = embedding.shape[0] - 1

    save_embedding(vocab_dir, name, word_dict, embedding)
    return word_dict, embedding