def xml_to_yolo_4(boxes, label_map, save_dir="yolo/", ratio=0.8, shuffle=True, no_obj_dir=None):
    save_dir = Path(save_dir)
    train_dir = save_dir / "train"
    valid_dir = save_dir / "val"
    save_dir.mkdir()
    train_dir.mkdir()
    valid_dir.mkdir()
    boxes_by_name = boxes.getBoxesBy(lambda box: box.getImageName())
    image_names = sorted(boxes_by_name.keys())
    nb_train = round(ratio * len(boxes_by_name))
    new_names = []
    if shuffle:
        random_gen = random.Random(498_562_751)
        image_names = random_gen.sample(image_names, len(image_names))
    for i, image_name in tenumerate(image_names, unit="img"):
        image_boxes = boxes_by_name[image_name]
        folder = train_dir if i < nb_train else valid_dir
        new_image_name = folder / Path(image_name).with_stem(f"im_{i:06}").name
        new_names.append(new_image_name)
        description = "\n".join(
            "{} {} {} {} {}".format(label_map[box.getClassId()], *box.getRelativeBoundingBox())
            for box in image_boxes)
        new_image_name.with_suffix(".txt").write_text(description)
        shutil.copy(image_name, new_image_name)
    (save_dir / "train.txt").write_text(
        "".join(f"{Path('data/train') / n.name}\n" for n in new_names[:nb_train]))
    (save_dir / "val.txt").write_text(
        "".join(f"{Path('data/val') / n.name}\n" for n in new_names[nb_train:]))
    if no_obj_dir:
        # also distribute images without any objects, with empty label files
        no_obj_dir = Path(no_obj_dir)
        image_names = sorted(no_obj_dir.glob("*.jpg"))
        nb_train = round(ratio * len(image_names))
        new_names = []
        if shuffle:
            rand_gen = random.Random(478_737_303)
            image_names = rand_gen.sample(image_names, len(image_names))
        for i, image_name in tenumerate(image_names):
            folder = train_dir if i < nb_train else valid_dir
            new_image_name = folder / Path(image_name).with_stem(f"im_no_obj_{i:06}").name
            new_names.append(new_image_name)
            shutil.copy(image_name, new_image_name)
            new_image_name.with_suffix(".txt").touch()
        with (save_dir / "train.txt").open("a") as f:
            f.write("".join(f"{Path('data/train') / n.name}\n" for n in new_names[:nb_train]))
        with (save_dir / "val.txt").open("a") as f:
            f.write("".join(f"{Path('data/val') / n.name}\n" for n in new_names[nb_train:]))
def evaluation_measures(self):
    df_path = []
    df_match = []
    for i, j in tenumerate(self.select_rows()):
        df_path.append(converting_path_to_xy(j[1]))
        df_match.append(converting_path_to_xy(j[0]))
    dist_frech_cut = []
    dist_frech_full = []
    arc_length_diff_cut = []
    arc_length_diff_full = []
    tracked_vehicle = []
    mode = []
    for i, j in tenumerate(df_path):
        tracked_vehicle.append(j['Tracked Vehicle'].values[0])
        mode.append(j['Type'].values[0])
        p = j.loc[:, ['x', 'y']]
        q = df_match[i].loc[:, ['x', 'y']]
        if len(j) < 3:
            # too short to trim endpoints: only the full-path measures apply
            dist_frech_cut.append(0)
            arc_length_diff_cut.append(0)
            dist_frech_full.append(similaritymeasures.frechet_dist(p.values, q.values))
            l_p2 = similaritymeasures.get_arc_length(p.values)[0]
            l_m2 = similaritymeasures.get_arc_length(q.values)[0]
            arc_length_diff_full.append(round(abs(l_p2 - l_m2), 3))
            continue
        d1 = similaritymeasures.frechet_dist(p.values[1:-1], q.values[1:-1])
        d2 = similaritymeasures.frechet_dist(p.values, q.values)
        l_p1 = similaritymeasures.get_arc_length(p.values[1:-1])[0]
        l_m1 = similaritymeasures.get_arc_length(q.values[1:-1])[0]
        l_p2 = similaritymeasures.get_arc_length(p.values)[0]
        l_m2 = similaritymeasures.get_arc_length(q.values)[0]
        dist_frech_full.append(d2)
        dist_frech_cut.append(d1)
        arc_length_diff_cut.append(round(abs(l_p1 - l_m1), 3))
        arc_length_diff_full.append(round(abs(l_p2 - l_m2), 3))
    evaluation = pd.DataFrame({
        'ID': tracked_vehicle,
        'Type': mode,
        'Frechet_distance': dist_frech_full,
        'Frechet_distance_cut': dist_frech_cut,
        'Length_difference': arc_length_diff_full,
        'Length_difference_cut': arc_length_diff_cut
    })
    return evaluation
def test_enumerate():
    """Test contrib.tenumerate"""
    with closing(StringIO()) as our_file:
        a = range(9)
        assert list(tenumerate(a, file=our_file)) == list(enumerate(a))
        assert list(tenumerate(a, 42, file=our_file)) == list(enumerate(a, 42))
    with closing(StringIO()) as our_file:
        _ = list(tenumerate((i for i in a), file=our_file))
        assert "100%" not in our_file.getvalue()
    with closing(StringIO()) as our_file:
        _ = list(tenumerate((i for i in a), file=our_file, total=len(a)))
        assert "100%" in our_file.getvalue()
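# A minimal sketch (not part of the test suite) of what the test above checks:
# a generator has no __len__, so tqdm cannot render a percentage unless total=
# is passed explicitly.
from tqdm.contrib import tenumerate

squares = (x * x for x in range(9))
for i, sq in tenumerate(squares, total=9):  # total= restores the "100%" readout
    pass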
def fraction_wrongly_matched(self, threshold_angle=45):
    wrongly_matched = {
        'id': [], 'type': [],
        'wrong_1': [], 'average_speed_1': [], 'bool_w1': [],
        'wrong_2': [], 'average_speed_2': [], 'bool_w2': [],
        'wrong_both': [], 'bool_wb': [],
        'length_trajectory': []
    }
    for ind, traj in tenumerate(self.tracks_line):
        wrongly_matched['id'].append((ind, traj['track_id'].values[0]))
        wrongly_matched['type'].append(traj['type'].values[0])
        wm = traj['wrong_match'].values
        speeds = traj[['speed_x', 'speed_y']].values
        # speeds at the points whose bearing difference exceeds the threshold
        w1 = [speeds[i][0] for i, j in enumerate(wm) if j[0] > threshold_angle]
        w2 = [speeds[i][1] for i, j in enumerate(wm) if j[1] > threshold_angle]
        wb = [i for i, j in enumerate(wm)
              if j[0] > threshold_angle and j[1] > threshold_angle]
        wrongly_matched['wrong_1'].append(round(len(w1) / len(traj) * 100, 1))
        wrongly_matched['wrong_2'].append(round(len(w2) / len(traj) * 100, 1))
        wrongly_matched['wrong_both'].append(round(len(wb) / len(traj) * 100, 1))
        wrongly_matched['bool_w1'].append(bool(w1))
        wrongly_matched['bool_w2'].append(bool(w2))
        wrongly_matched['bool_wb'].append(bool(wb))
        wrongly_matched['average_speed_1'].append(np.mean(w1) if w1 else 0)
        wrongly_matched['average_speed_2'].append(np.mean(w2) if w2 else 0)
        wrongly_matched['length_trajectory'].append(len(traj))
    return pd.DataFrame(wrongly_matched)
def get_label_probabilities(brainweb_file, labels=None, outres="mMR",
                            progress=True, dtype=np.float32):
    """
    @param labels : list of strings [default: Act.all_labels]
    @return out : 4D array of masks resampled as per `outres` (useful for PVC)
    """
    out_shape = getattr(Shape, outres)
    raw_data = load_file(brainweb_file)
    if labels is None:
        labels = Act.all_labels
    if set(labels).difference(Act.all_labels):
        raise KeyError("labels (%s) must be in Act.all_labels (%s)"
                       % (", ".join(labels), ", ".join(Act.all_labels)))
    num_classes = len(labels)
    res = np.zeros((num_classes,) + tuple(out_shape), dtype=dtype)
    for i, attr in tenumerate(labels, unit="label", desc="BrainWeb labels",
                              disable=not progress):
        # build a one-label activity class so only `attr` contributes
        class MAct(Act):
            attrs = [attr]
        setattr(MAct, attr, 1)
        res[i] = toPetMmr(raw_data, outres=outres, modes=[MAct])[0][:, ::-1]
    return res
def match_fixed_distance(self, list_index=None):
    tic = time.time()
    traj_mov_match = []
    special_cases = []
    point_traj = self.list_traj
    if list_index is not None:  # only keep the trajectories selected by list_index
        point_traj = [j for i, j in enumerate(point_traj) if i in list_index]
    for i, j in tenumerate(point_traj):
        while True:
            try:
                traj = map_matching(j, self.gdf_netw, self.map,
                                    self.max_init, self.max_d)
                traj_mov_match.append(traj)
                break
            except Exception:
                special_cases.append(j)
                break
    toc = time.time()
    print(f'{int(divmod(toc - tic, 60)[0])} min '
          f'{int(divmod(toc - tic, 60)[1])} sec')
    return traj_mov_match, special_cases
def get_data(
    use_cache: bool = True, num_wavelens: int = 300
) -> Tuple[LaserParams, Emiss, torch.LongTensor]:
    """Data is sorted in ascending order of wavelength."""
    if use_cache and Path("/data-new/alok/laser/data.pt").exists():
        data = torch.load(Path("/data-new/alok/laser/data.pt"))
        norm_laser_params, interp_emissivities, uids = (
            data["normalized_laser_params"],
            data["interpolated_emissivity"],
            data["uids"],
        )
        # XXX check length to avoid bugs.
        if interp_emissivities.shape[-1] == num_wavelens:
            return norm_laser_params, interp_emissivities, uids

    client = pymongo.MongoClient(
        "mongodb://*****:*****@mongodb07.nersc.gov/propopt"
    )
    db = client.propopt.laser_samples2
    laser_params, emissivity, wavelength = [], [], []
    interp_emissivities, interp_wavelengths = [], []
    uids = []
    # TODO: clean up and generalize when needed
    # the values are indexes for one-hot vectorization
    wattage_idxs = {
        0.2: 0, 0.3: 1, 0.4: 2, 0.5: 3, 0.6: 4, 0.7: 5,
        0.8: 6, 0.9: 7, 1.0: 8, 1.1: 9, 1.2: 10, 1.3: 11,
        # these last 2 wattages are problematic since their
        # emissivities are different lengths
        # 1.4: 12,
        # 1.5: 13,
    }
    # TODO: relax this to all wattages, try discretizing them w/
    # softmax instead
    for uid, entry in tenumerate(db.find()):
        # TODO: ensure that this is sorted by wavelength
        # TODO log transform?
        emiss_plot: List[float] = [
            e
            for ex in entry["emissivity_spectrum"]
            if ((e := ex["normal_emissivity"]) != 1.0
                and ex["wavelength_micron"] < 12)
        ]
def test_enumerate_numpy():
    """Test contrib.tenumerate(numpy.ndarray)"""
    try:
        import numpy as np
    except ImportError:
        raise SkipTest
    with closing(StringIO()) as our_file:
        a = np.random.random((42, 1337))
        assert list(tenumerate(a, file=our_file)) == list(np.ndenumerate(a))
def get_progress_bar(self, data_loader, total, description):
    if get_rank() == 0:
        # only the main process draws a progress bar
        pbar = tenumerate(data_loader,
                          total=total // data_loader.batch_size + 1,
                          desc=description, leave=False)
    else:
        pbar = enumerate(data_loader)
    yield pbar
    if hasattr(pbar, 'close'):
        pbar.close()
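# Usage sketch for the generator above, assuming it is wrapped with
# contextlib.contextmanager and lives on a hypothetical `trainer` object:
# rank 0 gets a tqdm-backed enumerator, all other ranks a plain enumerate,
# and the hasattr guard closes the bar defensively on exit.
from contextlib import contextmanager

with contextmanager(trainer.get_progress_bar)(data_loader, len(dataset), "train") as pbar:
    for step, batch in pbar:
        pass  # training step goes here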
def make_line_trajectories(self):
    traj_line_match = []
    gdf_netw = self.used_network
    for i, j in tenumerate(self.point_trajectories):
        tr_m = pd.merge(j, gdf_netw[['bearing', 'edge']], how='left', on=['edge'])
        tr_m = tr_m.rename(columns={'bearing_x': 'bearing',
                                    'bearing_y': 'bearing_edge'})
        diff = tr_m[['bearing', 'bearing_edge']].values
        bearing_diff = [round(abs(diff[a][0] - diff[a][1]), 1)
                        for a in range(len(tr_m))]
        for a, b in enumerate(bearing_diff):
            if b > 180:
                bearing_diff[a] = round(360 - b, 1)
        # point dataset with nodes of matched edge; this adds a column to all
        # original dataframes (chained assignment)
        j['wrong_match'] = bearing_diff
        # making line dataset --> always start and end point --> last point
        # has no successive point --> -1
        tr = j[:-1]
        u_edge = j['edge'].values[:-1]
        v_edge = j['edge'].values[1:]
        w_1 = j['wrong_match'].values[:-1]
        w_2 = j['wrong_match'].values[1:]
        w = tuple(zip(w_1, w_2))
        c = {'u_match': u_edge, 'v_match': v_edge,
             'time': tr['time'].values + 1000, 'wrong_match': w}
        df = pd.DataFrame(c)
        p = [LineString([j['geometry'].values[k], j['geometry'].values[k + 1]])
             for k in range(len(j) - 1)]
        tr = tr.drop(['geometry', 'time', 'N1_match', 'N2_match',
                      'wrong_match', 'edge'], axis=1)
        tr = pd.concat([tr, df], axis=1)
        tr = gpd.GeoDataFrame(tr, geometry=p)
        tr = pd.merge(tr, j.iloc[1:, 8:15], how='inner', on=['time'])
        traj_line_match.append(tr)
    self.line_trajectories = traj_line_match
def xml_to_yolo_3(boundingBoxes, yolo_dir, names_to_labels, ratio=0.8, shuffled=True):
    train_dir = os.path.join(yolo_dir, 'train')
    val_dir = os.path.join(yolo_dir, 'val')
    train_file = os.path.join(yolo_dir, 'train.txt')
    val_file = os.path.join(yolo_dir, 'val.txt')
    if not os.path.isdir(yolo_dir):
        os.mkdir(yolo_dir)
    if not os.path.isdir(train_dir):
        os.mkdir(train_dir)
    if not os.path.isdir(val_dir):
        os.mkdir(val_dir)
    boxes_by_name = boundingBoxes.getBoxesBy(lambda box: box.getImageName())
    names = sorted(boxes_by_name.keys())
    new_names = []
    if shuffled:
        random_gen = random.Random(498_562_751)
        names = random_gen.sample(names, len(names))
    number_train = round(ratio * len(names))
    for i, name in tenumerate(names):
        yolo_rep = []
        img_path = os.path.splitext(name)[0] + '.jpg'
        identifier = 'im_{}'.format(i)
        new_names.append(identifier + ".jpg")
        save_dir = train_dir if i < number_train else val_dir
        for box in boxes_by_name[name]:
            label = names_to_labels[box.getClassId()]
            x, y, w, h = box.getRelativeBoundingBox()
            yolo_rep.append('{} {} {} {} {}\n'.format(label, x, y, w, h))
        with open(os.path.join(save_dir, identifier + '.txt'), 'w') as f_write:
            f_write.writelines(yolo_rep)
        shutil.copy(img_path, os.path.join(save_dir, identifier + '.jpg'))
    with open(train_file, "w") as f:
        for item in new_names[:number_train]:
            relative_path = os.path.split(item)[1]
            f.write(os.path.join("data/train/", relative_path) + "\n")
    with open(val_file, "w") as f:
        for item in new_names[number_train:]:
            relative_path = os.path.split(item)[1]
            f.write(os.path.join("data/val/", relative_path) + "\n")
def evaluate_reconstruction(autoencoder: StyleganAutoencoder, data_loaders: dict) -> dict:
    metrics = defaultdict(list)
    psnr_ssim_evaluator = PSNRSSIMEvaluator()
    for i, batch in tenumerate(data_loaders['test'], desc="psnr_ssim", leave=False):
        batch = {k: v.to('cuda') for k, v in batch.items()}
        with torch.no_grad():
            denoised = autoencoder(batch['input_image'])
        psnr, ssim = psnr_ssim_evaluator.psnr_and_ssim(denoised, batch['output_image'])
        metrics['psnr'].append(float(psnr.cpu().numpy()))
        metrics['ssim'].append(float(ssim.cpu().numpy()))
    metrics = {k: statistics.mean(v) for k, v in metrics.items()}
    return metrics
def download_work(out_dir, work_url):
    work_req = requests.get(work_url)
    work_name = work_url.split("/")[4]
    work_dir = out_dir / work_name
    work_dir.mkdir(exist_ok=True)
    soup = BeautifulSoup(work_req.text, features="html.parser")
    image_tags = soup.find_all(class_="panelarea")
    for i, tag in tenumerate(image_tags, desc=f"{work_name}"):
        image_url = tag.get("href")
        output_name = image_url.split("/")[-1]
        output_filename = work_dir / f"{i:03}-{output_name}"
        image_req = requests.get(image_url)
        with open(output_filename, "wb") as f:
            f.write(image_req.content)
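# Usage sketch (hypothetical URL): out_dir must be a pathlib.Path, since
# download_work builds work_dir with the / operator, and the work name is
# taken from the fifth "/"-separated component of the URL.
from pathlib import Path

download_work(Path("downloads"), "https://example.com/works/12345/view")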
def _run_solver(solver, t_span: Tuple[float, float], dt: float,
                desc: str = "solver") -> SolverResult:
    """Given a numerical integrator, call its 'step' method T/dt times
    (where T is the last element of t_span)."""
    t_start, t_end = t_span
    t_arr = np.arange(t_start, t_end + dt, dt)
    y_arr = np.zeros(shape=(len(t_arr), len(solver.y)))
    logger.info(f"{len(t_arr)} iterations to do...")
    for i, t in tenumerate(t_arr, desc=desc):
        y_arr[i] = solver.y  # record the state, then advance it
        solver.step(t, dt)
    # add final y
    y_arr[-1] = solver.y
    return SolverResult(t_arr, y_arr.T, None, None, None, 0, 0, 0, 1,
                        "success", True)
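# A minimal sketch of the solver interface _run_solver expects (this
# EulerSolver is hypothetical, not from the source): anything exposing a `y`
# state vector and a `step(t, dt)` method works.
import numpy as np

class EulerSolver:
    """Explicit Euler: y <- y + dt * f(t, y)."""
    def __init__(self, f, y0):
        self.f = f
        self.y = np.asarray(y0, dtype=float)

    def step(self, t, dt):
        self.y = self.y + dt * self.f(t, self.y)

# e.g. exponential decay dy/dt = -y:
# result = _run_solver(EulerSolver(lambda t, y: -y, [1.0]), (0.0, 5.0), 0.01)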
def match_variable_distance(self, list_index=None):
    tic = time.time()
    traj_mov_match = []
    fails = []
    point_traj = self.list_traj
    if list_index is not None:
        point_traj = [j for i, j in enumerate(point_traj) if i in list_index]
    for i, j in tenumerate(point_traj):
        dist_init = self.max_init
        dist = self.max_d
        fail = 0
        while True:
            try:
                traj = map_matching(j, self.gdf_netw, self.map, dist_init, dist)
                traj_mov_match.append(traj)
                break
            except Exception:
                # widen the search distance and retry, more aggressively the
                # more often matching has already failed
                if fail < 3:
                    dist += 5
                    fail += 1
                elif 2 < fail <= 10:
                    dist += 10
                    fail += 1
                else:  # fail > 10
                    dist += 10
                    dist_init += 50
                    fail += 1
        fails.append(fail)
    toc = time.time()
    print(f'{int(divmod(toc - tic, 60)[0])} min '
          f'{int(divmod(toc - tic, 60)[1])} sec')
    return traj_mov_match
def get_wordlist(corpus):
    word_count = 0
    vocab = set()
    if corpus == 'reuters':
        file_ids = reuters.fileids()
        for file_ix, f in tenumerate(file_ids, desc='articles'):
            if f.startswith('train'):
                word_list = reuters.words(f)
                word_count += len(word_list)
                vocab.update(word_list)
        return vocab, word_count
    return None, None
def select_rows(self, segment_index=None):
    gdf_list = self.point_trajectories
    gdf_netw = self.used_network
    if segment_index is None:
        segment_index = list(np.arange(0, len(gdf_netw), 1))
    traj_eval = []
    for ind, traj in tenumerate(gdf_list):
        tr = traj.drop(['Lon', 'Lat'], axis=1)
        # first occurrence of every matched node
        tr_first = tr.drop_duplicates('N1_match', keep='first')
        tr_first = tr_first.rename(columns={'N1_match': 'N1', 'N2_match': 'N2'})
        idx_first = list(tr_first.index)
        tr_first = pd.merge(
            tr_first,
            gdf_netw[['N1', 'N2', 'Long1', 'Lat1', 'length']].loc[segment_index],
            how='left', on=['N1', 'N2'])
        tr_first = tr_first.rename(columns={'Long1': 'Lon', 'Lat1': 'Lat'})
        tr_first = tr_first.assign(index=idx_first)
        # last occurrence of every matched node
        tr_last = tr.drop_duplicates('N1_match', keep='last')
        tr_last = tr_last.rename(columns={'N1_match': 'N1', 'N2_match': 'N2'})
        idx_last = list(tr_last.index)
        tr_last = pd.merge(
            tr_last,
            gdf_netw[['N1', 'N2', 'Long2', 'Lat2', 'length']].loc[segment_index],
            how='left', on=['N1', 'N2'])
        tr_last = tr_last.rename(columns={'Long2': 'Lon', 'Lat2': 'Lat'})
        tr_last = tr_last.assign(index=idx_last)
        tr_sel = pd.concat([tr_first, tr_last])
        tr_sel = tr_sel.sort_values(by='index')
        df = traj.loc[idx_first + idx_last]
        df = df.sort_index()
        traj_eval.append([tr_sel, df])
    return traj_eval
def evaluate_denoising(args):
    config = load_config(args.model_checkpoint, None)
    args.test_dataset = Path(args.test_dataset)
    assert config['denoising'] is True or config['black_and_white_denoising'] is True, \
        "you are supplying a train run that has not been trained for denoising! Aborting"
    autoencoder = get_autoencoder(config).to(args.device)
    autoencoder = load_weights(autoencoder, args.model_checkpoint,
                               key='autoencoder', strict=True)
    config['batch_size'] = 1
    data_loader = build_data_loader(args.test_dataset, config, config['absolute'],
                                    shuffle_off=True,
                                    dataset_class=DenoisingEvaluationDataset)
    metrics = defaultdict(list)
    psnr_ssim_evaluator = PSNRSSIMEvaluator()
    train_run_root_dir = Path(args.model_checkpoint).parent.parent
    evaluation_root = train_run_root_dir / 'evaluation' / f"denoise_{args.dataset_name}"
    evaluation_root.mkdir(parents=True, exist_ok=True)
    for i, batch in tenumerate(data_loader, leave=False):
        batch = {k: v.to(args.device) for k, v in batch.items()}
        with torch.no_grad():
            denoised = autoencoder(batch['noisy'])
        noisy = clamp_and_unnormalize(batch['noisy'])
        original = clamp_and_unnormalize(batch['original'])
        denoised = clamp_and_unnormalize(denoised)
        if args.save:
            save_dir = evaluation_root / "qualitative" / args.test_dataset.stem
            save_dir.mkdir(exist_ok=True, parents=True)
            save_images([original[0], noisy[0], denoised[0]], save_dir, i)
        psnr, ssim = psnr_ssim_evaluator.psnr_and_ssim(denoised, original)
        metrics['psnr'].append(float(psnr.cpu().numpy()))
        metrics['ssim'].append(float(ssim.cpu().numpy()))
    metrics = {k: statistics.mean(v) for k, v in metrics.items()}
    evaluation_file = evaluation_root / f'denoising_{args.test_dataset.stem}.json'
    with evaluation_file.open('w') as f:
        json.dump(metrics, f, indent='\t')
def map_matching_result_split(self):
    trajectories_moving = []
    netw = []
    nan_traj = []
    for i, j in tenumerate(self.list_mm):
        j[0]['edge'] = list(zip(j[0]['N1_match'].values, j[0]['N2_match'].values))
        if np.any(np.isnan(np.sum(j[0]['edge'].values))):
            # skip trajectories whose matched edges contain NaNs
            nan_traj.append(j[0])
            continue
        trajectories_moving.append(j[0])
        netw.append(j[1])
    used_network = pd.concat(netw, axis=0)
    used_network.drop_duplicates(subset=['N1', 'N2'], inplace=True)
    used_network.reset_index(inplace=True, drop=True)
    used_network['edge'] = [tuple(xy) for xy in
                            zip(used_network['N1'], used_network['N2'])]
    self.point_trajectories = trajectories_moving
    self.nan_traj = nan_traj
    self.used_network = used_network
def parse_submission(lines):
    submissionlist = []
    for i, line in tenumerate(lines):
        try:
            jline = json.loads(line)
        except (json.JSONDecodeError, UnicodeDecodeError):
            print('Decoding error on line %i' % i)
            continue
        # cheaper than going over every key
        jline.setdefault('author', None)
        submissionlist.append(Submission(jline['subreddit'], jline['author'],
                                         jline['name'], jline['title'],
                                         jline['url'], jline['selftext']))
    return submissionlist
def process_data(cur, conn, filepath, func):
    """
    Loads all files in the specified filepath and applies func to each file.

    :param cur: the database cursor.
    :param conn: the database connection.
    :param filepath: filepath to load files from.
    :param func: function to apply to each file.
    """
    # get all files matching extension from directory
    all_files = []
    for root, dirs, files in os.walk(filepath):
        files = glob.glob(os.path.join(root, "*.json"))
        for f in files:
            all_files.append(os.path.abspath(f))

    # get total number of files found
    num_files = len(all_files)
    print("{} files found in {}".format(num_files, filepath))

    # iterate over files and process, counting from 1
    for i, datafile in tenumerate(all_files, 1):
        func(cur, datafile)
        conn.commit()
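# Usage sketch (hypothetical names): process_data drives any per-file handler;
# here process_song_file and a throwaway sqlite3 connection stand in for the
# project's actual database and handlers.
import sqlite3

def process_song_file(cur, path):
    ...  # parse the JSON file at `path` and insert rows via `cur`

conn = sqlite3.connect("example.db")
cur = conn.cursor()
process_data(cur, conn, filepath="data/song_data", func=process_song_file)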
def match_fixed_distance(self, list_index=None, logger=False, **kwargs):
    if logger:
        logger = lm.logger
        logger.setLevel(logging.DEBUG)
        logger.addHandler(logging.StreamHandler(sys.stdout))
    tic = time.time()
    traj_mov_match = []
    special_cases = []
    point_traj = self.list_traj
    if list_index is not None:
        point_traj = [j for i, j in enumerate(point_traj) if i in list_index]
    for i, j in tenumerate(point_traj):
        while True:
            try:
                traj = map_matching(j, self.network_edges, self.map,
                                    self.max_init, self.max_d,
                                    latlon=self.match_latlon, **kwargs)
                traj = traj.merge(self.network_edges[['_id', 'n1', 'n2']],
                                  how='left', on=['n1', 'n2'])
                traj_mov_match.append(traj)
                break
            except Exception:
                special_cases.append(j)
                break
    toc = time.time()
    print(f'{int(divmod(toc - tic, 60)[0])} min '
          f'{int(divmod(toc - tic, 60)[1])} sec')
    return traj_mov_match, special_cases
import numpy as np
import torch
from collections import defaultdict  # needed for `true` and `pred` below
from datasets import load_dataset
from sklearn.metrics import roc_auc_score
from torch.utils.data import DataLoader
from tqdm.contrib import tenumerate

from project.binary_bert.utils import load_binary_bert

dataset = load_dataset("civil_comments", split='test')
dataloader = DataLoader(dataset, batch_size=8)
model, tokenizer, class_names = load_binary_bert()
true, pred = defaultdict(list), defaultdict(list)

model.eval()
with torch.no_grad():
    for id, batch in tenumerate(dataloader, total=len(dataloader)):
        inputs = tokenizer(batch['text'], return_tensors="pt",
                           truncation=True, padding=True).to(model.device)
        out = model(inputs['input_ids'])
        scores = torch.sigmoid(out[0]).cpu().detach().numpy()
        results = {}
        for i, cla in enumerate(class_names):
            results[cla] = (
                scores[0][i] if isinstance(batch['text'], str)
                else [scores[ex_i][i].tolist() for ex_i in range(len(scores))])
            if cla == 'identity_hate':
                batch_cla = 'identity_attack'
            else:
                batch_cla = cla
from typing import Iterable

from tqdm.contrib import tenumerate


def enumerator(iterable: Iterable, verbose: bool, **kwargs):
    """Return a plain enumerate, or a tqdm-wrapped one when verbose."""
    if not verbose:
        return enumerate(iterable)
    return tenumerate(iterable, **kwargs)
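# Usage sketch: the same loop body works whether or not a progress bar is
# wanted; extra kwargs pass straight through to tenumerate/tqdm.
for i, item in enumerator(["a", "b", "c"], verbose=True, desc="items"):
    print(i, item)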
def test_enumerate_numpy():
    """Test contrib.tenumerate(numpy.ndarray)"""
    np = importorskip('numpy')
    with closing(StringIO()) as our_file:
        a = np.random.random((42, 7))
        assert list(tenumerate(a, file=our_file)) == list(np.ndenumerate(a))
def test_enumerate():
    """Test contrib.tenumerate"""
    with closing(StringIO()) as our_file:
        a = range(9)
        assert list(tenumerate(a, file=our_file)) == list(enumerate(a))
        assert list(tenumerate(a, 42, file=our_file)) == list(enumerate(a, 42))
def test(self, merge=False, merge_size=0):
    with fluid.dygraph.guard():
        # Network
        self.generator_ema = Generator(self.img_size, self.img_ch, self.style_dim,
                                       max_conv_dim=self.hidden_dim, sn=False,
                                       w_hpf=self.w_hpf)
        self.mapping_network_ema = MappingNetwork(self.style_dim, self.hidden_dim,
                                                  self.num_domains, sn=False)
        self.style_encoder_ema = StyleEncoder(self.img_size, self.style_dim,
                                              self.num_domains,
                                              max_conv_dim=self.hidden_dim, sn=False)
        self.fan = FAN(fname_pretrained='fan')

        # Load model
        self.load_model(choice='test')

        source_path = os.path.join(self.test_dataset_path, 'src_imgs')
        source_images = glob(os.path.join(source_path, '*.png')) + \
            glob(os.path.join(source_path, '*.jpg'))
        source_images = sorted(source_images)

        # reference-guided synthesis
        print('reference-guided synthesis')
        reference_path = os.path.join(self.test_dataset_path, 'ref_imgs')
        reference_images = []
        reference_domain = []
        for idx, domain in enumerate(self.domain_list):
            image_list = glob(os.path.join(reference_path, domain) + '/*.png') + \
                glob(os.path.join(reference_path, domain) + '/*.jpg')
            image_list = sorted(image_list)
            domain_list = [[idx]] * len(image_list)  # [ [0], [0], ... , [0] ]
            reference_images.extend(image_list)
            reference_domain.extend(domain_list)

        if merge:
            src_img = None
            ref_img = None
            ref_img_domain = None
            if merge_size == 0:
                # [len_src_imgs : len_ref_imgs] matching
                for src_idx, src_img_path in tenumerate(source_images):
                    src_name, src_extension = os.path.splitext(src_img_path)
                    src_name = os.path.basename(src_name)
                    src_img_ = load_images(src_img_path, self.img_size, self.img_ch)  # [img_size, img_size, img_ch]
                    src_img_ = paddle.fluid.layers.unsqueeze(src_img_, axes=[0])
                    if src_idx == 0:
                        src_img = src_img_
                    else:
                        src_img = paddle.fluid.layers.concat([src_img, src_img_], axis=0)
                for ref_idx, (ref_img_path, ref_img_domain_) in tenumerate(zip(reference_images, reference_domain)):
                    ref_name, ref_extension = os.path.splitext(ref_img_path)
                    ref_name = os.path.basename(ref_name)
                    ref_img_ = load_images(ref_img_path, self.img_size, self.img_ch)  # [img_size, img_size, img_ch]
                    ref_img_ = paddle.fluid.layers.unsqueeze(ref_img_, axes=[0])
                    ref_img_domain_ = np.mat(ref_img_domain_)
                    ref_img_domain_ = fluid.dygraph.to_variable(np.array(ref_img_domain_))
                    if ref_idx == 0:
                        ref_img = ref_img_
                        ref_img_domain = ref_img_domain_
                    else:
                        ref_img = paddle.fluid.layers.concat([ref_img, ref_img_], axis=0)
                        ref_img_domain = paddle.fluid.layers.concat([ref_img_domain, ref_img_domain_], axis=0)
                save_path = './{}/ref_all.jpg'.format(self.result_dir)
                self.refer_canvas(src_img, ref_img, ref_img_domain, save_path,
                                  img_num=[len(source_images), len(reference_images)])
            else:
                # [merge_size : merge_size] matching
                src_size = 0
                for src_idx, src_img_path in tenumerate(source_images):
                    src_name, src_extension = os.path.splitext(src_img_path)
                    src_name = os.path.basename(src_name)
                    src_img_ = load_images(src_img_path, self.img_size, self.img_ch)  # [img_size, img_size, img_ch]
                    src_img_ = paddle.fluid.layers.unsqueeze(src_img_, axes=[0])
                    if src_size < merge_size:
                        if src_idx % merge_size == 0:
                            src_img = src_img_
                        else:
                            src_img = paddle.fluid.layers.concat([src_img, src_img_], axis=0)
                        src_size += 1
                    if src_size == merge_size:
                        src_size = 0
                        ref_size = 0
                        for ref_idx, (ref_img_path, ref_img_domain_) in enumerate(zip(reference_images, reference_domain)):
                            ref_name, ref_extension = os.path.splitext(ref_img_path)
                            ref_name = os.path.basename(ref_name)
                            ref_img_ = load_images(ref_img_path, self.img_size, self.img_ch)  # [img_size, img_size, img_ch]
                            ref_img_ = paddle.fluid.layers.unsqueeze(ref_img_, axes=[0])
                            ref_img_domain_ = paddle.fluid.layers.unsqueeze(ref_img_domain_, axes=[0])
                            if ref_size < merge_size:
                                if ref_idx % merge_size == 0:
                                    ref_img = ref_img_
                                    ref_img_domain = ref_img_domain_
                                else:
                                    ref_img = paddle.fluid.layers.concat([ref_img, ref_img_], axis=0)
                                    ref_img_domain = paddle.fluid.layers.concat([ref_img_domain, ref_img_domain_], axis=0)
                                ref_size += 1
                            if ref_size == merge_size:
                                ref_size = 0
                                save_path = './{}/ref_{}_{}.jpg'.format(self.result_dir, src_idx + 1, ref_idx + 1)
                                self.refer_canvas(src_img, ref_img, ref_img_domain, save_path, img_num=merge_size)
        else:
            # [1:1] matching
            for src_img_path in tqdm(source_images):
                src_name, src_extension = os.path.splitext(src_img_path)
                src_name = os.path.basename(src_name)
                src_img = load_images(src_img_path, self.img_size, self.img_ch)  # [img_size, img_size, img_ch]
                src_img = paddle.fluid.layers.unsqueeze(src_img, axes=[0])
                for ref_img_path, ref_img_domain in zip(reference_images, reference_domain):
                    ref_name, ref_extension = os.path.splitext(ref_img_path)
                    ref_name = os.path.basename(ref_name)
                    ref_img = load_images(ref_img_path, self.img_size, self.img_ch)  # [img_size, img_size, img_ch]
                    ref_img = paddle.fluid.layers.unsqueeze(ref_img, axes=[0])
                    ref_img_domain = paddle.fluid.layers.unsqueeze(ref_img_domain, axes=[0])
                    save_path = './{}/ref_{}_{}{}'.format(self.result_dir, src_name, ref_name, src_extension)
                    self.refer_canvas(src_img, ref_img, ref_img_domain, save_path, img_num=1)

        # latent-guided synthesis
        print('latent-guided synthesis')
        for src_img_path in tqdm(source_images):
            src_name, src_extension = os.path.splitext(src_img_path)
            src_name = os.path.basename(src_name)
            src_img = load_images(src_img_path, self.img_size, self.img_ch)  # [img_size, img_size, img_ch]
            src_img = paddle.fluid.layers.unsqueeze(src_img, axes=[0])
            save_path = './{}/latent_{}{}'.format(self.result_dir, src_name, src_extension)
            self.latent_canvas(src_img, save_path)
                  batch_first=True, lower=True,
                  stop_words=set(string.punctuation))
LABEL = data.Field(dtype=torch.float, is_target=True, unk_token=None,
                   sequential=False, use_vocab=False)

df_dataset = pd.read_csv(f'data/{dataset}/data.csv')
entire_dataset = DataFrameDataset(df_dataset, {'text': TEXT, 'label': LABEL})

tokenized_input = []
for i, example in tenumerate(entire_dataset.examples):
    words = list(example.text)
    if len(words) > 0:
        tokenized_input.append(' '.join(words))
    else:
        tokenized_input.append(None)
df_dataset['text'] = tokenized_input
df_dataset = df_dataset.replace(to_replace='None', value=np.nan).dropna()
df_dataset.to_csv(f'data/{dataset}/tokenized_data.csv', index=False,
                  quoting=csv.QUOTE_NONNUMERIC)
                                          options=self.options)
        put_writer.write(table)
        put_writer.close()

    # Request a pyarrow.Table by name
    def get_table(self, name):
        reader = self.con.do_get(flight.Ticket(name.encode('utf8')),
                                 options=self.options)
        return reader.read_all()

    def list_actions(self):
        return self.con.list_actions()


ipc_options = pa.ipc.IpcWriteOptions(compression='zstd')
options = flight.FlightCallOptions(write_options=ipc_options)
client = DemoClient(location, options=options)

dataset, files_list = get_s3_dataset("s3://molbeam/tested")
for count, table in tenumerate(
        dataset.to_batches(columns=["canonical_ID", "enumerated_smiles", "achiral_fp"]),
        total=len(files_list)):
    client.cache_table_in_server(files_list[count], table)


@stopwatch
def get_single_table_from_flight_server(target):
    table_received = client.get_table(target)
    return table_received


received_table = get_single_table_from_flight_server(files_list[0])
print(received_table)
def distance_point_to_matched_edge(self):
    distances = {
        'ID': [], 'Type': [], 'length_traj': [],
        'max_distance': [], 'median_distance': [], 'mean_distance': [],
        '99_percentile': [], 'length_diff': [], 'length_diff_rel': []
    }
    list_distances_traj = []
    for ind, traj in tenumerate(self.point_trajectories):
        distances['ID'].append((ind, traj['Tracked Vehicle'].values[0]))
        distances['Type'].append(traj['Type'].values[0])
        distances['length_traj'].append(len(traj))
        dist = []
        mapped_length = 0
        traj_val = traj[['Lon', 'Lat']].values
        xy_crds = converting_path_to_xy(traj)
        p_xy = xy_crds[['x', 'y']].values
        path_length = similaritymeasures.get_arc_length(p_xy)
        traj_match = traj.rename(columns={'N1_match': 'N1', 'N2_match': 'N2'})
        match_df = pd.merge(
            traj_match[['edge']],
            self.used_network[['N1', 'Lat1', 'Long1', 'N2', 'Lat2', 'Long2',
                               'length', 'edge']],
            how='left', on=['edge'])
        match_val = match_df[['Lat1', 'Long1', 'Lat2', 'Long2', 'edge', 'length']].values
        for row in range(len(traj)):
            p = (traj_val[row][1], traj_val[row][0])  # lat-lon order
            s1 = (match_val[row][0], match_val[row][1])
            s2 = (match_val[row][2], match_val[row][3])
            d, pi, ti = lm_dist.distance_point_to_segment(p, s1, s2)
            dist.append(d)
            # accumulate the matched-path length: partial first and last
            # segments, full segment length whenever a new edge is entered
            if row == 0:
                mapped_length += match_val[row][5] * (1 - ti)
            elif row == len(traj) - 1:
                mapped_length += match_val[row][5] * ti
            elif match_val[row][4] != match_val[row - 1][4]:
                mapped_length += match_val[row][5]
        if match_val[len(traj) - 2][4] == match_val[len(traj) - 1][4]:
            # first and last point share an edge: remove the double count
            mapped_length -= match_val[len(traj) - 1][5]
        diff_len = abs(path_length[0] - mapped_length)
        list_distances_traj.append(dist)
        distances['max_distance'].append(max(dist))
        distances['median_distance'].append(np.median(dist))
        distances['mean_distance'].append(np.mean(dist))
        distances['99_percentile'].append(np.percentile(dist, 99))
        distances['length_diff'].append(diff_len)
        distances['length_diff_rel'].append(diff_len / path_length[0])
    distances = pd.DataFrame(distances)
    return distances, list_distances_traj