def get_info_for_data_loader():
    """Collect the settings the data loader needs from the configuration.

    Reads the experiment and augmentation options through ``config_init``,
    resolves the data/HDF5 locations for the configured mode and passes the
    HDF5 location to ``create_dir``.

    Returns:
        tuple: ``(input_channel, img_width, num_class, rot_num, rewrite,
        data_path, hdf5_path)``. Both paths are ``''`` when the mode is
        neither ``'oc'`` nor ``'od'``.
    """
    experiment = 'experiment_info'
    mode = config_init.get_str_info(section=experiment, key='mode')
    input_channel = config_init.get_int_info(section=experiment, key='input_channel')
    img_width = config_init.get_int_info(section=experiment, key='img_width')
    num_class = config_init.get_int_info(section=experiment, key='num_class')
    rot_num = config_init.get_int_info(section='augmentation_info', key='rot_num')
    rewrite = config_init.get_bool_info(section=experiment, key='rewrite')

    # Config keys holding the (data, hdf5) locations of every known mode.
    path_keys = {
        'oc': ('oc_data_path', 'oc_hdf5_path'),
        'od': ('od_data_path', 'od_hdf5_path'),
    }
    data_path, hdf5_path = '', ''
    if mode in path_keys:
        data_key, hdf5_key = path_keys[mode]
        data_path = config_init.get_str_info(section='data_path', key=data_key)
        hdf5_path = config_init.get_str_info(section='data_path', key=hdf5_key)

    # NOTE(review): assumed to run for every mode (not only 'od') - confirm.
    create_dir(hdf5_path)
    return (input_channel, img_width, num_class, rot_num, rewrite,
            data_path, hdf5_path)
def compute_target(answers_dset, ans2label, name, cache_root):
    """Augment answers_dset with soft score as label.

    For every entry the occurrences of each answer string are counted, the
    answers known to ``ans2label`` are turned into parallel ``labels`` /
    ``scores`` lists (score computed by ``get_score`` from the count), and the
    resulting records are written to ``<cache_root>/<name>_target.json``.

    Args:
        answers_dset: iterable of dicts with the keys ``'answers'`` (a list of
            dicts carrying an ``'answer'`` key), ``'question_type'``,
            ``'question_id'`` and ``'image_id'``.
        ans2label: mapping from answer string to label.
        name: prefix of the cache file name.
        cache_root: directory receiving the cache file.
    """
    target = []
    for ans_entry in answers_dset:
        # How many annotators gave each distinct answer.
        answer_count = {}
        for answer in ans_entry['answers']:
            answer_text = answer['answer']
            answer_count[answer_text] = answer_count.get(answer_text, 0) + 1

        labels, scores = [], []
        for answer_text, count in answer_count.items():
            # Answers without a label are dropped.
            if answer_text not in ans2label:
                continue
            labels.append(ans2label[answer_text])
            scores.append(get_score(count))

        target.append({
            'question_type': ans_entry['question_type'],
            'question_id': ans_entry['question_id'],
            'image_id': ans_entry['image_id'],
            'labels': labels,
            'scores': scores,
        })

    utils.create_dir(cache_root)
    cache_file = os.path.join(cache_root, name + '_target.json')
    # Context manager so the file is flushed and closed deterministically.
    with open(cache_file, 'w') as cache_fp:
        json.dump(target, cache_fp)
def test_imagenet(model, model_name):
    """Evaluate ``model`` on the ImageNet validation loader and save its outputs.

    Writes, under ``imagenet_predictions``: ``<model_name>_evaluate_result.txt``
    (accuracy summary), ``<model_name>_preds.npy``, ``<model_name>_probs.npy``
    and ``labels.npy``.

    Args:
        model: network called as ``model(images)``; its output is treated as
            per-class logits along dimension 1.
        model_name: used to choose the loader (``'effnetE7'`` gets
            ``effnet_loader``) and to name the output files.
    """
    # data loader:
    imagenet_dir = os.path.join('/hdd3/haotao/imagenet_pytorch_download')
    if model_name == 'effnetE7':
        val_loader = effnet_loader(imagenet_dir, args.batch_size)
    else:
        _, val_loader = imagenet_loader(imagenet_dir,
                                        args.batch_size,
                                        args.batch_size,
                                        img_size=args.img_size,
                                        mean=args.mean,
                                        std=args.std)

    # testing:
    start_time = time.time()
    correct_total, total = 0, 0
    probs_lst, preds_lst, labels_lst = [], [], []
    # Pure inference: no autograd graph is needed, which saves GPU memory.
    with torch.no_grad():
        for batch_idx, (images, labels) in enumerate(val_loader):
            images = images.cuda()
            labels = labels.cuda()
            labels_lst.append(labels.cpu().numpy())
            N = labels.size()[0]
            total += N

            logits = model(images)
            # Explicit class dimension; the implicit-dim form is deprecated.
            probs = nn.functional.softmax(logits, dim=1)
            probs_lst.append(probs.detach().cpu().numpy())
            preds = torch.argmax(logits, dim=1)
            preds_lst.append(preds.cpu().numpy())

            # .item() keeps the running total a plain Python int.
            correct = torch.sum(preds == labels).item()
            correct_total += correct

            # print
            if batch_idx % 10 == 0:
                print('batch %d images:' % batch_idx, images.size())
                print('correct/batch_size: %d/%d' % (correct, N))

    prediction_dir = 'imagenet_predictions'
    create_dir(prediction_dir)
    testing_time = time.time() - start_time
    result_str = '%s corect_num/total: %d/%d, acc: %.2f, time: %s' % (
        model_name, correct_total, total,
        float(correct_total) / float(total) * 100, testing_time)
    print(result_str)
    result_path = os.path.join(prediction_dir,
                               "%s_evaluate_result.txt" % model_name)
    with open(result_path, "w+") as f:
        f.write(result_str)

    preds_all = np.concatenate(preds_lst, axis=0)
    probs_all = np.concatenate(probs_lst, axis=0)
    labels_all = np.concatenate(labels_lst, axis=0)
    np.save(os.path.join(prediction_dir, '%s_preds.npy' % model_name), preds_all)
    np.save(os.path.join(prediction_dir, '%s_probs.npy' % model_name), probs_all)
    np.save(os.path.join(prediction_dir, 'labels.npy'), labels_all)
    print('labels_all:', labels_all[0:10])
def split_data(traj_input_dir, output_dir):
    """Split original data to train, valid and test datasets.

    Every file in ``traj_input_dir`` is parsed and its trajectories are
    randomly partitioned: 10% test, 20% validation, the remainder training.
    The parts are stored as ``train_<file>``, ``val_<file>`` and
    ``test_<file>`` in the ``train_data``, ``valid_data`` and ``test_data``
    sub-directories of ``output_dir``.

    Args:
        traj_input_dir: directory holding the trajectory files to split.
        output_dir: parent directory of the three output sub-directories; a
            trailing separator is no longer required.
    """
    create_dir(output_dir)
    # os.path.join keeps the sub-directories inside output_dir even when the
    # caller passes it without a trailing slash.
    train_data_dir = os.path.join(output_dir, 'train_data/')
    create_dir(train_data_dir)
    val_data_dir = os.path.join(output_dir, 'valid_data/')
    create_dir(val_data_dir)
    test_data_dir = os.path.join(output_dir, 'test_data/')
    create_dir(test_data_dir)

    trg_parser = ParseMMTraj()
    trg_saver = SaveTraj2MM()

    for file_name in tqdm(os.listdir(traj_input_dir)):
        traj_input_path = os.path.join(traj_input_dir, file_name)
        trg_trajs = np.array(trg_parser.parse(traj_input_path))
        ttl_lens = len(trg_trajs)

        # 10% as test data
        test_inds = random.sample(range(ttl_lens), int(ttl_lens * 0.1))
        # Sets make the exclusion tests O(1) instead of scanning a list.
        test_set = set(test_inds)
        tmp_inds = [ind for ind in range(ttl_lens) if ind not in test_set]
        # 20% as validation data
        val_inds = random.sample(tmp_inds, int(ttl_lens * 0.2))
        val_set = set(val_inds)
        # 70% as training data
        train_inds = [ind for ind in tmp_inds if ind not in val_set]

        trg_saver.store(trg_trajs[train_inds],
                        os.path.join(train_data_dir, 'train_' + file_name))
        print("target traj train len: ", len(trg_trajs[train_inds]))
        trg_saver.store(trg_trajs[val_inds],
                        os.path.join(val_data_dir, 'val_' + file_name))
        print("target traj val len: ", len(trg_trajs[val_inds]))
        trg_saver.store(trg_trajs[test_inds],
                        os.path.join(test_data_dir, 'test_' + file_name))
        print("target traj test len: ", len(trg_trajs[test_inds]))
def create_ans2label(occurence, name, cache_root):
    """Map answers to label.

    Labels are assigned consecutively, starting at 0, in the iteration order
    of ``occurence``. Both directions of the mapping are cached as JSON in
    ``cache_root``: ``<name>_ans2label.json`` and ``<name>_label2ans.json``.

    Args:
        occurence: iterable of answers.
        name: prefix of the cache file names.
        cache_root: directory receiving the cache files.

    Returns:
        dict: answer -> label.
    """
    label2ans = list(occurence)
    ans2label = {answer: label for label, answer in enumerate(label2ans)}

    utils.create_dir(cache_root)

    # Context managers close (and flush) each file as soon as it is written.
    cache_file = os.path.join(cache_root, name + '_ans2label.json')
    with open(cache_file, 'w') as cache_fp:
        json.dump(ans2label, cache_fp)
    cache_file = os.path.join(cache_root, name + '_label2ans.json')
    with open(cache_file, 'w') as cache_fp:
        json.dump(label2ans, cache_fp)
    return ans2label
def __init__(self):
    """Prepare the log directory, pick a fresh log path and write the header.

    The log path is ``<log_dir>/<timestamp>``, where the timestamp has
    microsecond resolution; the path must not exist yet.
    """
    # The directory receiving the logs has to exist before anything is written
    self.log_dir = os.path.join(".", "mapping", "model_training", "training_logs")
    create_dir(self.log_dir)

    # Name the new log after the current time (down to the %f field)
    stamp = datetime.datetime.now().strftime("%y_%m_%d__%H_%M_%S_%f")
    self.file_dir = os.path.join(self.log_dir, stamp)
    assert not os.path.exists(self.file_dir), (
        f"{self.file_dir} already exists. "
        "This probably shouldn't happen unless the TrainingLogger is "
        "initialized at the same millisecond as another logger. "
        "Please investigate."
    )

    # First entry of the log: a human-readable start date
    started_at = datetime.datetime.now().strftime("%y-%m-%d %H:%M:%S")
    self.log(f"Training log commencing at {started_at}:\n")
def output_embeddings(self, embedding, df):
    """Save ``embedding`` with the labels of ``df`` as a CSV file.

    The file is
    ``<output_dataset_dir>/<test_dataset>/<app_name>/<mapping_name>.csv``;
    both intermediate folders are passed to ``create_dir`` first.

    Args:
        embedding: data accepted by ``pd.DataFrame``, one row per row of ``df``.
        df: DataFrame supplying the index and the ``"label"`` column.
    """
    # Walk down dataset folder -> app folder, ensuring each level exists
    folder = self.output_dataset_dir
    for level in (self.test_dataset, self.app_name):
        folder = os.path.join(folder, level)
        create_dir(folder)

    csv_path = os.path.join(folder, f"{self.mapping_name}.csv")

    # Rows keep the index of df so the label column lines up
    labelled = pd.DataFrame(embedding, index=df.index)
    labelled["label"] = df["label"]
    labelled.to_csv(csv_path)
def sample(sample_inputs, model, epoch, save_dir, teacher_forcing_ratio=0):
    """Generate images for a fixed set of inputs and save them as PNG grids.

    Writes ``input.png``, ``ground_truth.png`` and ``<epoch + 1, two digits>.png``
    into ``<save_dir>/sample``. The model is put in eval mode for the forward
    pass and switched back to train mode before returning.

    Args:
        sample_inputs: indexable; items 0, 1 and 2 are the input images, input
            audios and ground-truth images, item 3 is passed as ``valid_len``
            to models whose type is ``'RNN'``.
        model: the generator, optionally wrapped in ``torch.nn.DataParallel``.
        epoch: zero-based epoch index, used for the generated file name.
        save_dir: parent directory of the ``sample`` folder.
        teacher_forcing_ratio: forwarded to ``'RNN'`` models only.
    """

    def _merge_leading_dims(batch):
        # Collapse the first two dimensions of a 5-D tensor into one.
        return batch.contiguous().view(batch.shape[0] * batch.shape[1],
                                       batch.shape[2], batch.shape[3],
                                       batch.shape[4])

    model.eval()

    sample_dir = os.path.join(save_dir, 'sample')
    utils.create_dir(sample_dir)

    # DataParallel hides the real network behind `.module`
    if isinstance(model, torch.nn.DataParallel):
        model_type = model.module.model_type()
    else:
        model_type = model.model_type()

    input_images = sample_inputs[0]
    input_audios = sample_inputs[1]
    gt_images = sample_inputs[2]

    with torch.no_grad():
        if model_type != 'RNN':
            G_images = model(input_images, input_audios)
        else:
            G_images = model(input_images,
                             input_audios,
                             valid_len=sample_inputs[3],
                             teacher_forcing_ratio=teacher_forcing_ratio)
            G_images = _merge_leading_dims(G_images)
            input_images = _merge_leading_dims(input_images)
            gt_images = _merge_leading_dims(gt_images)

    # inputs, ground truth and generated images, in that order
    g_name = '{:02d}.png'.format(epoch + 1)
    for images, file_name in ((input_images, 'input.png'),
                              (gt_images, 'ground_truth.png'),
                              (G_images, g_name)):
        utils.save_sample_images(images.cpu().detach().numpy(),
                                 os.path.join(sample_dir, file_name))

    model.train()
def extract(self):
    """Run the model over ``self.val_loader`` and save one ``.npy`` per image.

    For each image file name yielded by the loader, the matching row of the
    model output is saved to ``<self.feature_dir>/<name without extension>.npy``
    (parent directories are created through ``utils.create_dir``). Timing is
    printed and logged every ``self.print_freq`` batches.

    The forward pass runs under ``torch.no_grad()``: the former
    ``Variable(imgs, volatile=True)`` no longer disables gradient tracking on
    PyTorch >= 0.4, so an autograd graph was built for every batch.
    """
    import torch  # local import: only torch.no_grad is needed here

    batch_time = utils.AverageMeter()
    self.model.eval()

    end = time.time()
    for batch_idx, (imgs, target, img_files, class_ids) in tqdm.tqdm(
            enumerate(self.val_loader), total=len(self.val_loader),
            desc='Extract', ncols=80, leave=False):

        gc.collect()

        if self.cuda:
            imgs = imgs.cuda()
        with torch.no_grad():
            # N C H W, e.g. torch.Size([1, 1, 401, 600])
            output = self.model(imgs)
        if self.flatten_feature:
            output = output.view(output.size(0), -1)
        output = output.cpu().numpy()

        # One output row per image file in the batch.
        assert output.shape[0] == len(img_files)
        for i, img_file in enumerate(img_files):
            base_name = os.path.splitext(img_file)[0]
            feature_file = os.path.join(self.feature_dir, base_name + ".npy")
            utils.create_dir(os.path.dirname(feature_file))
            np.save(feature_file, output[i])

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        if batch_idx % self.print_freq == 0:
            log_str = 'Extract: [{0}/{1}]\tTime: {batch_time.val:.3f} ({batch_time.avg:.3f})'.format(
                batch_idx, len(self.val_loader), batch_time=batch_time)
            print(log_str)
            self.print_log(log_str)
def main():
    """Command-line entry point: configure the board and run the chosen action.

    Exactly one of these runs, in priority order: renumber refdefs, extract
    routing, create a BoM, or (otherwise) the board / fabrication / PNG steps
    selected on the command line. Afterwards ``config.pth`` is dumped to
    ``paths_db.json`` in the board's build directory.

    If that file cannot be opened or written, the I/O error is printed and the
    dump is skipped (previously the code went on to use the unbound file
    handle and crashed with a NameError).

    NOTE(review): the nesting below was reconstructed from statement order;
    confirm that the fabrication and PNG steps belong to the final ``else``.
    """
    # get PCBmodE version
    version = utils.get_git_revision()

    # setup and parse commandline arguments
    argp = cmdArgSetup(version)
    cmdline_args = argp.parse_args()

    # Might support running multiple boards in the future,
    # for now get the first one
    board_name = cmdline_args.boards[0]

    makeConfig(board_name, version, cmdline_args)

    # check if build directory exists; if not, create
    build_dir = os.path.join(config.cfg['base-dir'],
                             config.cfg['locations']['build'])
    utils.create_dir(build_dir)

    # renumber refdefs and dump board config file
    if cmdline_args.renumber is not False:
        msg.info("Renumbering refdefs")
        if cmdline_args.renumber is None:
            order = 'top-to-bottom'
        else:
            order = cmdline_args.renumber.lower()

        utils.renumberRefdefs(order)

    # Extract routing from input SVG file
    elif cmdline_args.extract is True:
        extract.extract()

    # Create a BoM
    elif cmdline_args.make_bom is not False:
        bom.make_bom(cmdline_args.make_bom)

    else:
        # make the board
        if cmdline_args.make is True:
            msg.info("Creating board")
            board = Board()

        # Create production files (Gerbers, Excellon, etc.)
        if cmdline_args.fab is not False:
            if cmdline_args.fab is None:
                manufacturer = 'default'
            else:
                manufacturer = cmdline_args.fab.lower()

            msg.info("Creating Gerbers")
            gerber.gerberise(manufacturer)

            msg.info("Creating excellon drill file")
            excellon.makeExcellon(manufacturer)

        if cmdline_args.pngs is True:
            msg.info("Creating PNGs")
            utils.makePngs()

    filename = os.path.join(config.cfg['locations']['boards'],
                            config.cfg['name'],
                            config.cfg['locations']['build'],
                            'paths_db.json')

    try:
        # `with` closes the file even if the write fails.
        with open(filename, 'wb') as f:
            f.write(json.dumps(config.pth, sort_keys=True, indent=2))
    except IOError as e:
        # Single-argument print(...) behaves the same as the print statement.
        print("I/O error({0}): {1}".format(e.errno, e.strerror))

    msg.info("Done!")
def compare(model_name1, model_name2, distance_mode='min', weighted=True):
    '''
    Compare the difference between two models predictions.
    Save results in .txt file and comparison details in a .csv file.
    Filter out the hard examples based on these two criteria:
        1. The wordnet tree distance is large;
        2. One or both of them are confident in their predictions.

    Args:
        model_name1, model_name2: names used to locate ``<name>_preds.npy`` and
            ``<name>_probs.npy`` in ``prediction_dir`` and to name the outputs.
        distance_mode: ``'min'`` or ``'ave'`` - how the distances of all trace
            pairs of two predicted classes are reduced to one tree distance.
        weighted: forwarded to ``trace_distance``; also selects the
            ``weighted_`` / ``unweighted_`` output sub-directory.

    Raises:
        Exception: for an unknown ``distance_mode`` (raised while processing
            the first disagreement).
    '''
    if weighted:
        tree_distance_mode = 'weighted_' + distance_mode
    else:
        tree_distance_mode = 'unweighted_' + distance_mode
    out_dir = os.path.join(compare_dir, tree_distance_mode)
    create_dir(out_dir)

    preds1 = np.load(os.path.join(prediction_dir, '%s_preds.npy' % model_name1))
    preds2 = np.load(os.path.join(prediction_dir, '%s_preds.npy' % model_name2))
    probs1 = np.load(os.path.join(prediction_dir, '%s_probs.npy' % model_name1))
    probs2 = np.load(os.path.join(prediction_dir, '%s_probs.npy' % model_name2))
    labels = np.loadtxt(COMMON_FLAGS.label_path)

    # make the prediction readable:
    with open(os.path.join(json_dir, 'imgnetid2word.json')) as fp:
        imgnetid2word = json.load(fp)
    words1 = np.array([imgnetid2word[str(int(imgnetid))] for imgnetid in preds1])
    words2 = np.array([imgnetid2word[str(int(imgnetid))] for imgnetid in preds2])

    # find disagree index:
    # reshape(-1) instead of squeeze(): squeeze() turns a single disagreement
    # into a 0-d array, which breaks len() and the column construction below.
    disagree_idx = np.argwhere(preds1 != preds2).reshape(-1)
    print('disagree_idx:', disagree_idx.shape, disagree_idx[0:20])

    # build data frame:
    prediction_df = pd.DataFrame(
        {
            'disagree_idx': disagree_idx.astype(int),
            'preds1': preds1[disagree_idx].astype(int),
            'words1': words1[disagree_idx],
            'probs1': np.amax(probs1[disagree_idx], axis=1),
            'preds2': preds2[disagree_idx].astype(int),
            'words2': words2[disagree_idx],
            'probs2': np.amax(probs2[disagree_idx], axis=1),
            'gnd': labels[disagree_idx].astype(int)
        },
        columns=[
            'disagree_idx', 'preds1', 'words1', 'probs1', 'preds2', 'words2',
            'probs2', 'gnd'
        ])

    # load json:
    with open(os.path.join(json_dir, 'code_with_imgnet_id.json'), 'r') as fp:
        code_with_imgnet_id = json.load(fp)

    # get tree distance and confident number:
    tree_dist_lst = []
    for index, row in prediction_df.iterrows():
        # trace label: list, of a single or multiple string elements
        trace_label_lst1 = code_with_imgnet_id[str(int(row['preds1']))]
        trace_label_lst2 = code_with_imgnet_id[str(int(row['preds2']))]

        # all possible paths from leaf1 to leaf2:
        all_path_dist = []
        for trace_label1 in trace_label_lst1:
            trace_label1 = trace_label1.split(' ')
            for trace_label2 in trace_label_lst2:
                trace_label2 = trace_label2.split(' ')
                # tree dist:
                path_dist = trace_distance(trace_label1, trace_label2,
                                           weighted=weighted)
                all_path_dist.append(path_dist)

        # There maybe multiple paths from leaf1 to leaf2.
        # Each path may have different length.
        if distance_mode == 'ave':
            tree_dist = np.mean(all_path_dist)
        elif distance_mode == 'min':
            tree_dist = np.min(all_path_dist)
        else:
            raise Exception('wrong distance_mode %s' % distance_mode)

        # append this pair of different predictions to the list:
        tree_dist_lst.append(tree_dist)

    # add new column to the df:
    prediction_df['tree_dist'] = tree_dist_lst

    # save csv:
    prediction_df.to_csv(os.path.join(
        out_dir, "disagree_%s_%s.csv" % (model_name1, model_name2)),
                         index=False)

    # save txt
    result_str = 'disagree_num_total: %d, total: %d' % (len(disagree_idx),
                                                        preds1.shape[0])
    print(result_str)
    result_path = os.path.join(
        out_dir, "compare_result_%s_%s.txt" % (model_name1, model_name2))
    with open(result_path, "w+") as f:
        f.write(result_str)
logging.info( f'Failed to extract data for property {i}. Error: ' f'{str(e)}') continue else: break time.sleep(self.SLEEP) logging.info( f'Scraped {self.N_PAGES} pages containing {len(results)} properties.' ) logging.info('Organise data into a Pandas DataFrame') data = create_dataframe(*list(zip(*results))) return data if __name__ == '__main__': config_logging() scraper = CasaSapoScraper() df = scraper.get_all_properties(include_filters=True) logging.info('Create directory where the results will be stored') create_dir(OUTPUT_DIR) csv_filepath = OUTPUT_DIR / f'sapo_porto_properties_{NOW.year}{NOW.month}{NOW.day}.csv' logging.info(f'Save results to CSV file: {csv_filepath}') save_df_to_csv(df, csv_filepath)
def train_fasttext(corpusPath, corpusName, window, size, sg, epochs, min_count):
    """Train a FastText model on a corpus unless its directory already exists.

    The model name encodes the corpus name and the hyper-parameters. When
    ``<ROOT_DIR>/models/fasttext/<modelName>`` exists nothing is trained.
    Otherwise the corpus is tokenized, the vocabulary is built, the model is
    trained, and one row of timings/settings is appended to ``model_info.csv``
    next to the model path. The model itself is not saved here.

    Returns:
        tuple: ``(model, modelName, modelPath)``, or ``(None, None, None)``
        when the model directory already exists.
    """
    total_start = time.time()

    modelName = 'ft_{}_w{}_d{}_sg{}_ep{}_minC{}'.format(
        corpusName, window, size, sg, epochs, min_count)
    model = FastText(size=size, window=window, min_count=min_count, sg=sg)
    modelPath = '/'.join(
        [ROOT_DIR, 'models/fasttext', modelName, modelName + '.model'])

    # An existing model directory means this configuration was handled before
    if os.path.exists(os.path.dirname(modelPath)):
        print("The model already exists! ", modelName)
        return None, None, None

    print('#########################################################################')
    print("Ready to train the model ", modelName)

    df_cols = ['modelName', 'total_duration', 'reading_corpus_duration',
               'building_vocab_duration', 'training_duration', 'corpus_name',
               'total_examples', 'window', 'size', 'sg', 'min_n', 'max_n',
               'negative', 'word_ngrams', 'epochs', 'min_count', 'vocab_size']
    # Every column starts empty; known settings are filled in right away and
    # measurements are added as the steps below complete.
    result = dict.fromkeys(df_cols)
    result.update({
        'modelName': modelName,
        'corpus_name': corpusName,
        'window': window,
        'size': size,
        'sg': sg,
        'min_n': 3,
        'max_n': 6,
        'negative': 5,
        'word_ngrams': 1,
        'epochs': epochs,
    })

    print("Reading corpus..." + corpusPath)
    read_start = time.time()
    tokenizer = tokenizedCorpus()
    tCorpus = tokenizer(corpusPath)
    result['reading_corpus_duration'] = int(time.time() - read_start)

    print("Building vocab...")
    vocab_start = time.time()
    model.build_vocab(sentences=tCorpus)
    result['building_vocab_duration'] = int(time.time() - vocab_start)
    print("Building done. building_vocab_duration=",
          result['building_vocab_duration'])

    total_examples = model.corpus_count
    result['total_examples'] = total_examples
    print("Total examples: " + str(total_examples))

    # train
    print("Training model...: " + modelName)
    train_start = time.time()
    model.train(sentences=tCorpus, total_examples=total_examples, epochs=epochs)
    result['training_duration'] = int(time.time() - train_start)
    print("Training done.")

    result['total_duration'] = int(time.time() - total_start)
    print("Fasttext training time total: " + str(int(time.time() - total_start)))
    result['min_count'] = model.vocabulary.min_count
    result['vocab_size'] = len(model.wv.vectors_vocab)

    # save model info as a one-row table under the model's directory
    df = pd.DataFrame([result], columns=df_cols)
    # NOTE(review): create_dir receives the model *file* path, not its
    # directory - confirm create_dir copes with that.
    create_dir(modelPath)
    df.to_csv(os.path.dirname(modelPath) + '/model_info.csv', mode='a',
              index=False)
    return model, modelName, modelPath
def get_path_for_data():
    """Build, create and return the image/label folders of the current mode.

    The base folder is read from the ``data_path`` config section
    (``oc_data_path`` for mode ``'oc'``, ``od_data_path`` for ``'od'``, ``''``
    otherwise). Each sub-folder is appended to it by plain concatenation and
    passed to ``create_dir``.

    Returns:
        tuple: the nine paths, in this order - train img, train label,
        validation img, validation label, test img, test label, cropped test
        img, cropped test label, test predictions.
    """
    mode = config_init.get_str_info(section='experiment_info', key='mode')

    base_keys = {'oc': 'oc_data_path', 'od': 'od_data_path'}
    img_data_path = ''
    if mode in base_keys:
        img_data_path = config_init.get_str_info(section='data_path',
                                                 key=base_keys[mode])

    sub_folders = (
        'train/img/',
        'train/label/',
        'validation/img/',
        'validation/label/',
        'test/img/',
        'test/label/',
        'test/crop_img/',
        'test/crop_label/',
        'test/predict/',
    )
    folders = []
    for sub_folder in sub_folders:
        folder = img_data_path + sub_folder
        create_dir(folder)
        folders.append(folder)
    return tuple(folders)
def __init__(self, test_dataset):
    """Set up the data/model directory layout and the torch device.

    The raw data folders (``./data/raw`` and ``./data/raw/<test_dataset>``)
    must already exist; every other folder is passed to ``create_dir``.

    Args:
        test_dataset: name of the dataset folder under the raw data folder.
    """

    def _prepared(*parts):
        # Join the parts, make sure the directory exists, hand the path back.
        path = os.path.join(*parts)
        create_dir(path)
        return path

    self.test_dataset = test_dataset
    self.overall_dataset_dir = os.path.join(".", "data")

    # Raw dir - umbrella folder for the initially downloaded data
    self.raw_dataset_dir = os.path.join(self.overall_dataset_dir, "raw")
    assert os.path.exists(self.raw_dataset_dir), (
        f"No raw datasets in the {self.raw_dataset_dir} path. "
        "Run data downloaders first."
    )

    # Raw dir of this particular dataset
    self.raw_dataset_specific_dir = os.path.join(self.raw_dataset_dir,
                                                 self.test_dataset)
    assert os.path.exists(self.raw_dataset_specific_dir), (
        f"No raw datasets in the {self.raw_dataset_specific_dir} path. "
        "Run data downloaders first."
    )

    # Output dir - the embeddings are kept here
    self.output_dataset_dir = _prepared(self.overall_dataset_dir, "embeddings")

    # Preprocessed dir
    self.preprocessed_dataset_dir = _prepared(self.overall_dataset_dir,
                                              "preprocessed")

    # Auxiliary data dir - extra data used for training models, not testing
    self.base_auxiliary_dataset_dir = _prepared(self.overall_dataset_dir,
                                                "auxiliary")

    # Mapping name
    self.mapping_name = self.get_mapping_name()

    # Misc app label (given to the collection of a few bits of feedback on
    # many apps)
    self.misc_app_name = DownloadUtilBase().misc_app_name

    # Device currently available for torch training/inference
    device_kind = 'cuda' if torch.cuda.is_available() else 'cpu'
    self.device = torch.device(device_kind)
    print(f"Available PyTorch device is {self.device}")

    # Model directories: shared repository, then one folder per mapping
    self.model_repo_dir = _prepared(".", "mapping", "mapping_models",
                                    "saved_models")
    self.model_dir = _prepared(self.model_repo_dir, self.mapping_name)

    self.auxiliary_dataset_dir = _prepared(self.base_auxiliary_dataset_dir,
                                           self.mapping_name)
def statistics(trajs, save_stats_dir, stats, save_stats_name, save_plot=False):
    """Accumulate basic trajectory statistics, dump them to JSON, optionally plot.

    Args:
    -----
    trajs:
        sized collection of trajectory objects providing ``oid``, ``pt_list``,
        ``get_distance()``, ``get_duration()``, ``get_avg_time_interval()``
        and ``get_avg_distance_interval()``.
    save_stats_dir:
        str. directory of saving stats results
    stats:
        dict. running statistics; an empty dict is initialised with all keys.
        It is updated in place, so repeated calls accumulate totals.
    save_stats_name:
        str. base name of the JSON file and of the histogram images.
    save_plot:
        boolean. whether to also save one histogram per data series.

    Returns:
    --------
    the updated ``stats`` dict.
    """
    create_dir(save_stats_dir)

    if not stats:
        # First call: create every counter and data series
        stats.update({
            '#object': 0,
            '#points': 0,
            '#trajectories': 0,
            'seq_len_data': [],
            'distance_data': [],
            'duration_data': [],
            'traj_avg_time_interval_data': [],
            'traj_avg_dist_interval_data': [],
        })

    # Values collected from this call only
    object_ids = set()
    point_total = 0
    batch = {
        'seq_len_data': [],  # number of points of each trajectory
        'distance_data': [],  # get_distance() / 1000
        'duration_data': [],  # get_duration() / 60
        'traj_avg_time_interval_data': [],
        'traj_avg_dist_interval_data': [],
    }
    for traj in trajs:
        object_ids.add(traj.oid)
        point_count = len(traj.pt_list)
        point_total += point_count
        batch['seq_len_data'].append(point_count)
        batch['distance_data'].append(traj.get_distance() / 1000.0)
        batch['duration_data'].append(traj.get_duration() / 60.0)
        batch['traj_avg_time_interval_data'].append(traj.get_avg_time_interval())
        batch['traj_avg_dist_interval_data'].append(traj.get_avg_distance_interval())

    print('#objects_single:{}'.format(len(object_ids)))
    print('#points_single:{}'.format(point_total))
    print('#trajectories_single:{}'.format(len(trajs)))

    # Fold this call into the running totals
    stats['#object'] += len(object_ids)
    stats['#points'] += point_total
    stats['#trajectories'] += len(trajs)
    for series in ('seq_len_data', 'distance_data', 'duration_data',
                   'traj_avg_time_interval_data', 'traj_avg_dist_interval_data'):
        stats[series] += batch[series]

    print('#objects_total:{}'.format(stats['#object']))
    print('#points_total:{}'.format(stats['#points']))
    print('#trajectories_total:{}'.format(stats['#trajectories']))

    json_path = os.path.join(save_stats_dir, save_stats_name + '.json')
    with open(json_path, 'w') as f:
        json.dump(stats, f)

    if save_plot:
        histograms = (
            ('seq_len_data', '#Points', '_nb_points_dist.png'),
            ('distance_data', 'Distance (KM)', '_distance_dist.png'),
            ('duration_data', 'Duration (Min)', '_duration_dist.png'),
            ('traj_avg_time_interval_data', 'Time Interval (Sec)',
             '_time_interval_dist.png'),
            ('traj_avg_dist_interval_data', 'Distance Interval (Meter)',
             '_distance_interval_dist.png'),
        )
        for series, axis_label, suffix in histograms:
            plot_hist(stats[series], axis_label, save_stats_dir,
                      save_stats_name + suffix)

    return stats
def select(model_name1, model_name2, distance_mode='min', weighted=True, probs_th=0.8):
    """Pick 60 confident, far-apart disagreements and save them for plotting.

    Reads the ``disagree_<m1>_<m2>.csv`` file found under ``compare_dir`` for
    the chosen tree-distance mode, keeps the rows where both models'
    probability is at least ``probs_th``, sorts them by descending tree
    distance and walks down the list selecting a row when neither predicted
    class has been seen more than 3 times so far and the pair of classes has
    not been selected before. Exactly 60 rows must be selected.

    Outputs in the selection directory: the sorted/filtered CSV, a 6 x 10
    image mosaic (PNG) and CSV grids with the predicted / ground-truth words
    and the image indices.

    Args:
        model_name1, model_name2: names of the compared models.
        distance_mode: suffix of the tree-distance mode directory name.
        weighted: selects the ``weighted_`` or ``unweighted_`` prefix.
        probs_th: minimum probability both models must reach.

    Raises:
        AssertionError: if fewer than 60 rows are selected.
    """
    prefix = 'weighted_' if weighted else 'unweighted_'
    tree_distance_mode = prefix + distance_mode
    select_dir = os.path.join(COMMON_FLAGS.select_dir, tree_distance_mode,
                              '%s_vs_%s' % (model_name1, model_name2))
    create_dir(select_dir)

    # load csv:
    disagree_csv = os.path.join(
        compare_dir, tree_distance_mode,
        "disagree_%s_%s.csv" % (model_name1, model_name2))
    prediction_df = pd.read_csv(disagree_csv)

    # filter all disagreed images by probs (both models must be confident):
    both_confident = ((prediction_df['probs1'] >= probs_th)
                      & (prediction_df['probs2'] >= probs_th))
    conditioned_lines = prediction_df.loc[both_confident]
    print('conditioned_lines:', type(conditioned_lines))

    # sort by tree distance:
    sorted_lines = conditioned_lines.sort_values(by=['tree_dist'],
                                                 ascending=False)

    # save selected lines:
    sorted_csv = os.path.join(
        select_dir, "%s_sorted_conditioned_disagree_%s_%s.csv" %
        (tree_distance_mode, model_name1, model_name2))
    sorted_lines.to_csv(sorted_csv, index=False)
    print('sorted_lines:', sorted_lines.shape)

    # select rows to plot:
    chosen_idx, chosen_preds1, chosen_preds2, chosen_gnd = [], [], [], []
    selected_num = 0
    pred_pairs = []  # pairs of predictions already selected, each a set
    occurance_num = {}  # how many times each class has been predicted so far
    for _, row in sorted_lines.iterrows():
        pred1, pred2 = int(row['preds1']), int(row['preds2'])
        pred_pair = {pred1, pred2}

        # keep track of occurance_num:
        for pred in pred_pair:
            occurance_num[pred] = occurance_num.get(pred, 0) + 1

        # whether to select this image: neither class over-represented and
        # the pair not selected yet
        is_selected = (occurance_num[pred1] <= 3
                       and occurance_num[pred2] <= 3
                       and pred_pair not in pred_pairs)
        if is_selected:
            chosen_idx.append(int(row['disagree_idx']))
            chosen_preds1.append(pred1)
            chosen_preds2.append(pred2)
            chosen_gnd.append(int(row['gnd']))
            selected_num += 1
            pred_pairs.append(pred_pair)

        if selected_num >= 60:  # break condition
            break

    # convert list to ndarray:
    img_idx_to_plot = np.array(chosen_idx)
    preds1_to_plot = np.array(chosen_preds1)
    preds2_to_plot = np.array(chosen_preds2)
    crawl_keys_to_plot = np.array(chosen_gnd)
    assert len(img_idx_to_plot) == 60

    # plot big img: one long strip, cut into 6 pieces, stacked vertically
    tiles = [
        resize(imread(os.path.join(dataset_dir, str(idx) + '.jpg')),
               (244, 244, 3)) for idx in img_idx_to_plot
    ]
    strip = np.concatenate(tiles, axis=1)
    img_big = np.concatenate(np.split(strip, 6, axis=1), axis=0)
    print('img_big:', img_big.shape)
    imsave(
        os.path.join(select_dir,
                     "disagree_%s_%s.png" % (model_name1, model_name2)),
        img_as_ubyte(img_big))

    # save pred results in csv:
    with open(os.path.join(json_dir, 'imgnetid2word.json'), 'r') as fp:
        imgnetid2word = json.load(fp)

    def _word_grid(class_ids):
        # Class ids -> words, laid out like the image mosaic (6 rows of 10).
        words = [imgnetid2word[key] for key in class_ids.astype(str)]
        return np.array(words).reshape((6, 10))

    preds1_word_to_plot = _word_grid(preds1_to_plot)
    preds2_word_to_plot = _word_grid(preds2_to_plot)
    crawl_keys_word_to_plot = _word_grid(crawl_keys_to_plot)
    print('preds1_word_to_plot:', preds1_word_to_plot.shape)
    print('preds2_word_to_plot:', preds2_word_to_plot.shape)

    grids = (
        (preds1_word_to_plot, '%s_word_to_plot.csv' % model_name1),
        (preds2_word_to_plot, '%s_word_to_plot.csv' % model_name2),
        (crawl_keys_word_to_plot, 'crawl_keys_word_to_plot.csv'),
        (img_idx_to_plot, 'img_idx_to_plot.csv'),
    )
    for grid, csv_name in grids:
        pd.DataFrame(grid).to_csv(os.path.join(select_dir, csv_name),
                                  header=False,
                                  index=False)
def save_preprocessed_df(self, df, filename):
    """Write ``df`` to ``<preprocessed_dataset_dir>/<mapping_name>/<filename>.csv``.

    Args:
        df: object with a ``to_csv`` method (a DataFrame).
        filename: file name without the ``.csv`` extension.
    """
    target_dir = os.path.join(self.preprocessed_dataset_dir, self.mapping_name)
    create_dir(target_dir)
    csv_path = os.path.join(target_dir, "{}.csv".format(filename))
    df.to_csv(csv_path)
print("loading param: ", param) model.state_dict()[param].data.copy_( old_state_dict[old_param].data) else: print("warning cannot load param: ", param) def freeze_model(model): # model.eval() for params in model.parameters(): params.requires_grad = False if isinstance(model, torch.nn.DataParallel): model.module.freeze_bn() else: model.freeze_bn() if __name__ == '__main__': config = get_parser() utils.create_dir(config.save_dir) with open(os.path.join(config.save_dir, 'config.txt'), 'w') as f: dic = vars(config) pp = pprint.PrettyPrinter(indent=1, width=80, depth=None, stream=f) pp.pprint(dic) if config.rnn_type == None: train_cnn(config) else: train_rnn(config)
batch_score = compute_score_with_logits(pred, a.data).sum() score += batch_score upper_bound += (a.max(1)[0]).sum() num_data += pred.size(0) score = score / len(val_loader.dataset) upper_bound = upper_bound / len(val_loader.dataset) return score, upper_bound if __name__ == '__main__': args = parse_args() # args.MFB_O = 500 # args.MFB_K = 3 args.mima = False utils.create_dir(args.output) logger = utils.Logger(os.path.join(args.output, 'log.txt')) logger.write(args.__repr__()) os.environ['CUDA_VISIBLE_DEVICES'] = '0' device = torch.device("cuda:" + str(args.gpu) if args.gpu >= 0 else "cpu") args.device = device # Fixed ramdom seed torch.manual_seed(args.seed) torch.cuda.manual_seed(args.seed) torch.backends.cudnn.benchmark = True torch.backends.cudnn.deterministic = True dictionary = dataset_RAD.Dictionary.load_from_file(os.path.join(args.RAD_dir, 'dictionary.pkl')) train_set = dataset_RAD.VQAFeatureDataset('train', args, dictionary, question_len=args.question_len) batch_size = args.batch_size model = Net(args, len(train_set.label2ans))
hash_of_file = hash_file(args.file, args.hash) print( "Hash algorithm used: {}\nHash of file: {}\n".format( args.hash, hash_of_file ) ) hash_report = file_url_report( urls["file_report_endpoint"], api_key, hash_type=hash_of_file ) print( "Report returned!\nSaving report to results/reports/files/{}.json".format( filename ) ) # Create the results directory tree if it doesn't exist create_dir("results/reports/files") with open("results/reports/files/" + filename + ".json", "w") as outfile: json.dump(hash_report, outfile, indent=4, sort_keys=True) # Looks up a report based on the supplied url using /file/report endpoint elif args.file_report: file_report = file_url_report( urls["file_report_endpoint"], api_key, hash_type=args.file_report ) print( "Report returned!\nSaving report to results/reports/files/{}.json".format( filename ) ) # Create the results directory tree if it doesn't exist create_dir("results/reports/files")
def add(files, time_uploaded, email):
    """Run style transfer on an uploaded image pair and e-mail the result link.

    The first entry of ``files`` is saved as the input image; every later
    entry is saved as a transfer (style) image, and the last one saved is the
    one handed to the style-transfer module.

    Args:
        files: sequence of uploads. Each item is indexed with ``[0]`` to get a
            mapping with the keys ``'base64String'``, ``'type'``,
            ``'timeUploaded'`` and ``'fileName'``.
        time_uploaded: upload timestamp; its integer part names the per-upload
            directory. NOTE: it is overwritten by each file's own
            ``'timeUploaded'`` value inside the loop, so the value posted to
            the result endpoint is the one from the last file.
        email: user e-mail, used both as a directory name and as the
            recipient of the result link.

    Raises:
        IndexError: if fewer than two files are supplied (an input image and
            a transfer image are both required to build the output name).
    """
    # should also check for torch availability here
    # https://stackoverflow.com/questions/44617476/how-to-execute-celery-tasks-conditionally-python
    if not checkGPU():
        # No GPU right now: re-queue the same job. The arguments must be
        # forwarded, otherwise the retried task is invoked without its
        # required parameters. (Presumes `add` is registered as a Celery
        # task, as the original `apply_async` call implies.)
        add.apply_async(args=(files, time_uploaded, email), countdown=120)
        return

    file_paths = []
    filenames = []

    formatted_time = str(int(time_uploaded))
    print("this is formatted", formatted_time)

    usr_dir = img_folder + email + '/'
    usr_img_dir = usr_dir + formatted_time
    usr_input_img_dir = usr_img_dir + '/input' + '/'
    usr_transfer_img_dir = usr_img_dir + '/transfer' + '/'
    usr_output_img_dir = usr_img_dir + '/output' + '/'

    for index, file in enumerate(files):
        payload = file[0]
        file_data = payload['base64String']
        file_type = payload['type']
        time_uploaded = payload['timeUploaded']
        file_name = payload['fileName']
        print("this is file_type", file_type)

        img = stringToImage(file_data, file_type)
        img = toRGB(img)
        height = img.shape[0]
        width = img.shape[1]

        # resize image if necessary
        if height > max_height:
            img = rescaleImg(img, height, width, max_height)
        print("this is img", img)

        # img here is a numpy array now, save it
        create_dir(img_folder)
        if index == 0:
            print("this is input")
            print("this is filename", file_name)
            create_dir(usr_dir)
            create_dir(usr_input_img_dir)
            create_dir(usr_transfer_img_dir)
            create_dir(usr_output_img_dir)
            # save original image in its own directory
            usr_input_img = usr_input_img_dir + file_name
            file_paths.append(usr_input_img)
            saveImg(img, usr_input_img)
        else:
            # Build the file path from the directory without rebinding the
            # directory variable itself.
            usr_transfer_img = usr_transfer_img_dir + file_name
            file_paths.append(usr_transfer_img)
            saveImg(img, usr_transfer_img)
        filenames.append(file_name)

    # outside the for loop, compare two folders
    # compareFolders(usr_input_img_dir, usr_transfer_img_dir)

    # time.clock() was removed in Python 3.8; perf_counter() is the
    # documented replacement for measuring elapsed time.
    start_time = time.perf_counter()
    print("this is before seg model")
    print("this is file_paths", file_paths)

    output_name = filenames[0].rsplit(
        '.', 1)[0] + '_' + filenames[1].rsplit('.', 1)[0] + '.png'
    st = styleModule.Style_Transfer(usr_input_img, usr_transfer_img,
                                    usr_output_img_dir, output_name)
    cache = st.process()
    end_time = time.perf_counter() - start_time

    headers = {'Content-type': 'application/json'}
    params = {
        'output_image': cache['output'],
        'plot_image': cache['plot'],
        'input_image': usr_input_img,
        'transfer_image': usr_transfer_img,
        'time_required': end_time,
        'time_uploaded': time_uploaded,
        'email': email
    }
    print("this is params", params)

    r = requests.post(url9, json=params, headers=headers)
    link = r.content.decode("utf-8")
    print(r.status_code)
    print('this is response', r)
    print("this is content", link)
    send_email(email, link)
def main():
    """Command-line entry point: train, test, or extract features.

    Parses the command-line options, builds the VGG_Faces2 data loaders and a
    ResNet-152 model, optionally restores a checkpoint given by ``--resume``,
    and then dispatches on ``--cmd``:

    * ``train``   - builds an SGD optimizer and StepLR scheduler and runs
      ``Trainer.train()``.
    * ``test``    - runs ``Validator.validate()``.
    * ``extract`` - builds the model without its top layer and runs
      ``Extractor.extract()``.

    Raises:
        NotImplementedError: if ``--arch_type`` does not contain ``'resnet'``
            (no other model family is constructed here).
        AssertionError: if a resumed checkpoint was saved for a different
            ``arch_type``.
    """
    parser = argparse.ArgumentParser("PyTorch Face Recognizer")
    parser.add_argument('--cmd',
                        default='test',
                        type=str,
                        choices=['train', 'test', 'extract'],
                        help='train, test or extract')
    parser.add_argument('--arch_type',
                        type=str,
                        default='resnet50_ft',
                        help='model type',
                        choices=[
                            'resnet50_ft', 'senet50_ft', 'resnet50_scratch',
                            'senet50_scratch', 'resnet152'
                        ])
    parser.add_argument('--dataset_dir',
                        type=str,
                        default='/media/hyo/文档/Dataset/vggface2/train',
                        help='dataset directory')
    parser.add_argument('--log_file',
                        type=str,
                        default='./log_file',
                        help='log file')
    parser.add_argument(
        '--train_img_list_file',
        type=str,
        default='/media/hyo/文档/Dataset/vggface2/train_list.txt',
        help='text file containing image files used for training')
    parser.add_argument(
        '--test_img_list_file',
        type=str,
        default='/media/hyo/文档/Dataset/vggface2/train_list.txt',
        help='text file containing image files used for validation, test or feature extraction'
    )
    parser.add_argument(
        '--meta_file',
        type=str,
        default='/media/hyo/文档/Dataset/vggface2/identity_meta.csv',
        help='meta file')
    parser.add_argument('--checkpoint_dir',
                        type=str,
                        default='./checkpoint',
                        help='checkpoints directory')
    parser.add_argument('--feature_dir',
                        type=str,
                        default='./feature',
                        help='directory where extracted features are saved')
    parser.add_argument(
        '-c',
        '--config',
        type=int,
        default=1,
        choices=configurations.keys(),
        help='the number of settings and hyperparameters used in training')
    parser.add_argument('--batch_size',
                        type=int,
                        default=32,
                        help='batch size')
    parser.add_argument('--resume',
                        type=str,
                        default='',
                        help='checkpoint file')
    parser.add_argument('--weight_file',
                        type=str,
                        default='./weight/weight.pkl',
                        help='weight file')
    parser.add_argument('--gpu', type=int, default=0)
    parser.add_argument('-j',
                        '--workers',
                        default=4,
                        type=int,
                        metavar='N',
                        help='number of data loading workers (default: 4)')
    # NOTE: default=True combined with store_true means this flag is always
    # True; kept as-is so the command-line interface does not change.
    parser.add_argument(
        '--horizontal_flip',
        default=True,
        action='store_true',
        help='horizontally flip images specified in test_img_list_file')
    args = parser.parse_args()
    print(args)

    if args.cmd == "extract":
        utils.create_dir(args.feature_dir)
    if args.cmd == 'train':
        utils.create_dir(args.checkpoint_dir)
        cfg = configurations[args.config]

    log_file = args.log_file
    resume = args.resume

    os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu)
    cuda = torch.cuda.is_available()
    if cuda:
        print("torch.backends.cudnn.version: {}".format(
            torch.backends.cudnn.version()))

    torch.manual_seed(1337)
    if cuda:
        torch.cuda.manual_seed(1337)

    # 0. id label map
    meta_file = args.meta_file
    id_label_dict = utils.get_id_label_map(meta_file)
    weight_file = args.weight_file

    # 1. data loader
    root = args.dataset_dir
    train_img_list_file = args.train_img_list_file
    test_img_list_file = args.test_img_list_file

    kwargs = {'num_workers': args.workers, 'pin_memory': True} if cuda else {}

    if args.cmd == 'train':
        dt = datasets.VGG_Faces2(root,
                                 train_img_list_file,
                                 id_label_dict,
                                 split='train')
        train_loader = torch.utils.data.DataLoader(dt,
                                                   batch_size=args.batch_size,
                                                   shuffle=True,
                                                   **kwargs)

    # The validation loader is needed by every command (train/test/extract).
    dv = datasets.VGG_Faces2(root,
                             test_img_list_file,
                             id_label_dict,
                             split='valid',
                             horizontal_flip=args.horizontal_flip)
    val_loader = torch.utils.data.DataLoader(dv,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             **kwargs)

    # 2. model
    include_top = args.cmd != 'extract'
    if 'resnet' in args.arch_type:
        model = ResNet.resnet152(num_classes=N_IDENTITY,
                                 include_top=include_top)
    else:
        # Fail with a clear message instead of an unbound `model` below.
        raise NotImplementedError(
            "arch_type '{}' is not supported: only resnet models are "
            "built here".format(args.arch_type))
    print(model)

    start_epoch = 0
    start_iteration = 0
    if resume:
        # NOTE: torch.load unpickles the file; only resume from checkpoints
        # that come from a trusted source.
        checkpoint = torch.load(resume)
        model.load_state_dict(checkpoint['model_state_dict'])
        start_epoch = checkpoint['epoch']
        start_iteration = checkpoint['iteration']
        assert checkpoint['arch'] == args.arch_type
        print("Resume from epoch: {}, iteration: {}".format(
            start_epoch, start_iteration))
    elif args.cmd == 'train':
        # utils.load_state_dict(model, args.weight_file)
        # Re-initialise the classifier only for a fresh training run; doing
        # it after a resume would throw away the restored fc weights.
        model.fc.reset_parameters()

    if cuda:
        model = model.cuda()

    criterion = nn.CrossEntropyLoss()
    if cuda:
        criterion = criterion.cuda()

    # 3. optimizer
    if args.cmd == 'train':
        optim = torch.optim.SGD([
            {
                'params': get_parameters(model, bias=False)
            },
            {
                # biases: doubled learning rate, no weight decay
                'params': get_parameters(model, bias=True),
                'lr': cfg['lr'] * 2,
                'weight_decay': 0
            },
        ],
                                lr=cfg['lr'],
                                momentum=cfg['momentum'],
                                weight_decay=cfg['weight_decay'])
        if resume:
            optim.load_state_dict(checkpoint['optim_state_dict'])

        # lr_policy: step
        last_epoch = start_iteration if resume else -1
        lr_scheduler = torch.optim.lr_scheduler.StepLR(optim,
                                                       cfg['step_size'],
                                                       gamma=cfg['gamma'],
                                                       last_epoch=last_epoch)

    if args.cmd == 'train':
        trainer = Trainer(
            cmd=args.cmd,
            cuda=cuda,
            model=model,
            criterion=criterion,
            optimizer=optim,
            lr_scheduler=lr_scheduler,
            train_loader=train_loader,
            val_loader=val_loader,
            log_file=log_file,
            max_iter=cfg['max_iteration'],
            checkpoint_dir=args.checkpoint_dir,
            print_freq=100,
            model_dict=weight_file,
        )
        trainer.epoch = start_epoch
        trainer.iteration = start_iteration
        trainer.train()
    elif args.cmd == 'test':
        validator = Validator(
            cmd=args.cmd,
            cuda=cuda,
            model=model,
            criterion=criterion,
            val_loader=val_loader,
            log_file=log_file,
            print_freq=100,
        )
        validator.validate()
    elif args.cmd == 'extract':
        extractor = Extractor(
            cuda=cuda,
            model=model,
            val_loader=val_loader,
            log_file=log_file,
            feature_dir=args.feature_dir,
            flatten_feature=True,
            print_freq=100,
        )
        extractor.extract()
def install_linux_check(params):
    """Check guest health after installation.

    Waits for the guest to obtain an IP address, then runs these checks in
    order and stops at the first failure:

    1. ping the guest from the host
    2. create a directory and write/read a file in the guest
    3. vcpu count in the guest equals the domain config xml value
    4. memory size in the guest is within roughly 7% of the domain config
       xml value
    5. wget runs in the guest (URL read from ``global.cfg``)
    6. nic and blk driver types match the config (only when ``virt_type``
       contains ``'kvm'`` or ``'xenfv'``)

    params: dict read for 'logger', 'guestname', 'virt_type', 'hddriver'
            and 'nicdriver'. The 'logger' entry is removed from the dict and
            stored in the module-level ``logger``.

    return value: 0 - ok; 1 - bad
    """
    global logger
    logger = params['logger']
    params.pop('logger')
    guestname = params.get('guestname')
    virt_type = params.get('virt_type')

    logger.info("the name of guest is %s" % guestname)

    # Connect to local hypervisor connection URI
    hypervisor = utils.get_hypervisor()
    logger.info("the type of hypervisor is %s" % hypervisor)

    conn = sharedmod.libvirtobj['conn']
    domobj = conn.lookupByName(guestname)
    state = domobj.info()[0]

    # Long messages are built from adjacent string literals: a backslash
    # continuation inside a literal would embed the source indentation in
    # the logged text.
    if state == libvirt.VIR_DOMAIN_SHUTOFF:
        logger.info("guest is shutoff, if u want to run this case, "
                    "guest must be started")
        return 1

    logger.info("get the mac address of vm %s" % guestname)
    mac = utils.get_dom_mac_addr(guestname)
    logger.info("the mac address of vm %s is %s" % (guestname, mac))

    # Poll for the guest's IP address, counting the budget down in 10s steps.
    timeout = 300
    while timeout:
        ipaddr = utils.mac_to_ip(mac, 180)
        if ipaddr:
            logger.info("the ip address of vm %s is %s" % (guestname, ipaddr))
            break
        logger.info(str(timeout) + "s left")
        time.sleep(10)
        timeout -= 10

    if timeout == 0:
        logger.info("vm %s fail to get ip address" % guestname)
        return 1

    time.sleep(120)

    logger.info("Now checking guest health after installation")

    domain_name = guestname
    blk_type = params['hddriver']
    nic_type = params['nicdriver']
    # Guest login used by every remote check below.
    username = "root"
    password = "redhat"

    # Ping guest from host
    logger.info("check point1: ping guest from host")
    if utils.do_ping(ipaddr, 20) != 1:
        logger.error("Error: can't ping current guest")
        return 1
    logger.info("ping current guest successfull")

    # Create file and read file in guest.
    logger.info("check point2: creat and read dirctory/file in guest")
    if utils.create_dir(ipaddr, username, password) != 0:
        logger.error("Error: fail to create dir - /tmp/test")
        return 1
    logger.info("create dir - /tmp/test successfully")
    if utils.write_file(ipaddr, username, password) != 0:
        logger.error("Error: fail to write/read file - /tmp/test/test.log")
        return 1
    logger.info("write and read file: /tmp/test/test.log successfully")

    # Check whether vcpu equals the value set in guest config xml
    logger.info("check point3: check cpu number in guest equals to "
                "the value set in domain config xml")
    vcpunum_expect = int(utils.get_num_vcpus(domain_name))
    logger.info("vcpu number in domain config xml - %s is %s" %
                (domain_name, vcpunum_expect))
    vcpunum_actual = int(utils.get_remote_vcpus(ipaddr, username, password))
    logger.info("The actual vcpu number in guest - %s is %s" %
                (domain_name, vcpunum_actual))
    if vcpunum_expect != vcpunum_actual:
        logger.error("Error: The actual vcpu number in guest is "
                     "NOT equal to the setting your domain config xml")
        return 1
    logger.info("The actual vcpu number in guest is "
                "equal to the setting your domain config xml")

    # Check whether mem in guest is equal to the value set in domain config xml
    logger.info("check point4: check whether mem in guest is equal to "
                "the value set in domain config xml")
    mem_expect = utils.get_size_mem(domain_name)
    logger.info("current mem size in domain config xml - %s is %s" %
                (domain_name, mem_expect))
    mem_actual = utils.get_remote_memory(ipaddr, username, password)
    logger.info("The actual mem size in guest - %s is %s" %
                (domain_name, mem_actual))
    # Allow a 7% deviation from the configured size; both sides are
    # truncated to integers before comparing.
    diff_range = int(mem_expect) * 0.07
    diff = int(mem_expect) - int(mem_actual)
    if not int(math.fabs(diff)) < int(diff_range):
        logger.error("Error: The actual mem size in guest is NOT equal to "
                     "the setting your domain config xml")
        return 1
    logger.info("The actual mem size in guest is almost equal to "
                "the setting your domain config xml")

    # Check app works fine in guest, such as: wget
    logger.info("check point5: check app works fine in guest, such as: wget")
    logger.info("get system environment information")
    envfile = os.path.join(HOME_PATH, 'global.cfg')
    logger.info("the environment file is %s" % envfile)

    envparser = env_parser.Envparser(envfile)
    file_url = envparser.get_value("other", "wget_url")

    if utils.run_wget_app(ipaddr, username, password, file_url, logger) != 0:
        logger.error("Error: fail to run wget in guest")
        return 1
    logger.info("run wget successfully in guest.")

    # Check nic and blk driver in guest
    if 'kvm' in virt_type or 'xenfv' in virt_type:
        logger.info("check point6: check nic and blk driver in guest is "
                    "expected as your config:")
        # `and` short-circuits: the blk check only runs if the nic check passed.
        drivers_ok = (
            utils.validate_remote_nic_type(ipaddr, username, password,
                                           nic_type, logger) == 0 and
            utils.validate_remote_blk_type(ipaddr, username, password,
                                           blk_type, logger) == 0)
        if not drivers_ok:
            logger.error("Error: nic type - %s or blk type - %s check failed" %
                         (nic_type, blk_type))
            return 1
        logger.info("nic type - %s and blk type - %s check successfully" %
                    (nic_type, blk_type))

    return 0
train_trajs_dir = "../data/model_data/train_data/" valid_trajs_dir = "../data/model_data/valid_data/" test_trajs_dir = "../data/model_data/test_data/" if args.tandem_fea_flag: fea_flag = True else: fea_flag = False if args.load_pretrained_flag: model_save_path = args.model_old_path else: model_save_path = './results/'+args.module_type+'_kr_'+str(args.keep_ratio)+'_debug_'+str(args.debug)+\ '_gs_'+str(args.grid_size)+'_lam_'+str(args.lambda1)+\ '_attn_'+str(args.attn_flag)+'_prob_'+str(args.dis_prob_mask_flag)+\ '_fea_'+str(fea_flag)+'_'+time.strftime("%Y%m%d_%H%M%S") + '/' create_dir(model_save_path) logging.basicConfig(level=logging.DEBUG, format='%(asctime)s %(levelname)s %(message)s', filename=model_save_path + 'log.txt', filemode='a') rn = load_rn_shp(rn_dir, is_directed=True) raw_rn_dict = load_rn_dict(extra_info_dir, file_name='raw_rn_dict.json') new2raw_rid_dict = load_rid_freqs(extra_info_dir, file_name='new2raw_rid.json') raw2new_rid_dict = load_rid_freqs(extra_info_dir, file_name='raw2new_rid.json') rn_dict = load_rn_dict(extra_info_dir, file_name='rn_dict.json') mbr = MBR(args.min_lat, args.min_lng, args.max_lat, args.max_lng) grid_rn_dict, max_xid, max_yid = get_rid_grid(mbr, args.grid_size, rn_dict) args_dict['max_xid'] = max_xid