def prepare_data(self):
    self.dataset_class = DataLoader(self.train_path, self.test_path)
    self.train_data_df, self.test_data_df, self.n_users, self.n_items = \
        self.dataset_class.load_file_as_dataFrame()
    self.train_matrix, _ = self.dataset_class.dataFrame_to_matrix(
        self.train_data_df, self.n_users, self.n_items)
    self.test_matrix, _ = self.dataset_class.dataFrame_to_matrix(
        self.test_data_df, self.n_users, self.n_items)

def greedy_tag(to_pred, model_file, feature_map, out_name="greedy_pred"):
    out_file = open(out_name, "wt")
    model = pickle.load(open(model_file, "rb"))
    ftr_builders = [TransitionFtr(out_dim=LEN_FTR), EmmisionFtr(out_dim=LEN_FTR),
                    SuffixPrefix(out_dim=LEN_FTR), CombinationsWordsPos(out_dim=LEN_FTR),
                    CostumeFtr()]
    dl = DataLoader(to_pred, feature_map, ftr_builders)
    all_count = 0
    true_count = 0
    len_data = len(dl)
    for j, (all_pos, all_words) in enumerate(dl.data):
        if (100 * j / len_data) % 10 == 0:
            print(str(100 * j / len_data) + "%")
        prev_pos = [START, START]
        for i, (word, pos) in enumerate(zip(all_words, all_pos)):
            curr_pred = model.predict(dl.to_sparse(all_words, prev_pos, i))
            prev_pos.append(pos)
            all_count += 1
            curr_pred_label = dl.idx_to_label(int(curr_pred[0]))
            out_file.write(word + "/" + curr_pred_label + " ")
            true_count += 1 if pos == curr_pred_label else 0
            # print(word, pos, dl.idx_to_label(int(curr_pred[0])))
        out_file.write("\n")
    out_file.close()
    print(all_count, true_count, "\t~" + str(int(100 * true_count / all_count)) + "%")

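# A minimal, hypothetical usage sketch for greedy_tag above. All three paths are
# placeholders (not taken from the original project): the corpus to tag, the
# pickled classifier, and the saved feature map handed through to DataLoader.
if __name__ == "__main__":
    greedy_tag(to_pred="data/test.wtag",        # hypothetical corpus path
               model_file="model.pkl",          # hypothetical pickled model
               feature_map="feature_map.pkl",   # hypothetical feature-map file
               out_name="greedy_pred")
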
def prepare_data(self):
    self.path_train = './data/%s/%s_train.dat' % (self.data_set, self.data_set)
    path_test = './data/%s/%s_test.dat' % (self.data_set, self.data_set)
    self.dataset_class = DataLoader(self.path_train, path_test)
    self.train_data_df, _, self.n_users, self.n_items = \
        self.dataset_class.load_file_as_dataFrame()

def __init__(self, exp_counter, low_freq=0.1, hi_freq=3, pick_channels=['Cz'],
             signal_tmin=-3, signal_tmax=5, noise_tmin=3, noise_tmax=11,
             generate_report=False):
    self.exp_counter = exp_counter
    self.pick_channels = pick_channels
    self.data_loader = DataLoader(exp_counter=self.exp_counter)
    self.data_loader.init_task_dependent_variables()
    self.data_loader.load_data()
    self.exp_name = self.data_loader.exp_name
    self.channel_dict = self.data_loader.channel_dict
    self.fs = self.data_loader.fs
    self.low_freq = low_freq
    self.hi_freq = hi_freq
    self.signal_tmin = signal_tmin
    self.signal_tmax = signal_tmax
    self.noise_tmin = noise_tmin
    self.noise_tmax = noise_tmax
    self.report = mne.Report(verbose=True)
    self.generate_report = generate_report

def train(model, model_name):
    loader = DataLoader()
    pretrain_data, pretrain_labels, pretrain_names = loader.load_pretrain_datasets()

    # pretrain model
    model.fit(pretrain_data, pretrain_labels,
              batch_size=BATCH_SIZE, epochs=PRETRAIN_EPOCHS)
    deep_utils.create_directory("../models")
    model_filename = "../models/pretrained_" + model_name + ".h5"
    model.save(model_filename)

    train_data, train_labels, train_names = loader.load_train_datasets()
    test_data, test_labels, test_names = loader.load_test_datasets()

    # train model
    model.fit(train_data, train_labels,
              validation_data=(test_data, test_labels),
              batch_size=BATCH_SIZE, epochs=TRAIN_EPOCHS)
    deep_utils.create_directory("../models")
    model_filename = "../models/fine_tuned_" + model_name + ".h5"
    model.save(model_filename)

    # evaluate model
    scores = model.evaluate(test_data, test_labels, verbose=1)
    return scores

def best_threshold(model):
    def metrics(Y):
        positive = sum([y['target'] for y in Y])
        thresholds = [0.5, 0.45, 0.4, 0.35, 0.3, 0.25]
        index = 0
        right, wrong = 0, 0
        existed_edges = {ids: test[ids]['source_edges'] for ids in test.ids}
        id2node = {node['osmid']: node for ids in test.ids for node in test[ids]['nodes']}
        best_f1, best_th = 0, 0
        for _, th in enumerate(thresholds):
            for i in range(index, len(Y)):
                if Y[i]['score'] < math.log(th):
                    index = i
                    break
                if is_valid({'start': Y[i]['start'], 'end': Y[i]['end']},
                            existed_edges[Y[i]['id']], id2node):
                    existed_edges[Y[i]['id']].append({'start': Y[i]['start'], 'end': Y[i]['end']})
                    if Y[i]['target'] == 1:
                        right += 1
                    else:
                        wrong += 1
            p = 1.0 * right / (right + wrong + 1e-9)
            r = 1.0 * right / positive
            f1 = 2 * p * r / (p + r + 1e-9)
            if best_f1 < f1:
                best_f1 = f1
                best_th = th
            print(p, r, best_f1, best_th)
        return best_f1, best_th

    test = DataLoader('E:/python-workspace/CityRoadPrediction/data_20200610/test/')
    test.load_all_datas()
    result = load_model_result(model.lower(), data_dir)
    y = []
    for city in result:
        for index, v in result[city].items():
            for sample in v:
                y.append({
                    'id': index,
                    'start': sample['start'],
                    'end': sample['end'],
                    'score': sample['score'],
                    'target': int(sample['target'])
                })
    del result
    y = sorted(y, key=lambda e: e['score'], reverse=True)
    f1, th = metrics(y)
    print(f1, th)

def main(args):
    set_gpu_growth()
    dataset = args.dataset  # 'A' or 'B'
    cfg.init_path(dataset)  # initialize path names
    print(cfg.WEIGHT_PATH)

    # data generators
    train_data_gen = DataLoader(cfg.TRAIN_PATH, cfg.TRAIN_GT_PATH,
                                batch_size=cfg.TRAIN_BATCH_SIZE, shuffle=True,
                                gt_downsample=True, mean=cfg.MEAN, std=cfg.STD)
    val_data_gen = DataLoader(cfg.VAL_PATH, cfg.VAL_GT_PATH,
                              batch_size=cfg.VAL_BATCH_SIZE, shuffle=False,
                              gt_downsample=True, mean=cfg.MEAN, std=cfg.STD)

    # define the model
    input_shape = (None, None, 1)
    model = MCNN(input_shape)
    adam = Adam(lr=1e-4)
    model.compile(loss='mse', optimizer=adam, metrics=[mae, mse])

    # load pre-trained weights if provided
    if args.weight_path is not None:
        model.load_weights(args.weight_path, by_name=True)

    # callbacks
    checkpoint = ModelCheckpoint(filepath=cfg.WEIGHT_PATH, monitor='val_loss',
                                 verbose=1, save_best_only=False,
                                 save_weights_only=True, mode='min', period=5)
    callback_list = [checkpoint]

    # train
    print('Training Part_{} ...'.format(dataset))
    model.fit_generator(train_data_gen,
                        validation_data=val_data_gen,
                        epochs=cfg.EPOCHS,
                        initial_epoch=args.init_epoch,
                        callbacks=callback_list,
                        use_multiprocessing=True,
                        workers=4,
                        verbose=1)
    model.save(cfg.WEIGHT_PATH)

def execute(self):
    # self.data_path_clean = './data/ml100k/ml100k_train.dat'
    # self.data_path_attacked = './results/data_attacked/ml100k/ml100k_AUSH_0.data'
    path_test = self.data_path_clean.replace('train', 'test')

    # load real profile matrix
    dataset_class_real = DataLoader(self.data_path_clean, path_test)
    train_data_df_real, _, n_users_real, n_items_real = dataset_class_real.load_file_as_dataFrame()
    train_matrix_real, _ = dataset_class_real.dataFrame_to_matrix(
        train_data_df_real, n_users_real, n_items_real)
    train_matrix_real = train_matrix_real.toarray()

    # load fake profile matrix
    dataset_class_attacked = DataLoader(self.data_path_attacked, path_test)
    train_data_df_attacked, _, n_users_attacked, n_items_attacked = dataset_class_attacked.load_file_as_dataFrame()
    train_matrix_attacked, _ = dataset_class_attacked.dataFrame_to_matrix(
        train_data_df_attacked, n_users_attacked, n_items_attacked)
    train_matrix_fake = train_matrix_attacked.toarray()[n_users_real:, :]

    # calculate item distributions
    real_item_distribution = self.get_item_distribution(train_matrix_real)
    fake_item_distribution = self.get_item_distribution(train_matrix_fake)

    TVD_distance = self.get_TVD_distance(real_item_distribution, fake_item_distribution)
    JS_distance = self.get_JS_distance(real_item_distribution, fake_item_distribution)

    res_str = 'TVD:%.4f\tJS:%.4f' % (TVD_distance, JS_distance)
    print('result begin', res_str, 'result end')
    return TVD_distance, JS_distance

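# Hedged reference sketch of the two distribution distances used in execute().
# get_TVD_distance and get_JS_distance are defined elsewhere on the class; the
# helpers below only illustrate the standard definitions they are assumed to follow.
import numpy as np
from scipy.spatial.distance import jensenshannon


def tvd(p, q):
    # total variation distance: half the L1 distance between two distributions
    return 0.5 * np.abs(np.asarray(p) - np.asarray(q)).sum()


def js(p, q):
    # Jensen-Shannon distance as provided by scipy (square root of the JS divergence)
    return jensenshannon(p, q)
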
def __init__(self, gen_network: Callable, dis_network: Callable, dataloader_args: Dict):
    tf.enable_eager_execution()
    self.data_loader = DataLoader(**dataloader_args)  # util to load dataset from dir
    self.dataset = self.data_loader.get_dataset()
    self.generator = gen_network()
    self.discriminator = dis_network(self.data_loader.output_shape, (1,))

    noise = Input(shape=LATENT_SHAPE)
    gen_img = self.generator(noise)
    disc_output = self.discriminator(gen_img)
    self.combined = Model(noise, disc_output)
    # the combined model is only trained on the generator network
    self.combined.layers[2].trainable = False

    self.fixed_noise = tf.random.normal([128] + list(LATENT_SHAPE))

def main(args):
    dataset = args.dataset  # 'A' or 'B'
    output_dir = os.path.join(cfg.HM_GT_PATH, 'Part_{}'.format(dataset))
    for _dir in [cfg.HM_GT_PATH, output_dir]:
        if not os.path.exists(_dir):
            os.mkdir(_dir)

    test_path = cfg.TEST_PATH.format(dataset)
    test_gt_path = cfg.TEST_GT_PATH.format(dataset)

    # load data
    data_loader = DataLoader(test_path, test_gt_path, shuffle=False, gt_downsample=True)
    # data_loader = ImageDataLoader(test_path, test_gt_path, shuffle=False, gt_downsample=True, pre_load=True)

    # create heatmaps
    print('Creating heatmaps for Part_{} ...'.format(dataset))
    for i, (img, den) in enumerate(data_loader):
        data = img
        gt = den
        img_name = data_loader.filename_list[i]
        gt = np.squeeze(gt)  # shape(1, h, w, 1) -> shape(h, w)
        save_heatmap(gt, data, img_name, output_dir, gt=True)
    print('All Done.')

def __init__(self, path=None, json_file=None, yaml_file=None, split=0.1, nb_timesteps=6):
    self.split = split
    # Search for the json and yaml files in `path` if it is given;
    # otherwise use json_file and yaml_file directly.
    if path:
        path = path.rstrip('/')
        json_file = [file for file in glob("{}/*.json".format(path))]
        yaml_file = [file for file in glob("{}/*.yaml".format(path))]
        if len(json_file) != 1:
            print("No json or more than one file in the specified path.")
            exit(1)
        if len(yaml_file) != 1:
            print("No yaml or more than one file in the specified path.")
            exit(1)
        json_file = json_file[0]
        yaml_file = yaml_file[0]
    self.path = path
    self.json_file = json_file
    self.yaml_file = yaml_file
    self.data_formater = DataFormater(nb_timesteps=nb_timesteps)
    self.data_loader = DataLoader(json_file=json_file, yaml_file=yaml_file,
                                  nb_gates_nominal=2, nb_gates_ir=1)
    data = np.array(self.data_loader.data)
    self.init(data)

def prepareData(path):
    try:
        # embedder
        embedder = FeatureExtractor(config.EMBEDDING_MODEL_PATH)
        # face detector
        faceDetector = FaceDetector(config.FACE_DECTOR_PATH)
        # image paths
        imagePaths = DataLoader(config.DATASET_PATH)

        names = []
        embeddedVectors = []
        for imagePath in imagePaths:
            name = imagePath.split(os.path.sep)[1]
            img = cv2.imread(imagePath)
            H, W = img.shape[:2]
            # face detection
            detectedFaces = faceDetector.detect(img, .3)
            for detectedFace in detectedFaces:
                # grab bounding box
                bbox = detectedFace.bounding_box.flatten() * np.array([W, H, W, H])
                xmin, ymin, xmax, ymax = bbox.astype('int')
                # grab ROI from image
                roi = img[ymin:ymax, xmin:xmax]
                vector = embedder.run(roi)
                embeddedVectors.append(vector)
                names.append(name)

        data = {'data': embeddedVectors, 'names': names}
        with open(path, 'wb') as f:
            f.write(pickle.dumps(data))
    except Exception as e:
        raise e

def main_process():
    # load the data
    data = DataLoader()
    data_frame = get_dataframe(data_conn=data)
    # custom filtering and cleaning of the DataFrame
    data_frame = custom_dataframe_handler(df=data_frame)

def main(train_flag=False, test_flag=False, use_encoder=False):
    # before training
    config = Config()
    data_loader = DataLoader(TRAIN_DATA_PATH)
    data_loader.process_raw_data()
    sent_models = SentMatching(data_loader)
    model = sent_models.model
    encoder = sent_models.encoder

    if train_flag:
        ranking_model = sent_models.get_ranking_model(data_loader.y_valid.shape[0])
        model, encoder = train(model, encoder, ranking_model, data_loader, config)
    else:
        model.load_weights(MODEL_PATH)
        encoder = Model(inputs=model.input, outputs=model.get_layer(index=3).output)

    if test_flag:
        # minimum number of samples in the test file: 11
        test_data, test_vec = evaluate_(encoder, TEST_DATA_PATH, data_loader, sent_models)
        # test_data, x_test, y_test, test_id2g = data_loader.process_test_data(TEST_DATA_PATH)
        # test_vec = encoder.predict(x_test,
        #                            verbose=True,
        #                            batch_size=1000)  # compute sentence vectors with the encoder
        while True:
            input_sent = input()
            predict(encoder, data_loader, test_data, test_vec, input_sent)

    # todo
    if use_encoder:
        tmp_data, x_tmp, y_tmp, tmp_id2g = data_loader.process_test_data(PATH_FOR_ENCODER)
        tmp_vec = encoder.predict(x_tmp, verbose=True, batch_size=1000)  # compute sentence vectors with the encoder
        print(tmp_vec.shape)
        print(tmp_vec[0].shape)
        sims = np.dot(tmp_vec, tmp_vec[0])
        for i in sims.argsort()[-1:][::-1]:
            print(tmp_data.iloc[i][1], sims[i])
        y_pred = KMeans(n_clusters=3, random_state=42).fit_predict(tmp_vec)
        print(y_pred)

def execute(self):
    # temp file paths
    cur_time = time.time()
    label_file_path = './label_%f.tmp' % cur_time
    conf_file_path = './conf_%f.tmp' % cur_time

    args = {
        'ratings': self.data_path_attacked,
        'ratings.setup': '-columns 0 1 2',
        'label': label_file_path,
        'methodName': 'FAP',
        'evaluation.setup': '-ap 0.000001',
        'seedUser': 5,
        'topKSpam': 50,
        'output.setup': 'on -dir ./',
    }

    # write conf file
    with open(conf_file_path, 'w') as fout:
        fout.write('\n'.join(['%s=%s' % i for i in args.items()]))

    # write label file
    _, _, n_users_real, _ = DataLoader(self.data_path_clean,
                                       self.data_path_clean.replace('train', 'test'),
                                       verbose=False).load_file_as_dataFrame()
    _, _, n_users_attacked, _ = DataLoader(self.data_path_attacked,
                                           self.data_path_clean.replace('train', 'test'),
                                           verbose=False).load_file_as_dataFrame()
    uids, labels = np.arange(n_users_attacked), np.zeros(n_users_attacked)
    labels[n_users_real:] = 1
    with open(label_file_path, 'w') as fout:
        fout.write('\n'.join(["%d\t%d" % i for i in list(zip(uids, labels))]))

    sd = SDLib(Config(conf_file_path))
    result = sd.execute()
    res_str = "pre:%.4f\trecall:%.4f" % tuple(result)
    print('result begin', res_str, 'result end')

    # os.remove(label_file_path)
    os.remove(conf_file_path)

def main(args):
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"
    dataset = args.dataset  # 'A' or 'B'
    output_dir = args.output_dir
    weight_path = args.weight_path
    cfg.init_path(dataset)

    heatmaps_dir = os.path.join(output_dir, 'heatmaps')      # directory to save heatmaps
    results_txt = os.path.join(heatmaps_dir, 'results.txt')  # file to save predicted results
    for _dir in [output_dir, heatmaps_dir]:
        if not os.path.exists(_dir):
            os.mkdir(_dir)

    # load test set
    data_loader = DataLoader(cfg.TEST_PATH, cfg.TEST_GT_PATH, shuffle=False, gt_downsample=True)
    # data_loader = ImageDataLoader(cfg.TEST_PATH, cfg.TEST_GT_PATH, shuffle=False, gt_downsample=True, pre_load=True)

    # load model
    model = MCNN(input_shape=(None, None, 1))
    model.load_weights(weight_path, by_name=True)

    # test
    print('Testing Part_{} ...'.format(dataset))
    mae = 0.0
    mse = 0.0
    print(model.input_shape)
    for idx, (img, g) in enumerate(data_loader):
        if idx == len(data_loader.filename_list):
            break
        print(idx)
        gt = g
        data = img
        filename = data_loader.filename_list[idx]

        pred = model.predict(data)
        pred *= cfg.STD
        pred += cfg.MEAN
        gt_count = np.sum(gt)
        pred_count = np.sum(pred)
        mae += abs(gt_count - pred_count)
        mse += ((gt_count - pred_count) * (gt_count - pred_count))

        # create and save heatmap
        pred = np.squeeze(pred)  # shape(1, h, w, 1) -> shape(h, w)
        # save_heatmap(pred, img, filename, heatmaps_dir)

        # save results
        with open(results_txt, 'a') as f:
            line = '<{}> {:.2f} -- {:.2f}\n'.format(filename, gt_count, pred_count)
            f.write(line)

    mae = mae / len(data_loader)
    mse = np.sqrt(mse / len(data_loader))
    print('MAE: %0.2f, MSE: %0.2f' % (mae, mse))
    with open(results_txt, 'a') as f:
        f.write('MAE: %0.2f, MSE: %0.2f' % (mae, mse))

def train(model, model_name):
    loader = DataLoader()
    train_data, train_labels, train_names = loader.load_train_datasets()
    test_data, test_labels, test_names = loader.load_test_datasets()

    model.fit(train_data, train_labels,
              batch_size=BATCH_SIZE, epochs=TRAIN_EPOCHS,
              validation_data=(test_data, test_labels),
              shuffle=True)

    # save trained model
    deep_utils.create_directory("../models")
    model_filename = "../models/base_" + model_name + ".h5"
    model.save(model_filename)

    scores = model.evaluate(test_data, test_labels, verbose=1)
    return scores

def main(args):
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"
    dataset = args.dataset  # 'A' or 'B'
    if dataset == 'A':
        model_path = './trained_models/mcnn_A_train.hdf5'
    else:
        model_path = './trained_models/mcnn_B_train.hdf5'
    output_dir = './output_{}/'.format(dataset)
    heatmaps_dir = os.path.join(output_dir, 'heatmaps')     # directory to save heatmaps
    results_txt = os.path.join(output_dir, 'results.txt')   # file to save predicted results
    for _dir in [output_dir, heatmaps_dir]:
        if not os.path.exists(_dir):
            os.mkdir(_dir)

    test_path = cfg.TEST_PATH.format(dataset)
    test_gt_path = cfg.TEST_GT_PATH.format(dataset)

    # load test set
    print('Loading data, wait a moment...')
    data_loader = DataLoader(test_path, test_gt_path, shuffle=False, gt_downsample=True)

    # load model
    model = load_model(model_path)

    # test
    print('Testing Part_{} ...'.format(dataset))
    mae = 0.0
    mse = 0.0
    for blob in data_loader:
        img = blob['data']
        gt = blob['gt']
        pred = model.predict(np.expand_dims(img, axis=0))
        gt_count = np.sum(gt)
        pred_count = np.sum(pred)
        mae += abs(gt_count - pred_count)
        mse += ((gt_count - pred_count) * (gt_count - pred_count))

        # create and save heatmap
        pred = np.squeeze(pred)  # shape(1, h, w, 1) -> shape(h, w)
        save_heatmap(pred, blob, test_path, heatmaps_dir)

        # save results
        with open(results_txt, 'a') as f:
            line = '<{}> {:.2f} -- {:.2f}\n'.format(blob['fname'].split('.')[0],
                                                    gt_count, pred_count)
            f.write(line)

    mae = mae / data_loader.num_samples
    mse = np.sqrt(mse / data_loader.num_samples)
    print('MAE: %0.2f, MSE: %0.2f' % (mae, mse))
    with open(results_txt, 'a') as f:
        f.write('MAE: %0.2f, MSE: %0.2f' % (mae, mse))

def main(args):
    os.environ["CUDA_VISIBLE_DEVICES"] = "1"
    dataset = args.dataset  # 'A' or 'B'
    output_dir = args.output_dir
    weight_path = args.weight_path
    cfg.init_path(dataset)

    heatmaps_dir = os.path.join(output_dir, 'heatmaps')     # directory to save heatmaps
    results_txt = os.path.join(output_dir, 'results.txt')   # file to save predicted results
    for _dir in [output_dir, heatmaps_dir]:
        if not os.path.exists(_dir):
            os.mkdir(_dir)

    # load test set
    data_loader = DataLoader(cfg.TEST_PATH, cfg.TEST_GT_PATH, shuffle=False)

    # load model
    print('[INFO] Load model ...')
    model = CMTL(input_shape=(None, None, 1))
    model.load_weights(weight_path, by_name=True)

    # test
    print('[INFO] Testing Part_{} ...'.format(dataset))
    mae = 0.0
    mse = 0.0
    acc = 0.0
    for blob in data_loader.blob_list:
        img = blob['data']
        gt_den = blob['gt_den']
        gt_cls = np.argmax(blob['gt_class'])

        pred_den, pred_cls = model.predict(img[np.newaxis, ...])
        if np.argmax(pred_cls[0]) == gt_cls:
            acc += 1
        gt_count = np.sum(gt_den)
        pred_count = np.sum(pred_den)
        mae += abs(gt_count - pred_count)
        mse += ((gt_count - pred_count) * (gt_count - pred_count))

        # # create and save heatmap
        # pred = np.squeeze(pred)  # shape(1, h, w, 1) -> shape(h, w)
        # save_heatmap(pred, blob, test_path, heatmaps_dir)

        # save results
        with open(results_txt, 'a') as f:
            line = '<{}> {:.2f}--{:.2f}\t{}--{}\n'.format(blob['fname'].split('.')[0],
                                                          gt_count, pred_count,
                                                          gt_cls, np.argmax(pred_cls[0]))
            f.write(line)

    mae = mae / data_loader.num_samples
    mse = np.sqrt(mse / data_loader.num_samples)
    acc = acc / data_loader.num_samples
    print('[RESULT] MAE: %0.2f, MSE: %0.2f, Acc: %0.2f' % (mae, mse, acc))
    with open(results_txt, 'a') as f:
        f.write('MAE: %0.2f, MSE: %0.2f, Acc: %0.2f' % (mae, mse, acc))

def prepare_data(self):
    self.path_train = './data/%s/%s_train.dat' % (self.data_set, self.data_set)
    path_test = './data/%s/%s_test.dat' % (self.data_set, self.data_set)

    dataset_class = DataLoader(self.path_train, path_test)
    self.train_data_df, self.test_data_df, self.n_users, self.n_items = \
        dataset_class.load_file_as_dataFrame()
    train_matrix, _ = dataset_class.dataFrame_to_matrix(self.train_data_df, self.n_users, self.n_items)
    test_matrix, _ = dataset_class.dataFrame_to_matrix(self.test_data_df, self.n_users, self.n_items)
    self.train_array, self.test_array = train_matrix.toarray(), test_matrix.toarray()

    self.data_loader = torch.utils.data.DataLoader(
        dataset=torch.from_numpy(self.train_array).type(torch.float32),
        batch_size=self.batch_size_D, shuffle=True, drop_last=True)

    self.target_users = np.where(self.train_array[:, self.target_id] == 0)[0]
    attack_target = np.zeros((len(self.target_users), self.n_items))
    attack_target[:, self.target_id] = 1.0
    self.attack_target = torch.from_numpy(attack_target).type(torch.float32).to(self.device)

def main(args):
    dataset = args.dataset  # 'A' or 'B'
    cfg.init_path(dataset)  # initialize path names

    # data generator
    train_data_gen = DataLoader(cfg.TRAIN_PATH, cfg.TRAIN_GT_PATH,
                                batch_size=cfg.TRAIN_BATCH_SIZE,
                                shuffle=True, gt_downsample=True)

    dens = [np.ravel(den) for im, den in train_data_gen]
    dens = np.concatenate(dens, axis=0)
    print("mean:{},std:{}".format(np.mean(dens), np.std(dens)))

def main(args):
    dataset = args.dataset
    neighborhood_size = args.neighborhood_size
    recommended_list_size = args.recommended_list_size

    data_loader = DataLoader(dataset)
    data_loader.load_data()
    user_number, item_number = data_loader.get_dataset_info()
    train, test = data_loader.train_test_split()

    recommender = RecommenderSystem()
    rating_predictions = recommender.predict_topk_nobias(train, k=neighborhood_size)

    evaluator = RecommenderEvaluator()
    print("RMSE={}".format(evaluator.rmse(rating_predictions, test)))
    print("MAE={}".format(evaluator.mae(rating_predictions, test)))

    mean_test = np.true_divide(test.sum(1), (test != 0).sum(1))
    precisions, recalls = evaluator.precision_recall_at_k(
        rating_predictions, test, mean_test, user_number, recommended_list_size)
    precision = sum(prec for prec in precisions.values()) / len(precisions)
    recall = sum(rec for rec in recalls.values()) / len(recalls)
    f1 = evaluator.f1(precision, recall)
    print("Precision({})={}".format(recommended_list_size, precision))
    print("Recall({})={}".format(recommended_list_size, recall))
    print("F1({})={}".format(recommended_list_size, f1))

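# For reference, a common definition of the F1 score computed by
# RecommenderEvaluator.f1 above (an assumption about that method, which is
# defined elsewhere): the harmonic mean of precision and recall.
def f1_score(precision, recall):
    if precision + recall == 0:
        return 0.0
    return 2 * precision * recall / (precision + recall)
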
def train_model(args):
    # Data
    data_loader = DataLoader(args.batch_size)
    train_ds, test_ds = data_loader.make_dataset()

    # Prior and Plot objects
    prior_factory = PriorFactory(args.n_classes,
                                 gm_x_stddev=args.gm_x_stddev,
                                 gm_y_stddev=args.gm_y_stddev)
    plot_factory = PlotFactory(prior_factory, args.results_dir, args.prior_type,
                               args.n_classes, data_loader.img_size_x,
                               data_loader.img_size_y)

    # Model
    gan = Gan(image_dim=data_loader.img_size_x * data_loader.img_size_y)

    # Optimizers
    optimizers_dict = {
        "encoder": tf.optimizers.Adam(learning_rate=args.learning_rate),
        "discriminator": tf.optimizers.Adam(learning_rate=args.learning_rate / 5),
        "gan": tf.optimizers.Adam(learning_rate=args.learning_rate),
    }

    # Training
    train_all_steps(gan, optimizers_dict, train_ds, args.n_epochs, args.prior_type,
                    args.n_classes, data_loader, plot_factory, args.log_dir)

def __init__(self, to_pred, model_file, feature_map, out_name="greedy_pred"):
    self._probs = {}
    self._model = pickle.load(open(model_file, "rb"))
    ftr_builders = [TransitionFtr(out_dim=LEN_FTR), EmmisionFtr(out_dim=LEN_FTR),
                    SuffixPrefix(out_dim=LEN_FTR), CombinationsWordsPos(out_dim=LEN_FTR),
                    CostumeFtr()]
    self._dl = DataLoader(to_pred, feature_map, ftr_builders)
    self._label_list = self._dl.label_list + [START]
    self._label_to_idx = {label: i for i, label in enumerate(self._label_list)}
    self._tagger = ViterbiAlg(self._label_list, self._prob_func)
    self._init_probs()

def run(self):
    start = datetime.now()
    thread_id = start.strftime('%Y%m%d%H%M%S')
    logging.info("Thread %s - %s started" % (thread_id, self.file_path))

    sftp_reader = SFTPReader(self.host, self.port, self.username, self.password,
                             self.ssh_key_path, self.sftp_max_retry)
    byte_io = sftp_reader.load_file(self.file_path)
    sftp_reader.close()
    step = datetime.now()
    logging.info("Thread %s - %s loaded data - Time: %d"
                 % (thread_id, self.file_path, (step - start).seconds))

    if self.try_send_data:
        data_loader = DataLoader()
        processed_df = data_loader.load(byte_io, self.columns_seletion,
                                        fill_na_dict=self.fill_na_dict,
                                        concat_dict=self.concat_dict,
                                        rename_dict=self.rename_dict)
        step = datetime.now()
        logging.info("Thread %s - %s parsed data - Time: %d"
                     % (thread_id, self.file_path, (step - start).seconds))

        event_sender = EventSender(self.connection_string, self.eventhub_name,
                                   self.max_event_per_batch, self.eventhub_max_retry,
                                   self.metadata, self.zvelo_helper)
        event_sender.send(processed_df)
        event_sender.close()
        step = datetime.now()
        logging.info("Thread %s - %s sent data - Time: %d"
                     % (thread_id, self.file_path, (step - start).seconds))

    # Copy raw data to ADLS
    if (not self.blob_name == False) or (not self.blob_key == False):
        blob_helper = BlobHelper(self.blob_name, self.blob_key)
        file_name = self.file_path[self.file_path.rindex("/") + 1:]
        blob_path = "%s/%s" % (self.blob_path, file_name)
        byte_io.seek(0)
        blob_helper.upload_data(byte_io, self.blob_container, blob_path, overwrite=True)

    step = datetime.now()
    logging.info("Thread %s - %s stopped - Time: %d"
                 % (thread_id, self.file_path, (step - start).seconds))

def prepare_real_samples():
    """
    Load the data provider and set up the training and testing datasets.
    :return: X_train
    """
    # load real data
    (X_train, y_train), (X_test, y_test) = DataLoader.load_data()

    # convert from int to float and rescale [0, 255] to [-1, 1]
    X_train = (X_train.astype(np.float32) - 127.5) / 127.5
    X_train = X_train[:, :, :, None]
    X_test = X_test[:, :, :, None]
    # X_train = X_train.reshape((X_train.shape, 1) + X_train.shape[1:])
    return X_train

def prepare_real_samples(self):
    """
    Load the data provider and set up the training and testing datasets.
    :return: X
    """
    # load real data
    (x_train, _), (_, _) = DataLoader.load_data()

    # add a channel axis to expand to 3D
    X = expand_dims(x_train, axis=-1)

    # convert from int to float and rescale [0, 255] to [-1, 1]
    X = X.astype('float32')
    X = (X - 127.5) / 127.5
    return X

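# Quick sanity check of the scaling used above: (x - 127.5) / 127.5 maps
# pixel value 0 to -1.0, 127.5 to 0.0, and 255 to 1.0.
for x in (0.0, 127.5, 255.0):
    print(x, (x - 127.5) / 127.5)
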
def main(args):
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"
    dataset = args.dataset  # 'A' or 'B'
    train_path = cfg.TRAIN_PATH.format(dataset)
    train_gt_path = cfg.TRAIN_GT_PATH.format(dataset)
    val_path = cfg.VAL_PATH.format(dataset)
    val_gt_path = cfg.VAL_GT_PATH.format(dataset)

    # load the data
    print('Loading data, wait a moment...')
    train_data_gen = DataLoader(train_path, train_gt_path, shuffle=True, gt_downsample=True)
    val_data_gen = DataLoader(val_path, val_gt_path, shuffle=False, gt_downsample=True)

    # define the model
    input_shape = (None, None, 1)
    model = MCNN(input_shape)

    # compile
    adam = Adam(lr=1e-4)
    model.compile(loss='mse', optimizer=adam, metrics=[mae, mse])

    # callbacks
    checkpointer_best_train = ModelCheckpoint(
        filepath=os.path.join(cfg.MODEL_DIR, 'mcnn_' + dataset + '_train.hdf5'),
        monitor='loss', verbose=1, save_best_only=True, mode='min')
    callback_list = [checkpointer_best_train]

    # train
    print('Training Part_{} ...'.format(dataset))
    model.fit_generator(
        train_data_gen.flow(cfg.TRAIN_BATCH_SIZE),
        steps_per_epoch=train_data_gen.num_samples // cfg.TRAIN_BATCH_SIZE,
        validation_data=val_data_gen.flow(cfg.VAL_BATCH_SIZE),
        validation_steps=val_data_gen.num_samples // cfg.VAL_BATCH_SIZE,
        epochs=cfg.EPOCHS,
        callbacks=callback_list,
        verbose=1)

def main(args):
    dataset = args.dataset  # 'A' or 'B'
    output_dir = os.path.join(cfg.HM_GT_PATH, 'Part_{}'.format(dataset))
    for _dir in [cfg.HM_GT_PATH, output_dir]:
        if not os.path.exists(_dir):
            os.mkdir(_dir)

    test_path = cfg.TEST_PATH.format(dataset)
    test_gt_path = cfg.TEST_GT_PATH.format(dataset)

    # load data
    data_loader = DataLoader(test_path, test_gt_path, shuffle=False, gt_downsample=True)

    # create heatmaps
    print('Creating heatmaps for Part_{} ...'.format(dataset))
    for blob in data_loader:
        gt = blob['gt']
        # create and save heatmap
        gt = np.squeeze(gt)  # shape(1, h, w, 1) -> shape(h, w)
        save_heatmap(gt, blob, test_path, output_dir, gt=True)
    print('All Done.')

def prepare_train_embedding(self, data_dir):
    data = copy.deepcopy(self.train_loader.data[self.city])
    data.update(self.tester.test_loader.data[self.city])
    keys = sorted(list(data.keys()))

    embeds = {}
    for i in range(0, len(keys), 40):
        print(self.city, i, len(keys))
        nodes, edges = [], []
        for index in keys[i: i + 40]:
            nodes += data[index]['nodes']
            edges += data[index]['source_edges']
        G = DataLoader.build_graph(nodes, edges)
        self.vec_model.build_model(G)
        embeds.update(self.vec_model.train(embed_size=self.embed_dim))

    for index in self.train_loader.data[self.city]:
        positive, negative = [], []
        sample = self.train_loader.data[self.city][index]
        for i, n1 in enumerate(sample['nodes']):
            for j, n2 in enumerate(sample['nodes'][i + 1:]):
                if {'start': n1['osmid'], 'end': n2['osmid']} in sample['target_edges'] or \
                        {'start': n2['osmid'], 'end': n1['osmid']} in sample['target_edges']:
                    positive.append([n1['osmid'], n2['osmid'], 1])
                elif {'start': n1['osmid'], 'end': n2['osmid']} not in sample['source_edges'] and \
                        {'start': n2['osmid'], 'end': n1['osmid']} not in sample['source_edges']:
                    negative.append([n1['osmid'], n2['osmid'], 0])
        samples = positive + negative
        for (start, end, target) in samples:
            self.embedding.append({
                'start_id': str(start),
                'end_id': str(end),
                'start_embedding': embeds[str(start)] if str(start) in embeds else np.zeros(self.embed_dim),
                'end_embedding': embeds[str(end)] if str(end) in embeds else np.zeros(self.embed_dim),
                'target': target,
            })
    pickle.dump(self.embedding, open(data_dir + 'train/' + self.city + '_embedding.pkl', 'wb'))

    test_embedding = {}
    for index in self.tester.test_loader.data[self.city]:
        positive, negative = [], []
        sample = self.tester.test_loader.data[self.city][index]
        for i, n1 in enumerate(sample['nodes']):
            for j, n2 in enumerate(sample['nodes'][i + 1:]):
                if {'start': n1['osmid'], 'end': n2['osmid']} in sample['target_edges'] or \
                        {'start': n2['osmid'], 'end': n1['osmid']} in sample['target_edges']:
                    positive.append([n1['osmid'], n2['osmid'], 1])
                elif {'start': n1['osmid'], 'end': n2['osmid']} not in sample['source_edges'] and \
                        {'start': n2['osmid'], 'end': n1['osmid']} not in sample['source_edges']:
                    negative.append([n1['osmid'], n2['osmid'], 0])
        samples = positive + negative
        test_embedding[index] = []
        for (start, end, target) in samples:
            test_embedding[index].append({
                'start_id': str(start),
                'end_id': str(end),
                'start_embedding': embeds[str(start)] if str(start) in embeds else np.zeros(self.embed_dim),
                'end_embedding': embeds[str(end)] if str(end) in embeds else np.zeros(self.embed_dim),
                'target': target,
            })
    print(self.city, len(self.embedding), len(test_embedding))
    pickle.dump(test_embedding, open(data_dir + 'test/' + self.city + '_embedding.pkl', 'wb'))