def test(self, args):
    """Run inference on the held-out test images and write an HTML index for visual comparison."""
    if args.which_direction == 'AtoB':
        sample_files = glob('./datasets/{}/*.*'.format(self.dataset_dir + '/testA'))
    elif args.which_direction == 'BtoA':
        sample_files = glob('./datasets/{}/*.*'.format(self.dataset_dir + '/testB'))
    else:
        raise Exception('--which_direction must be AtoB or BtoA')

    # write html for visual comparison
    index_path = os.path.join(args.test_dir, '{0}_index.html'.format(args.which_direction))
    index = open(index_path, "w")
    index.write("<html><body><table><tr>")
    index.write("<th>name</th><th>input</th><th>output</th></tr>")

    # testB/test_A are the generator output op and input placeholder (and vice versa for BtoA)
    out_var, in_var = (self.testB, self.test_A) if args.which_direction == 'AtoB' else (self.testA, self.test_B)

    for sample_file in sample_files:
        print('Processing image: ' + sample_file)
        sample_image = [load_test_data(sample_file, args.fine_size)]
        sample_image = np.array(sample_image).astype(np.float32)
        # add a trailing channel axis, then keep only the expected number of input channels
        new_shape = list(sample_image.shape) + [1]
        sample_image = np.reshape(sample_image, newshape=new_shape)
        sample_image = sample_image[:, :, :, :self.input_c_dim]

        test_path = os.path.join(args.test_dir, args.dataset_dir)
        if not os.path.exists(test_path):
            os.makedirs(test_path)
        image_path = os.path.join(test_path,
                                  '{0}_{1}'.format(args.which_direction, os.path.basename(sample_file)))

        fake_img = self.sess.run(out_var, feed_dict={in_var: sample_image})
        save_images(fake_img, [1, 1], image_path)

        index.write("<tr>")
        index.write("<td>%s</td>" % os.path.basename(image_path))
        index.write("<td><img src='%s'></td>" % (sample_file if os.path.isabs(sample_file)
                                                 else ('..' + os.path.sep + sample_file)))
        index.write("<td><img src='%s'></td>" % (image_path if os.path.isabs(image_path)
                                                 else ('..' + os.path.sep + image_path)))
        index.write("</tr>")
    index.write("</table></body></html>")
    index.close()
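# Example invocation of test() above (a sketch: `cyclegan` stands in for the
# enclosing model class, assumed to take (sess, args); only the argument
# fields test() actually reads are listed):
#
#   parser = argparse.ArgumentParser()
#   parser.add_argument('--which_direction', default='AtoB')   # AtoB or BtoA
#   parser.add_argument('--dataset_dir', default='horse2zebra')
#   parser.add_argument('--test_dir', default='./test')
#   parser.add_argument('--fine_size', type=int, default=256)
#   args = parser.parse_args()
#   with tf.Session() as sess:
#       cyclegan(sess, args).test(args)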
def pred(_log, _config):
    p = _config
    modelname = file2name[p['modelfn']]
    mod_model = importlib.import_module('models.%s' % p['modelfn'])
    model_cls = getattr(mod_model, modelname)
    model_params = {k: v for k, v in p.items() if k in model_cls.params or k == 'modelfn'}
    model = model_cls(model_params, rnd_seed=p['seed'])
    expid = model.params_to_string(model_params)

    outdir_plot = trunc_dir('%s/train_%s/%s/predict_per_epoch/test_%s' %
                            (p['parentdir'], p['train_years'], p['expname'], p['test_year']))
    outdir_run = trunc_dir('%s/%s' % (outdir_plot, expid))
    tmp_dir = trunc_dir(os.path.join(outdir_run, 'tmp'))
    weight_dir = trunc_dir('%s/train_%s/%s/model_weight/%s' %
                           (p['parentdir'], p['train_years'], p['expname'], expid))
    detail_outdir = trunc_dir('%s/train_%s/%s/model_detail/' %
                              (p['parentdir'], p['train_years'], p['expname']))

    assert os.path.isdir(weight_dir), "weight_dir " + weight_dir + " does not exist. Make sure you trained the model."
    assert os.path.isdir(detail_outdir), "detail_outdir " + detail_outdir + " does not exist. Make sure you trained the model."
    if len(os.listdir(weight_dir)) < 1:
        raise SoftFailure('weight dir empty')

    try:
        if not os.path.isdir(outdir_run):
            os.makedirs(outdir_run)
        os.makedirs(tmp_dir)
    except OSError:
        pass

    _log.info('Processing {0}'.format(outdir_run))

    ###################
    label2tlabel = {4: 2, 3: 2, 2: 2, 1: 1, 0: 0, -2: 0}
    topk4eval = 20
    NGRAM_NFILTER, N_GRAMS = get_ngram_nfilter(p['winlen'], p['qproximity'], p['maxqlen'], p['xfilters'])

    _log.info('process {0} and output to {1}'.format(weight_dir, outdir_run))
    _log.info('{0} {1} {2} {3} {4}'.format(p['distill'], 'NGRAM_NFILTER', NGRAM_NFILTER, 'N_GRAMS', N_GRAMS))

    # prepare test data
    qids = get_train_qids(p['test_year'])
    qrelf = get_qrelf(qrelfdir, p['test_year'])
    qid_cwid_label = read_qrel(qrelf, qids, include_spam=False)
    test_qids = [qid for qid in qids if qid in qid_cwid_label]
    _log.info('%s test_num %d' % (p['test_year'], len(test_qids)))

    f_ndcg = dict()
    f_epochs = set()
    # sort weights by time and only use the first weights for each epoch
    # (in case there are duplicate weights from a failed/re-run train)
    for f in sorted(os.listdir(weight_dir), key=lambda x: os.path.getctime(os.path.join(weight_dir, x))):
        if f.split('.')[-1] != 'h5':
            continue
        cols = f.split('.')[0].split('_')
        if len(cols) == 4:
            nb_epoch, loss, n_batch, n_samples = int(cols[0]), int(cols[1]), int(cols[2]), int(cols[3])
            if nb_epoch <= p['epochs'] and nb_epoch not in f_epochs:
                f_epochs.add(nb_epoch)
                f_ndcg[f] = (nb_epoch, loss, n_batch, n_samples)

    # collect epochs that already have a .run file so they are not re-predicted
    finished_epochs = {}
    for fn in sorted(os.listdir(outdir_run), key=lambda x: os.path.getctime(os.path.join(outdir_run, x))):
        if fn.endswith(".run"):
            fields = fn[:-4].split("_")  # trim .run
            assert len(fields) == 5
            epoch, loss = int(fields[0]), int(fields[4])
            ndcg, mapv, err = float(fields[1]), float(fields[2]), float(fields[3])
            if epoch in finished_epochs:
                _log.error("TODO two weights exist for same epoch")
            finished_epochs[epoch] = (epoch, err, ndcg, mapv, loss)
    _log.info('skipping finished epochs: {0}'.format(finished_epochs))

    def model_pred(NGRAM_NFILTER, weight_file, test_data, test_docids, test_qids):
        dump_modelplot(model.build(), detail_outdir + 'predplot_' + expid)
        model_predict = model.build_from_dump(weight_file)
        qid_cwid_pred = pred_label(model_predict, test_data, test_docids, test_qids)
        return qid_cwid_pred

    test_doc_vec, test_docids, test_qids = load_test_data(qids, rawdoc_mat_dir, qid_cwid_label, N_GRAMS, p)
    epoch_err_ndcg_loss = list()
    _log.info('start {0} {1} {2}'.format(expid, p['train_years'], p['test_year']))
    for f in sorted(f_ndcg, key=lambda x: f_ndcg[x][0]):
        nb_epoch, loss, n_batch, n_samples = f_ndcg[f]
        if nb_epoch in finished_epochs:
            epoch_err_ndcg_loss.append(finished_epochs[nb_epoch])
            continue
        weight_file = os.path.join(weight_dir, f)
        qid_cwid_pred = model_pred(NGRAM_NFILTER, weight_file, test_doc_vec, test_docids, test_qids)
        ndcg20, err20, mapv = eval_run(_log, qid_cwid_pred, expid, perlf, treceval, tmp_dir, topk4eval, qrelf)
        loss = int(loss)
        out_name = '%d_%0.4f_%0.4f_%0.4f_%d.run' % (nb_epoch, ndcg20, mapv, err20, loss)
        epoch_err_ndcg_loss.append((nb_epoch, err20, ndcg20, mapv, loss))
        print_run(qid_cwid_pred, outdir_run, out_name, expid)
        _log.info('finished {0}'.format(f))
    _log.info('finish {0} {1} {2}'.format(expid, p['train_years'], p['test_year']))

    plot_curve(epoch_err_ndcg_loss, outdir_plot, expid, p)
    if max(f_epochs) < p['epochs'] - 3:
        raise SoftFailure("prediction finished, but not all epochs are available yet. last epoch found: %s" % max(f_epochs))
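# For reference, the filename conventions pred() relies on, derived from the
# parsing and formatting code above (field order only; the example values are
# made up):
#
#   weight files: '<epoch>_<loss>_<n_batch>_<n_samples>.h5', e.g. '3_127_500_16000.h5'
#   run files:    '<epoch>_<ndcg20>_<map>_<err20>_<loss>.run', e.g. '3_0.2741_0.1832_0.1419_127.run'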
from utils.utils import load_train_data, load_test_data, clean_tweets, build_word_dict, load_glove_embeddings
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.metrics import classification_report
from sklearn.ensemble import RandomForestClassifier

# Task A: offensive-language classification on TF-IDF features
vectorizer = TfidfVectorizer()
x_a, y_a, y_b = load_train_data('../data/OLIDv1.0')
X_train_a = vectorizer.fit_transform(clean_tweets(x_a))

clf = SVC(gamma='auto', random_state=1, kernel='linear')
# clf = RandomForestClassifier(n_estimators=100, max_depth=5, random_state=0)
clf.fit(X_train_a, y_a)

X_test_a, y_test_a = load_test_data('../data/OLIDv1.0/testset-levela.tsv',
                                    '../data/OLIDv1.0/labels-levela.csv')
X_test_a = vectorizer.transform(clean_tweets(X_test_a))
pred = clf.predict(X_test_a)
print(classification_report(y_test_a, pred))

# accuracy: fraction of test tweets predicted correctly
top = 0
bt = 0
for i in range(len(pred)):
    bt += 1
    if pred[i] == y_test_a[i]:
        top += 1
print(top / bt)

# Task B: keep only tweets that carry a Task B label (in OLID, tweets without
# a Task B annotation are marked with a sentinel; 'NULL' is assumed here)
new_y_b = []
new_x_b = []
for i in range(len(x_a)):
    if y_b[i] != 'NULL':
        new_x_b.append(x_a[i])
        new_y_b.append(y_b[i])
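# A plausible Task B continuation mirroring the Task A pipeline above
# (a sketch: the level-b file names are assumptions following the level-a
# naming pattern, not confirmed by this script):
#
#   vectorizer_b = TfidfVectorizer()
#   X_train_b = vectorizer_b.fit_transform(clean_tweets(new_x_b))
#   clf_b = SVC(gamma='auto', random_state=1, kernel='linear')
#   clf_b.fit(X_train_b, new_y_b)
#   X_test_b, y_test_b = load_test_data('../data/OLIDv1.0/testset-levelb.tsv',
#                                       '../data/OLIDv1.0/labels-levelb.csv')
#   pred_b = clf_b.predict(vectorizer_b.transform(clean_tweets(X_test_b)))
#   print(classification_report(y_test_b, pred_b))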
def test():
    assert Calculator(["+*"]).sumData(utils.load_test_data(tD)) == tA1
    assert Calculator(["+", "*"]).sumData(utils.load_test_data(tD)) == tA2
    return "Pass!"
# Compile the model
print('\n\n\n', 'Compiling model..', runID, '\n\n\tGPU ' +
      (str(args.gpus) + ' gpus' if args.gpus > 1 else args.gpuids) +
      '\t\tBatch size [ ' + str(args.bs) + ' ]' + ' \n\n')
model.compile(loss=depth_loss_function, optimizer=optimizer)

print('Ready for training!\n')

# Callbacks (the unreal dataset reuses the NYU callbacks)
callbacks = []
if args.data == 'nyu':
    callbacks = get_nyu_callbacks(model, basemodel, train_generator, test_generator,
                                  load_test_data() if args.full else None, runPath)
if args.data == 'unreal':
    callbacks = get_nyu_callbacks(model, basemodel, train_generator, test_generator,
                                  load_test_data() if args.full else None, runPath)

# Start training
model.fit_generator(train_generator, callbacks=callbacks, validation_data=test_generator,
                    epochs=args.epochs, shuffle=True)

# Save the final trained model:
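# A minimal sketch of the save step the comment above announces (the file
# name under runPath is an assumption; model.save is the standard Keras call):
model.save(runPath + '/model.h5')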