def train(model, iterator, optimizer, criterion, train_loader, teacher_force, device, test_loader):
    model = model.train()
    for epoch in range(iterator):
        average_loss = []
        for k, (src_batch, trg_batch) in enumerate(train_loader):
            src_tensor = torch.LongTensor(src_batch).to(device)
            trg_tensor = torch.LongTensor(trg_batch).to(device)
            outputs = model.forward(src=src_tensor, trg=trg_tensor, train=teacher_force)
            outputs = outputs[:, 1:].contiguous().view(-1, outputs.shape[-1]).to(device)
            trg_tensor = trg_tensor[:, 1:].contiguous().view(-1).to(device)
            loss = criterion(outputs, trg_tensor)
            average_loss.append(loss.item())
            loss.backward()
            if (k + 1) % 5 == 0:  # wait for several backward steps
                optimizer.step()  # now we can do an optimizer step
                optimizer.zero_grad()
            if (k + 1) % 10 == 0:
                print("Epoch: {:d} batch step: [{:d}/{:d}] Loss: {:.4f}".format(
                    epoch + 1, k + 1, len(train_loader), np.mean(average_loss)))
        print("\nEpoch: {:d} Average Loss: {:.4f} +- {:.4f}\n".format(
            epoch + 1, np.mean(average_loss), np.std(average_loss)))
        evaluate(test_loader=test_loader, criterion=criterion, model=model, device=device)
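# A minimal, self-contained sketch of the gradient-accumulation pattern used
# in train() above (optimizer.step() every 5 batches), shown on a toy linear
# model. Everything below is illustrative and not part of the original code.
import torch
import torch.nn as nn

toy_model = nn.Linear(4, 1)
toy_optimizer = torch.optim.SGD(toy_model.parameters(), lr=0.1)
toy_criterion = nn.MSELoss()
accum_steps = 5  # mirrors the (k + 1) % 5 == 0 check above

xs = torch.randn(100, 4)
ys = xs.sum(dim=1, keepdim=True)

for k in range(10):  # ten mini-batches of ten samples each
    batch = slice(k * 10, (k + 1) * 10)
    loss = toy_criterion(toy_model(xs[batch]), ys[batch])
    loss.backward()  # gradients accumulate across iterations
    if (k + 1) % accum_steps == 0:
        toy_optimizer.step()       # apply the accumulated gradients
        toy_optimizer.zero_grad()  # reset for the next accumulation window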
def compute_validation_map(yolact_net, dataset):
    with torch.no_grad():
        yolact_net.eval()
        print()
        print("Computing validation mAP (this may take a while)...", flush=True)
        eval_script.evaluate(yolact_net, dataset, train_mode=True)
        yolact_net.train()
def compute_validation_map(yolact_net, dataset):
    with torch.no_grad():
        yolact_net.eval()
        logger = logging.getLogger("yolact.eval")
        logger.info("Computing validation mAP (this may take a while)...")
        eval_script.evaluate(yolact_net, dataset, train_mode=True, train_cfg=cfg)
        yolact_net.train()
def run(args):
    """Run the model.

    Args:
        args: mapping of configuration overrides merged into the global config.
    """
    config.update(args)
    index_col_name = config["index_col"] if "index_col" in config else None
    df = pd.read_csv(config["data_path"], index_col=index_col_name)

    if not os.path.exists(config["output_dir"]):
        os.makedirs(config["output_dir"])

    ts = TimeSeriesDataset(data=df,
                           categorical_cols=config["categorical_cols"],
                           target_col=config["label_col"],
                           seq_length=config["seq_len"],
                           prediction_window=config["prediction_window"])
    train_iter, test_iter, nb_features = ts.get_loaders(batch_size=config["batch_size"])

    model = AutoEncForecast(config, input_size=nb_features).to(config["device"])
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=config["lr"])

    if config["do_eval"] and config["ckpt"]:
        model, _, loss, epoch = load_checkpoint(config["ckpt"], model, optimizer, config["device"])
        evaluate(test_iter, loss, model, config)
    elif config["do_train"]:
        train(train_iter, test_iter, model, criterion, optimizer, config)
def main_equlize(model):
    parser = argparse.ArgumentParser(description='Parameters of network equalization.')
    parser.add_argument('--equalizedModel', default="Inceptionv3_equalized.h5",
                        help='save path of the equalized model')
    parser.add_argument('--scaleThresh', default=16, help='scaling threshold')
    parser.add_argument('--imagedir', default="elephant.jpg",
                        help='image file dir for equalization')
    args = parser.parse_args()
    e_model_path = args.equalizedModel

    num_data, label_to_name, val_generator = prepare_data(
        num_data=100,
        data_dir="../data/ILSVRC2012_img_val/",
        val_file="val.txt",
        mapping_file="synset_words.txt")

    print("Before equalization..............................")
    evaluate(model)

    print("Start equalization..............................")
    equalizer = Equalizer(model, e_model_path, args.scaleThresh)
    equalizer.eval()
    for batch in tqdm(val_generator):
        img, label = batch
        equalizer.equalization(img)
    equalizer.save_weight()
    model.load_weights(e_model_path)

    print("After equalization...............................")
    evaluate(model)
def main():
    """Main function."""
    logging.basicConfig(level=logging.INFO)
    args = parse_args()
    logging.info(f'Commandline:\n{" ".join(sys.argv)}')

    cfg = Config.fromfile(args.config)
    update_config = f' --update_config {args.update_config}' if args.update_config else ''
    if is_clustering_needed(cfg):
        update_config = cluster(cfg, args.config, update_config)

    logging.info('Training started ...')
    training_info = train(args.config, args.gpu_num, update_config)
    logging.info('... training completed.')

    work_dir = get_work_dir(cfg, args.update_config)
    logging.info('Evaluation started ...')
    evaluate(os.path.join(work_dir, "config.py"),
             os.path.join(work_dir, "latest.pth"),
             args.out, '', args.show_dir)
    logging.info('... evaluation completed.')

    with open(args.out, 'a+') as dst_file:
        yaml.dump(training_info, dst_file)
def run():
    ########################################################################
    # Register Prediction Start
    ########################################################################
    aicrowd_helpers.execution_start()

    ########################################################################
    # Gather Input and Output paths from environment variables
    ########################################################################
    test_images_path, predictions_output_path = gather_input_output_path()

    ########################################################################
    # Gather Image IDS
    ########################################################################
    image_ids = gather_image_ids(test_images_path)

    ########################################################################
    # Generate Predictions
    ########################################################################
    evaluate(AICROWD_TEST_IMAGES_PATH, AICROWD_PREDICTIONS_OUTPUT_PATH)

    ########################################################################
    # Register Prediction Complete
    ########################################################################
    aicrowd_helpers.execution_success(
        {"predictions_output_path": predictions_output_path})
def test(dataset, model, epoch, args):
    print("{} epoch: \t start validation....".format(epoch))
    model = model.module
    model.eval()
    model.is_training = False
    with torch.no_grad():
        evaluate(dataset, model)
def train_model(model, x, y, loss_op, pred_op, train_images, train_labels):
    train_op = model.training(loss_op, x, FLAGS.learning_rate,
                              FLAGS.decay_step, FLAGS.decay_factor)
    sess = init_session()
    minibatch_gen = batch_gen(FLAGS.batch_size, train_images.shape[0],
                              max_batches=FLAGS.max_steps, replace=True)

    print("training model...")
    start_time = time.time()
    for minibatch in minibatch_gen:
        batch_images, batch_labels = train_images[minibatch], train_labels[minibatch]
        feed_dict = {x: batch_images, y: batch_labels}
        _, loss_values = sess.run([train_op, loss_op], feed_dict=feed_dict)

        if minibatch_gen.counter % 1000 == 0:
            cur_time = time.time()
            duration = cur_time - start_time
            start_time = cur_time
            print('Step %d (%.3f sec): loss = ' % (minibatch_gen.counter, duration)
                  + str(loss_values))
        if minibatch_gen.counter % 10000 == 0:
            model.save_weights(sess, FLAGS.model_dir)
            evaluate(sess, x, y, pred_op, train_images, train_labels, FLAGS.batch_size)

    model.save_weights(sess, FLAGS.model_dir)
def before_run(self, run_context):
    self._step += 1
    if self._step % 600 == 0 and self._step > 0:
        evaluate(False, 500, False)
        evaluate(True, 1000, False)
    if self._step % 100 == 0:
        return tf.train.SessionRunArgs(total_loss)
    else:
        return None
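# Hedged sketch of the kind of tf.train.SessionRunHook that before_run() above
# belongs to (TF1 API). PeriodicEvalHook is a hypothetical name, and the
# periodic evaluation calls are stubbed out so the sketch stands alone.
import tensorflow as tf

class PeriodicEvalHook(tf.train.SessionRunHook):
    def __init__(self, loss_tensor):
        self._step = 0
        self._loss = loss_tensor

    def before_run(self, run_context):
        self._step += 1
        if self._step % 600 == 0:
            pass  # run the periodic evaluations here, as in the hook above
        if self._step % 100 == 0:
            # ask the session to additionally fetch the loss on this run
            return tf.train.SessionRunArgs(self._loss)
        return None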
def test(dataset, model, epoch, args):
    print("{} epoch: \t start validation....".format(epoch))
    model.eval()
    model.module.is_training = False
    with torch.no_grad():
        if args.dataset == 'VOC':
            evaluate(dataset, model)
        else:
            evaluate_coco(dataset, model)
def main(_):
    if FLAGS.gpu < 0 or FLAGS.gpu > VARS['num_gpus'] - 1:
        raise ValueError("The index of the GPU should be between 0 and "
                         "{}".format(VARS['num_gpus'] - 1))
    else:
        gpu_device = '/gpu:{}'.format(FLAGS.gpu)
    samples = loadJSON('./data/test_truth.json')
    normalize(samples, gpu_device)
    saveJSON(samples, './data/test_out_only_oov.json')
    evaluate(samples, './data/norm_errors_only_oov.json')
def main(_):
    tf.logging.set_verbosity(tf.logging.INFO)
    assert FLAGS.checkpoint_dir, "Flag 'checkpoint_dir' must be set."
    assert FLAGS.eval_dir, "Flag 'eval_dir' must be set."
    if FLAGS.config_file:
        for config_file in FLAGS.config_file:
            gin.parse_config_file(config_file)
    if FLAGS.params:
        gin.parse_config(FLAGS.params)
    eval_.evaluate(FLAGS.checkpoint_dir, FLAGS.eval_dir)
def max_value(b, depth, a, be):
    if depth == 0:
        i = evaluate(b.board)
        return (b, i)
    v = (b, MIN)
    # This was an attempt to sort the list of generated moves before pruning
    # them, but it ended up slowing down the algorithm:
    #   evalls = []
    #   for s in b.generate_moves():
    #       new_b = b.deepcopy()
    #       new_b.make_move(s)
    #       evalls.append((s, evaluate(new_b.board)))
    #   evalls.sort(key=lambda tup: tup[1], reverse=True)
    for s in b.generate_moves():
        new_b = b.deepcopy()
        new_b.make_move(s)
        value = min_value(new_b, depth - 1, a, be)[1]
        if v[1] < value:
            v = (new_b, value)
        if v[1] >= be:
            return v
        a = max(a, v[1])
    return v
def compute_validation_map(epoch, iteration, yolact_net, dataset, log: Log = None):
    with torch.no_grad():
        yolact_net.eval()
        start = time.time()
        print()
        print("Computing validation mAP (this may take a while)...", flush=True)
        val_info = eval_script.evaluate(yolact_net, dataset, train_mode=True)
        mapData.append([
            epoch,
            val_info['box']['all'], val_info['box'][55], val_info['box'][65],
            val_info['box'][75], val_info['box'][85], val_info['box'][95],
            val_info['mask']['all'], val_info['mask'][55], val_info['mask'][65],
            val_info['mask'][75], val_info['mask'][85], val_info['mask'][95]
        ])
        end = time.time()
        if log is not None:
            log.log('val', val_info, elapsed=(end - start), epoch=epoch, iter=iteration)
        yolact_net.train()
    return val_info
def main():
    if len(sys.argv) <= 2:
        print("please specify a number of examples and a model name "
              "(e.g. models.baseline.random_handle)")
        sys.exit(1)
    eval_set_size = int(sys.argv[1])
    module_name = sys.argv[2]

    # splitting training data
    print("splitting training data into", eval_set_size, "(test) v. rest (train)")
    data.load_train()
    tweets = np.array(data.TRAIN)
    np.random.seed(SEED)
    np.random.shuffle(tweets)
    test_tweets, train_tweets = tweets[:eval_set_size], tweets[eval_set_size:]

    hyper_parameters = models.parse_hyper_parameters(sys.argv[3:])
    model_class = importlib.import_module(module_name).Model
    print("Model:", module_name, hyper_parameters)

    print("Training...")
    model = model_class(tqdm(train_tweets, dynamic_ncols=True), **hyper_parameters)

    print("Evaluating...")
    accuracy, correct, tests = eval.evaluate(model, tqdm(test_tweets, dynamic_ncols=True))
    print(f"Label accuracy: {correct}/{tests} ({accuracy:%})")
def validate(model, loader):
    model.eval()
    distances = []
    labels = []
    for pair1, pair2, label in tqdm(loader):
        pair1 = pair1.to(DEVICE)
        pair2 = pair2.to(DEVICE)
        embds1 = model(pair1).cpu()
        embds2 = model(pair2).cpu()
        distance = (embds1 - embds2).norm(p=2, dim=1)
        distances.append(distance)
        labels.append(label)
    distances = torch.cat(distances)
    labels = torch.cat(labels)
    best_threshold, tar, far, precision, accuracy, fig = evaluate(
        distances.detach().numpy(), labels.numpy(), True)
    if log_wandb:
        wandb.log({"ROC Curve": fig}, commit=False)
        wandb.log({
            "Recall": tar,
            "Precision": precision,
            "Accuracy": accuracy,
            "FAR": far
        })
    return tar, precision, accuracy, far, best_threshold
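# Toy check of the per-pair L2 distance computed in validate() above: one
# distance per row, exactly what (embds1 - embds2).norm(p=2, dim=1) yields.
# The tensors are made up for illustration.
import torch

embds1 = torch.tensor([[0.0, 0.0], [1.0, 1.0]])
embds2 = torch.tensor([[3.0, 4.0], [1.0, 1.0]])
print((embds1 - embds2).norm(p=2, dim=1))  # tensor([5., 0.])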
def main():
    filename = './origin_data/bugreports.xml'
    path = './bug_reports'
    bugslist = utils.read_xml(filename)
    label = utils.read_label('./origin_data/goldset.txt')
    samples, ids = utils.get_content(bugslist)
    num_word_list, numword = utils.count_word(samples)
    utils.savefile(samples)
    results = textrank.bugsum(path, numword, num_word_list)
    print([len(i) for i in results])
    pred = eval.index2pred(results, ids)
    y = eval.label2y(label, ids)
    mean_acc, mean_pr, mean_re, mean_f1 = eval.evaluate(y, pred)
    print('mean_acc, mean_pr, mean_re, mean_f1:',
          mean_acc, mean_pr, mean_re, mean_f1)
def validate(opt, model, device, epoch, log_dir):
    global best_acc
    val_acc, val_threshold = evaluate(model, 'PTH', opt.input_shape[1:], device,
                                      opt.lfw_root, opt.lfw_test_list)
    # Save a checkpoint whenever the validation accuracy improves.
    if val_acc > best_acc:
        os.makedirs(log_dir, exist_ok=True)
        checkpoint_path = os.path.join(
            log_dir,
            'ep{epoch:03d}-val_acc{val_acc:.3f}-val_threshold{val_threshold:.3f}.pth'
            .format(epoch=epoch + 1, val_acc=val_acc, val_threshold=val_threshold))
        torch.save(model, checkpoint_path)
        print('Epoch {epoch:03d}: val_acc improved from {best_acc:.3f} to '
              '{val_acc:.3f}, saving model to {checkpoint_path}'.format(
                  epoch=epoch + 1, best_acc=best_acc, val_acc=val_acc,
                  checkpoint_path=checkpoint_path))
        best_acc = val_acc
    else:
        print('Epoch {epoch:03d}: val_acc did not improve from {best_acc:.3f}'
              .format(epoch=epoch + 1, best_acc=best_acc))
def evaluate_agents(agents, num_game, seed, bomb, device, num_run=1, verbose=True):
    num_player = len(agents)
    assert num_player > 1, "1 weight file per player"
    scores = []
    perfect = 0
    for i in range(num_run):
        _, _, score, p = evaluate(
            agents,
            num_game,
            num_game * i + seed,
            bomb,
            0,
            True,  # in the OP paper, SAD was a default
            device=device,
        )
        scores.extend(score)
        perfect += p
    mean = np.mean(scores)
    sem = np.std(scores) / np.sqrt(len(scores))
    perfect_rate = perfect / (num_game * num_run)
    if verbose:
        print("score: %f +/- %f" % (mean, sem), "; perfect: ", perfect_rate)
    return mean, sem, perfect_rate
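# Quick arithmetic check of the summary statistics in evaluate_agents():
# the standard error of the mean is std / sqrt(n). Scores are made up.
import numpy as np

scores = [23.0, 24.0, 25.0, 22.0]
print(np.mean(scores))                        # 23.5
print(np.std(scores) / np.sqrt(len(scores)))  # ~0.559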
def ep(input, debug=True):
    ast = parse(input)
    if debug:
        print(ast)
    val = evaluate(ast)
    if val is not None and debug:
        print(_to_scheme_str(val))
def compute_validation_map(epoch, iteration, yolact_net, dataset, log: Log = None):
    '''
    - Called when training has finished.
    - Switches the YOLACT model to eval mode, then measures the final mAP.
    - Switches the model back to train mode when done.
    '''
    with torch.no_grad():
        yolact_net.eval()
        start = time.time()
        print()
        print("Computing validation mAP (this may take a while)...", flush=True)
        val_info = eval_script.evaluate(yolact_net, dataset, train_mode=True)
        end = time.time()
        if log is not None:
            log.log('val', val_info, elapsed=(end - start), epoch=epoch, iter=iteration)
        yolact_net.train()
def compute_validation_map(epoch, iteration, yolact_net, dataset, log: Log = None, wandb=None):
    with torch.no_grad():
        yolact_net.eval()
        start = time.time()
        print()
        print("Computing validation mAP (this may take a while)...", flush=True)
        val_info = eval_script.evaluate(yolact_net, dataset, train_mode=True)
        end = time.time()
        if log is not None:
            log.log('val', val_info, elapsed=(end - start), epoch=epoch, iter=iteration)
        if wandb is not None:
            wandb.log({
                "Box Accuracy": val_info['box']['all'],
                "Mask Accuracy": val_info['mask']['all']
            })
        yolact_net.train()
def main_NeuMF(hyper_params, gpu_id=None):
    from pytorch_models.NeuMF import GMF, MLP, NeuMF
    from data import load_data
    from eval import evaluate, eval_ranking
    from utils import load_user_item_counts, is_cuda_available
    from utils import xavier_init, log_end_epoch
    from loss import MSELoss
    import torch

    user_count, item_count = load_user_item_counts(hyper_params)
    train_reader, test_reader, val_reader, hyper_params = load_data(hyper_params)
    start_time = time.time()

    initial_path = hyper_params['model_path']

    # Pre-training the GMF model
    hyper_params['model_path'] = initial_path + "_gmf"
    gmf_model = GMF(hyper_params)
    if is_cuda_available:
        gmf_model = gmf_model.cuda()
    xavier_init(gmf_model)
    gmf_model = train_complete(hyper_params, GMF, train_reader, val_reader,
                               user_count, item_count, gmf_model)

    # Pre-training the MLP model
    hyper_params['model_path'] = initial_path + "_mlp"
    mlp_model = MLP(hyper_params)
    if is_cuda_available:
        mlp_model = mlp_model.cuda()
    xavier_init(mlp_model)
    mlp_model = train_complete(hyper_params, MLP, train_reader, val_reader,
                               user_count, item_count, mlp_model)

    # Training the final NeuMF model, initialized from the pre-trained parts
    hyper_params['model_path'] = initial_path
    model = NeuMF(hyper_params)
    if is_cuda_available:
        model = model.cuda()
    model.init(gmf_model, mlp_model)
    model = train_complete(hyper_params, NeuMF, train_reader, val_reader,
                           user_count, item_count, model)

    # Evaluating the final model for MSE on the test set
    criterion = MSELoss(hyper_params)
    metrics, user_count_mse_map, item_count_mse_map = evaluate(
        model, criterion, test_reader, hyper_params, user_count, item_count,
        review=False)

    # Evaluating the final model for HR@1 on the test set
    metrics.update(eval_ranking(model, test_reader, hyper_params, review=False))

    log_end_epoch(hyper_params, metrics, 'final', (time.time() - start_time),
                  metrics_on='(TEST)')
    return metrics, user_count_mse_map, item_count_mse_map
def min_value(b, depth, a, be):
    if depth == 0:
        i = evaluate(b.board)
        return (b, i)
    v = (b, MAX)
    # This was part of the above attempt to sort the moves:
    #   evalls = []
    #   for s in b.generate_moves():
    #       new_b = b.deepcopy()
    #       new_b.make_move(s)
    #       evalls.append((s, evaluate(new_b.board)))
    #   evalls.sort(key=lambda tup: tup[1])
    for s in b.generate_moves():
        new_b = b.deepcopy()
        new_b.make_move(s)
        value = max_value(new_b, depth - 1, a, be)[1]
        if v[1] > value:
            v = (new_b, value)
        if v[1] <= a:
            return v
        be = min(be, v[1])
    return v
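# A standalone illustration of the alpha-beta recursion implemented by
# max_value/min_value above, on a hard-coded game tree (nested lists as
# internal nodes, ints as leaves). All names here are illustrative.
AB_MIN, AB_MAX = float("-inf"), float("inf")

def ab_max(node, a, be):
    if isinstance(node, int):  # leaf: static evaluation
        return node
    v = AB_MIN
    for child in node:
        v = max(v, ab_min(child, a, be))
        if v >= be:            # beta cutoff
            return v
        a = max(a, v)
    return v

def ab_min(node, a, be):
    if isinstance(node, int):
        return node
    v = AB_MAX
    for child in node:
        v = min(v, ab_max(child, a, be))
        if v <= a:             # alpha cutoff
            return v
        be = min(be, v)
    return v

print(ab_max([[3, 5], [2, [9, 1]]], AB_MIN, AB_MAX))  # -> 3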
def train_model(sess, model, x, y, train_op, loss_op, pred_op, weights,
                train_images, train_labels, batch_size, max_steps,
                model_dir, bits, stop, log=False):
    minibatch_gen = batch_gen(batch_size, train_images.shape[0],
                              max_batches=max_steps, replace=True)
    current_acc, highest_acc = 0.0, 0.0
    best_weights = model.get_weights(sess, weights)

    if log:
        print("training model...")
    start_time = time.time()
    for minibatch in minibatch_gen:
        batch_images, batch_labels = train_images[minibatch], train_labels[minibatch]
        feed_dict = {x: batch_images, y: batch_labels}
        _, loss_values = sess.run([train_op, loss_op], feed_dict=feed_dict)

        if minibatch_gen.counter % 1000 == 0:
            cur_time = time.time()
            duration = cur_time - start_time
            start_time = cur_time
            if log:
                print('Step %d (%.3f sec): loss = ' % (minibatch_gen.counter, duration)
                      + str(loss_values))
            model.save_weights(sess, best_weights, model_dir, num_bits=bits)
            current_acc = evaluate(sess, x, y, pred_op, train_images,
                                   train_labels, batch_size)
            if current_acc >= highest_acc:
                highest_acc = current_acc
                best_weights = model.get_weights(sess, weights)
            if current_acc >= stop:
                if log:
                    print("Reached stopping accuracy.")
                # this is a generator, so yield before stopping instead of
                # returning a value (a generator's return value is lost)
                yield best_weights, current_acc
                return
            yield best_weights, current_acc

    if log:
        print("highest accuracy: %f" % highest_acc)
    yield best_weights, highest_acc
def deploy(prev_values=False, model_name=None, seed=None, batch_size=32,
           max_steps=2000, dropout=0.85, learning_rate=0.001,
           success_boundary=None):
    if prev_values:
        print("------- New model -------")
        print("Model: " + model_name)
        print("Seed: " + str(seed))
        print("Batch_size: " + str(batch_size))
        print("Max_steps: " + str(max_steps))
        print("Dropout: " + str(dropout))
        print("Learning_rate: " + str(learning_rate))
        print("Success_boundary: " + str(success_boundary))

    # Preparing data
    import pre_input
    pre_input.generate_txt_with_all_images(filtered_train_data_dir, img_paths_txt_train_path)
    pre_input.generate_txt_with_all_images(filtered_eval_data_dir, img_paths_txt_eval_path)

    # Create bottlenecks with 'inference_bottlenecks.py' first
    import inference_bottlenecks
    inference_bottlenecks.inference_bottlenecks(img_paths_txt_train_path, bottlenecks_train_dir, model)
    inference_bottlenecks.inference_bottlenecks(img_paths_txt_eval_path, bottlenecks_eval_dir, model)

    pre_input.create_diff_dataset_txt(bottlenecks_train_dir, diff_dataset_txt_train_path,
                                      num_tuples_per_class)
    pre_input.create_diff_dataset_txt(bottlenecks_eval_dir, diff_dataset_txt_eval_path,
                                      num_tuples_per_class=3)
    pre_input.generate_tfrecord_files(diff_dataset_txt_train_path, tfrecord_train_file_path)
    pre_input.generate_tfrecord_files(diff_dataset_txt_eval_path, tfrecord_test_file_path)

    # Training step
    import train
    train.train(model)

    # Eval step
    import eval
    eval.evaluate(model)
def train_and_evaluate(model, train_dataloader, val_dataloader, optimizer,
                       loss_fn, metrics, params, model_dir, restore_file=None):
    global scheduler
    best_val_acc = 0.0

    # Learning-rate schedulers for the different models. For the cnn models,
    # num_epochs is always < 100, so the schedule never actually fires there.
    if params.model_version == "densenet":
        scheduler = StepLR(optimizer, step_size=100, gamma=0.1)
    elif params.model_version == "cnn":
        scheduler = StepLR(optimizer, step_size=100, gamma=0.1)

    t0 = time.time()
    for epoch in range(params.num_epochs):
        # Run one epoch
        logging.info("Epoch {}/{}".format(epoch + 1, params.num_epochs))

        # One full pass over the training set
        train(model, optimizer, loss_fn, train_dataloader, metrics, params)
        scheduler.step()

        # Evaluate for one epoch on the validation set
        val_metrics = evaluate(model, loss_fn, val_dataloader, metrics, params)
        val_acc = val_metrics['accuracy']
        is_best = val_acc >= best_val_acc

        # Save weights
        utils.save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'optim_dict': optimizer.state_dict()
            },
            is_best=is_best,
            checkpoint=model_dir)

        if is_best:
            logging.info("- Found new best accuracy")
            best_val_acc = val_acc
            # Save best val metrics in a json file in the model directory
            best_json_path = os.path.join(model_dir, "metrics_val_best_weights.json")
            utils.save_dict_to_json(val_metrics, best_json_path)

        # Save latest val metrics in a json file in the model directory
        last_json_path = os.path.join(model_dir, "metrics_val_last_weights.json")
        utils.save_dict_to_json(val_metrics, last_json_path)

    print('{} seconds'.format(time.time() - t0))
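# Hedged illustration of the StepLR schedule configured above: with
# step_size=100 and gamma=0.1, the learning rate drops tenfold every 100
# scheduler steps (one step per epoch here). The optimizer below is a stub.
import torch
from torch.optim.lr_scheduler import StepLR

opt = torch.optim.SGD([torch.zeros(1, requires_grad=True)], lr=0.1)
sched = StepLR(opt, step_size=100, gamma=0.1)
for _ in range(200):
    sched.step()
print(opt.param_groups[0]["lr"])  # ~0.001 after two decays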
def decode_and_evaluate(model, insts, name):
    # decode step
    for inst in insts:
        dy.renew_cg()
        inst.prediction = model.decode(inst.input.word_ids, inst.input.char_ids)
    # evaluation
    metrics = eval.evaluate(insts)
    print("[%s set] Precision: %.2f, Recall: %.2f, F1: %.2f"
          % (name, metrics[0], metrics[1], metrics[2]))
    return metrics
def search(S):
    s = 0
    p = param.Parameters()
    while s < S:
        p.randomize()
        solution = sim.simulate(sat.Satellite(planet.Earth, p))
        print(s + 1, ": <eval>\t$", eval.evaluate(solution), "\t",
              solution.distMin, "\t", solution.tMin, "\t",
              p.velX, "\t", p.velY, "\t", p.friction)
        s += 1
def run(args):
    """Run training and/or evaluation for the configured task."""
    # setting up the neptune experiment
    neptune_project = neptune.init(
        api_token=os.environ["NEPTUNE_API_TOKEN"],
        project_qualified_name='{}/{}'.format(args["neptune_username"],
                                              args["neptune_project"]))

    # updating config with the provided flags
    config.update(args)
    logging.info("Used config: {}".format(config))

    model_config = config.model_config
    model_class = config.model_class
    tokenizer_class = config.tokenizer_class
    model_config = model_config.from_pretrained(
        config["model_name"],
        num_labels=config["num_labels"],
        finetuning_task=config["task_name"])
    model_config.update(config)
    tokenizer = tokenizer_class.from_pretrained(config["tokenizer_name"])
    model = model_class(model_config).to(device)

    # resume training from a checkpoint if one is given
    if config["model_path"]:
        model.load_state_dict(torch.load(config["model_path"], map_location=device))

    if config["task_name"] == "multi-label":
        dataset = datasets.load_dataset("data/toxic_dataset.py")
        dataset = features_loader_toxicity(dataset, tokenizer,
                                           max_length=config["max_length"])
    elif config["task_name"] == "multi-class":
        dataset = datasets.load_dataset("data/conference_dataset.py")
        dataset = features_loader_conference(dataset, tokenizer,
                                             max_length=config["max_length"])
    else:
        raise ValueError(f"Task name '{config['task_name']}' not supported")

    train_data, test_data = dataset["train"], dataset["test"]
    train_dataset, test_dataset = get_featurized_dataset(tokenizer, train_data, test_data)

    if config["do_train"]:
        global_step, tr_loss = train(train_dataset, test_dataset, model, config,
                                     neptune_project)
        logging.info(" global_step = %s, average loss = %s", global_step, tr_loss)

    if config["do_eval"]:
        report = evaluate(test_dataset, model, config)
        logging.info("---------------------- Evaluation report ----------------------\n{}"
                     .format(report))
def eval(approach, datapath, incremental):
    """Eval a clustering approach on labeled data."""
    param_grid = approaches[approach] if not incremental else params[approach]
    report_path = evaluate(datapath,
                           approach=approach,
                           param_grid=param_grid,
                           incremental=incremental)
    c.echo('Report compiled at {0}.'.format(report_path))
# A refined form of what we are doing looks very similar to LDA/PGM.
# Deep learning to learn features end-to-end is better.
print('Begin loading samples...')
train_samples, train_target = load.load_dataset(fname=load.filename['TRAIN'], numdocs=None)
print('number of training samples: %d' % len(train_target))
print('Tags for the last train example:', train_target[-1])

c = defaultdict(float)
for each in train_target:
    for everytag in each:
        c[everytag] += 1
# keep only tags that occur at least 500 times
y = [tag for tag in c.keys() if c[tag] >= 500.0]
#y = ['java']
print(y)

M1 = search_classify(True, y, 'bow_bigram')
M1.train(train_target, train_samples)
eval.evaluate([(M1, u'tfidf_LR.csv')], set(M1.classifyers.keys()))
#eval.confused_examples(classname, target, sample, gold, pred, number)
def getParser():
    """
    :return: A Viterbi parser induced from the training corpus.
    """
    productions = []
    S = nltk.Nonterminal('S')
    for tree in train_corpus:
        productions += tree.productions()
    grammar = nltk.induce_pcfg(S, productions)
    for p in islice(grammar.productions(), 50):
        print(p)
    return nltk.ViterbiParser(grammar)

parser = getParser()

#from nltk import Tree
#t = Tree('((S(NP-SBJ (PRP They))(ADVP-TMP (RB never))(VP (VBD considered)(S (NP-SBJ (PRP themselves)
#    (VP (TO to) (VP (VB be) (NP-PRD (NN anything) (RB else)))))))))')
#from treeutil import filterLexical
#filterLexical(t)
#print([postag for _, postag in t.pos()])

from eval import evaluate
evaluate(test_corpus, parser)
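# A self-contained toy version of the PCFG induction in getParser(), using two
# hand-written trees in place of train_corpus (which is defined elsewhere).
import nltk

toy_trees = [nltk.Tree.fromstring(s) for s in (
    "(S (NP (DT the) (NN dog)) (VP (VBD barked)))",
    "(S (NP (DT the) (NN cat)) (VP (VBD slept)))",
)]
toy_productions = []
for tree in toy_trees:
    toy_productions += tree.productions()
toy_grammar = nltk.induce_pcfg(nltk.Nonterminal('S'), toy_productions)
toy_parser = nltk.ViterbiParser(toy_grammar)
for parse in toy_parser.parse("the dog slept".split()):
    print(parse)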
from __future__ import division
from collections import defaultdict
import pickle
import sys

import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression
from sklearn import cross_validation
#from sklearn.neural_network import MLPClassifier
#from random import shuffle

import feature
import load
import eval
from full_model_2 import *

M1 = pickle.load(open("full_LR", "rb"))
keywordlist = pickle.load(open("keyword_lists", "rb"))
eval.evaluate([(M1, u'BOW_NN.csv')], set(list(keywordlist)[:10]))
                                               ignore_terminal_rex=ignore_terminal_rex)
    with open(trainf) as t:
        (trainwords, trainstringpos) = eval.read_data(
            [line.strip() for line in t if line.strip()], False,
            word_split_rex=word_split_rex,
            ignore_terminal_rex=ignore_terminal_rex)
    with Capturing() as output:
        eval.evaluate(options, trainwords, trainstringpos,
                      goldwords, goldstringpos)
    r = output[0].split('\t')
    assert len(r) == len(evals), ('missing evaluation for files {} and {}'
                                  .format(goldf, trainf))
    res[i + 1].append(r)

with open(outfile, 'w+') as out:
    for i, measure in enumerate(evals):
        out.write(', '.join([measure] + res[0][1:]) + '\n')
        for l in res[1:]:
            out.write(', '.join([l[0]] + [e[i] for e in l[1:]]) + '\n')