Example #1
def step1(data, args):
    print '### STEP 1: Train for classification task'
    
    pretrained_snapshot_fname = 'model_best_accuracy.th'
    
    train_loader, val_loader, test_loader = data
    
    n_samples_train = len(train_loader.dataset)
    n_samples_val = len(val_loader.dataset)
    n_samples_test = len(test_loader.dataset)

    num_classes = len(set(val_loader.dataset.target_tensor))
    
    model = eval(args.modelArch)(num_classes=num_classes)
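    # NOTE: eval() above resolves args.modelArch to a model class that must be
    # available in this module's scope, so the value has to be trusted input.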
    
    best_val_acc = None
    test_acc = None

    # try to load pretrained model if step 1 has already been executed
    saved_model = load_model(model, pretrained_snapshot_fname, args)
    if saved_model is not None:
        print 'Loading pretrained model:', pretrained_snapshot_fname
        model = saved_model
        model.cuda()
    else: 
        # else train a new model
        print 'Training a new model ...'    
        logfile = open(os.path.join(args.workDir, 'log.txt'), 'wb')
        
        model.cuda()
        optimizer = torch.optim.Adam(model.parameters(), args.learningRate)
        
        since = time.time()
        for epoch in trange(1, args.nEpochs + 1, desc='Epochs'):
            avg_loss = train(model, optimizer, epoch, train_loader, logfile, args)
            val_loss, val_acc, n_correct = evaluate(model, val_loader, args)
            
            if best_val_acc is None or best_val_acc < val_acc:
                best_val_acc = val_acc
                tqdm.write('Snapshotting best model: ' + pretrained_snapshot_fname)
                save_model(model, pretrained_snapshot_fname, args)
            
            logline = 'Epoch {:3d}/{}] train_avg_loss = {:.4f}, val_avg_loss = {:.4f}, val_accuracy = {}/{} ({:.2f}%, Best: {:.2f}%)'
            tqdm.write(logline.format(epoch, args.nEpochs, avg_loss, val_loss, n_correct, n_samples_val, val_acc, best_val_acc))
            
        time_elapsed = time.time() - since
        print 'Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60)
        model = load_model(model, pretrained_snapshot_fname, args)

    # TESTING -----------------
    if not args.skipTest:
        test_loss, test_acc, n_correct = evaluate(model, test_loader, args)
        logline = 'TEST] test_avg_loss = {:.4f}, test_accuracy = {}/{} ({:.2f}%)'
        print logline.format(test_loss, n_correct, n_samples_test, test_acc)
    
    return model, {'BestValAccuracy': best_val_acc, 'TestAccuracy': test_acc}
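The train/evaluate/save_model/load_model helpers used above are not shown. A minimal sketch of the two snapshot helpers, assuming os and torch are imported and snapshots live under args.workDir (the directory layout and the subdir default are assumptions inferred from the call sites, not the original code):

def save_model(model, fname, args, subdir='snapshots'):
    # Persist the whole model object inside the work directory (assumed layout)
    snap_dir = os.path.join(args.workDir, subdir)
    if not os.path.isdir(snap_dir):
        os.makedirs(snap_dir)
    torch.save(model, os.path.join(snap_dir, fname))

def load_model(model, fname, args, subdir='snapshots'):
    # Return the snapshotted model if one exists, else None (as step1 expects)
    path = os.path.join(args.workDir, subdir, fname)
    if not os.path.exists(path):
        return None
    return torch.load(path)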
Example #2
def main(input_file, output_file, is_training=True):
    questions = common.read_qp_dump(input_file)
    output = open(output_file, "w")
    print("generating scores")
    model = common.load_model("wiki")
    wiki_scores = common.generate_scores(questions, model)
    model = common.load_model("google")
    google_scores = common.generate_scores(questions, model)
    model = common.load_model("quora")
    quora_scores = common.generate_scores(questions, model)
    print_combined_scores(wiki_scores,
                          google_scores,
                          quora_scores,
                          file=output)
    output.close()
Example #3
def run_evaluations():
	data = []

	test_file = f"output\\nfl\\test\\6.csv"
	model_file = f"models\\nfl\\6_model.pkl"
	output_file = "output\\nfl\\html\\testdata.json"

	model = common.load_model(model_file)
	_, X, y = common.read_data_from_file(test_file, "home_win", get_feature_headers())

	data.append(evaluate.evaluate("6", model, X, y))

	dict = {"data": data}

	with open(output_file, 'w') as summary_file:
		json.dump(dict, summary_file)

	groups = common.read_data_grouped(test_file, ['year'])

	for key in groups:

		X = groups[key][get_feature_headers()]
		y = groups[key]["home_win"]
		
		accuracy, manual_accuracy = evaluate.calculate_accuracy(model, X, y)

		print(f"{key}:{accuracy:.2f}")
def main(args=sys.argv[1:]):
    args = parse_args(args)
    logging.basicConfig(format="%(message)s",
                        filename=args.log_file,
                        level=logging.DEBUG)
    print(args)
    logging.info(args)

    # Read all data
    data_dict = pickle_from_file(args.data_file)
    # Get the appropriate datasplit
    split_dict = pickle_from_file(args.data_split_file)
    recalib_data = data_dict["train"].subset(split_dict["recalibrate_idxs"])

    # Load model
    fitted_model = load_model(args.fitted_file)
    family = fitted_model.density_parametric_form

    if family == "gaussian":
        coverage_dict = recalibrate_intervals_gaussian(fitted_model,
                                                       recalib_data, args)
    elif family == "bernoulli":
        coverage_dict = recalibrate_intervals_bernoulli(
            fitted_model, recalib_data, args)
    elif "multinomial" in family:
        coverage_dict = recalibrate_intervals_multinomial(
            fitted_model, recalib_data, args)
    else:
        raise ValueError("dunno what is going on")
    print(coverage_dict)

    pickle_to_file(coverage_dict, args.out_file)
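pickle_from_file and pickle_to_file appear throughout these examples as thin wrappers around the pickle module. A plausible sketch, inferred from the call sites rather than taken from the source:

import pickle

def pickle_from_file(path):
    # Read one pickled object from disk
    with open(path, 'rb') as f:
        return pickle.load(f)

def pickle_to_file(obj, path):
    # Write one object to disk as a pickle
    with open(path, 'wb') as f:
        pickle.dump(obj, f)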
Example #5
def main(args=sys.argv[1:]):
    args = parse_args(args)
    logging.basicConfig(format="%(message)s", filename=args.log_file, level=logging.DEBUG)
    print(args)
    logging.info(args)

    # Read all data
    data_dict = pickle_from_file(args.data_file)
    # Get the appropriate datasplit
    split_dict = pickle_from_file(args.data_split_file)
    recalib_data = data_dict["train"].subset(split_dict["recalibrate_idxs"])

    # Load model
    fitted_model = load_model(args.fitted_file)

    coverage_dict = {}
    for alpha in args.alphas:
        recalibrator = DecisionIntervalRecalibrator(fitted_model, alpha)
        inference_dict = recalibrator.recalibrate(recalib_data)
        print("RECALIB INF DICT", inference_dict["cov_given_accept"])
        est_cov_given_accept = inference_dict["cov_given_accept"]["mean"]
        logging.info("Alpha %f, ideal cov %f, est cov|accept %f", alpha, 1 - alpha, est_cov_given_accept)
        logging.info(get_normal_ci(inference_dict["cov_given_accept"]))
        coverage_dict[alpha] = inference_dict
    pickle_to_file(coverage_dict, args.out_file)
Example #6
def restore_or_create_model(num_train_examples, num_labels, global_batch_size,
                            options):
    checkpoints = get_checkpoint_files(options.checkpoint_dir)
    print('Found {} checkpoint files: {}'.format(
        len(checkpoints), checkpoints), file=sys.stderr, flush=True)
    for checkpoint in checkpoints:    # sorted by ctime
        print('Restoring from checkpoint', checkpoint, file=sys.stderr,
              flush=True)
        try:
            return load_model(checkpoint)
        except Exception as e:
            warning('Failed to restore from checkpoint {}: {}'.format(
                checkpoint, e))

    # No checkpoint could be loaded
    print('Creating new model', file=sys.stderr, flush=True)
    pretrained_model = load_pretrained(options)
    output_offset = int(options.max_seq_length/2)
    model = create_model(pretrained_model, num_labels, output_offset,
                         options.output_layer)
    optimizer = create_optimizer(num_train_examples, global_batch_size,
                                 options)
    model.compile(
        optimizer,
        loss='sparse_categorical_crossentropy',
        metrics=['sparse_categorical_accuracy']
    )
    return model
Example #7
def main():
    parser = argparse.ArgumentParser()
    # Required parameters
    parser.add_argument("--do_train", default=True, action='store_true')
    parser.add_argument('--do_eval', default=False, action='store_true')
    parser.add_argument("--do_predict", default=False, action='store_true')

    parser.add_argument('--markup',
                        default='bieos',
                        type=str,
                        choices=['bios', 'bio', 'bieos'])  # tagging scheme
    parser.add_argument("--arch", default='bilstm_crf', type=str)
    parser.add_argument('--learning_rate', default=0.001, type=float)
    parser.add_argument('--seed', default=1234, type=int)
    parser.add_argument('--gpu', default='', type=str)
    parser.add_argument('--epochs', default=100, type=int)
    parser.add_argument('--batch_size', default=32, type=int)
    parser.add_argument('--embedding_size', default=128, type=int)
    parser.add_argument('--hidden_size', default=384, type=int)
    parser.add_argument("--grad_norm",
                        default=5.0,
                        type=float,
                        help="Max gradient norm.")
    parser.add_argument("--task_name", type=str, default='ner')
    args = parser.parse_args()
    args.data_dir = config.data_dir
    if not config.output_dir.exists():
        config.output_dir.mkdir()
    args.output_dir = config.output_dir / '{}'.format(args.arch)
    if not args.output_dir.exists():
        args.output_dir.mkdir()
    init_logger(log_file=str(args.output_dir /
                             '{}-{}.log'.format(args.arch, args.task_name)))
    seed_everything(args.seed)
    if args.gpu != '':
        args.device = torch.device(f"cuda:{args.gpu}")
    else:
        args.device = torch.device("cpu")
    args.id2label = {i: label for i, label in enumerate(config.label2id)}
    args.label2id = config.label2id

    processor = CluenerProcessor(data_dir=config.data_dir)
    processor.get_vocab()

    model = BERT_NERModel(device=args.device,
                          label2id=args.label2id,
                          need_birnn=True)
    # model = NERModel(vocab_size=len(processor.vocab), embedding_size=args.embedding_size,
    #                  hidden_size=args.hidden_size, device=args.device, label2id=args.label2id)

    model.to(args.device)

    if args.do_train:
        train(args, model, processor)
    if args.do_eval:
        model_path = args.output_dir / 'best-model.bin'
        model = load_model(model, model_path=str(model_path))
        evaluate(args, model, processor)
    if args.do_predict:
        predict(args, model, processor)
Example #8
def strategy_evaluation(data_file: str, model_file: str,
                        feature_columns: List[str], summary_file: str):

    model = common.load_model(model_file)

    data, X, y = common.read_data_from_file(data_file, "home_win",
                                            feature_columns)

    predictions = model.predict(X)
    probabilities = model.predict_proba(X)

    strat = strategies.all_strategies()

    for index, row in data.iterrows():
        for s in strat:
            s.evaluate(row)

    for s in strat:
        r = s.get_results()
        profits = r.profits()
        arr = [
            r.name, r.candidates, r.matches, r.covered, r.not_covered,
            profits[0], profits[1]
        ]
        add_to_json_summary(summary_file, arr)
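Example #9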
def main(args=sys.argv[1:]):
    args = parse_args(args)
    np.random.seed(args.seed)

    # Read all data
    orig_data_dict = pickle_from_file(args.data_file)
    # Get the appropriate datasplit
    split_dict = pickle_from_file(args.data_split_file)
    recalib_data = orig_data_dict["train"].subset(
        split_dict["recalibrate_idxs"])
    args.num_p = recalib_data.x.shape[1]

    # Load models
    fitted_dicts = []
    #for fitted_file, coverage_file in zip(args.fitted_files, args.coverage_files):
    for fitted_file in args.fitted_files:
        fitted_model = load_model(fitted_file)

        #coverage_dict = pickle_from_file(coverage_file)

        fitted_dicts.append({"model": fitted_model})
        #"coverage_dict": coverage_dict})
    print("fitted dicts", len(fitted_dicts))

    # Do all the plotting
    new_data, _ = orig_data_dict["data_gen"].create_data(args.num_test)
    #plot_PI_diam(fitted_dicts, new_data, args)
    #plot_coverages(fitted_dicts, new_data, args)
    #plot_accept_probs(
    #        [d["model"] for d in fitted_dicts],
    #        new_data,
    #        args)
    if args.num_p == 2:
        plot_accepted_rejected_region(orig_data_dict,
                                      [d["model"] for d in fitted_dicts], args)
Example #10
def main(dataset_path, working_dir, testing_path, testing_working_dir,
         dimensions, batch_size, number_classes, epochs):
    def generator_augmented():
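        # Yield each real batch together with an equally sized augmented copy,
        # so every yielded batch is twice the loader's batch size.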
        while True:
            while not dataset_loader.done():
                x, y = dataset_loader.get_training_batch()
                gen = datagen.flow(x, y, batch_size=batch_size)
                x_augmented, y_augmented = next(gen)
                yield np.concatenate((x, x_augmented), axis=0), np.concatenate(
                    (y, y_augmented), axis=0)
            dataset_loader.reset()

    def generator():
        while True:
            while not dataset_loader.done():
                x, y = dataset_loader.get_training_batch()
                yield x, y
            dataset_loader.reset()

    # model = alexnet(dimensions, number_classes).get_model()
    model = load_model()
    reduce_lr = ReduceLROnPlateau(monitor='loss',
                                  factor=0.1,
                                  patience=5,
                                  min_lr=0.001)
    sgd_optimizer = optimizers.SGD(lr=0.01, momentum=0.9, decay=0.0005)
    dataset_loader = imagerecognition.dataset_loader(dataset_path, working_dir,
                                                     dimensions, batch_size)
    datagen = ImageDataGenerator(rotation_range=20,
                                 width_shift_range=0.2,
                                 height_shift_range=0.2,
                                 horizontal_flip=True,
                                 vertical_flip=True)

    model.compile(sgd_optimizer,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    model.fit_generator(generator(),
                        steps_per_epoch=dataset_loader.length // batch_size,
                        epochs=epochs,
                        callbacks=[reduce_lr])
    dataset_loader.delete_from_disk()
    save_model(model)

    dataset_loader = imagerecognition.dataset_loader(testing_path,
                                                     testing_working_dir,
                                                     dimensions, batch_size)
    print('Testing...')
    consolidated_images, consolidated_labels = dataset_loader.get_training_batch()
    while not dataset_loader.done():
        temp_images, temp_labels = dataset_loader.get_training_batch()
        consolidated_images = np.concatenate(
            (consolidated_images, temp_images), axis=0)
        consolidated_labels = np.concatenate(
            (consolidated_labels, temp_labels), axis=0)
    print(model.evaluate(consolidated_images, consolidated_labels))
    dataset_loader.delete_from_disk()
Example #11
def main(argv):
    args = argparser().parse_args(argv[1:])
    examples = load_examples(args.data)
    clf, vecf = load_model(args.model)
    X = vecf.transform([e.text for e in examples])
    for e, c, s in zip(examples, clf.predict(X), clf.decision_function(X)):
        text = e.text if args.truncate is None else e.text[:args.truncate]
        print('{}\t{}\t{}\t{}\t{}'.format(e.id_, e.class_, c, s, text))
    return 0
Example #12
def __init__(self,
             model_data_path=TF_FEAT_MODEL_PATH,
             image_w=TF_FEAT_IMG_W,
             image_h=TF_FEAT_IMG_H):
    self.image_w = image_w
    self.image_h = image_h
    self.graph = tf.Graph()
    self.sess = tf.Session(graph=self.graph)
    start_time = time()
    with self.graph.as_default():
        load_model(model_data_path)
        gd = self.sess.graph.as_graph_def()
        self.image_ph = tf.get_default_graph().get_tensor_by_name("input:0")
        self.score = tf.get_default_graph().get_tensor_by_name("confidence_st:0")
        self.model_data_path = model_data_path
    end_time = time()
    print('model load took {:.2f} ms'.format((end_time - start_time) * 1000))
Example #13
def main(argv):
    args = argument_parser('serve').parse_args(argv[1:])
    session = tf.Session()
    graph = tf.get_default_graph()
    with graph.as_default():
        with session.as_default():
            app.model, app.tokenizer, app.labels, app.model_config = load_model(
                args.model_dir)
            app.session = session
            app.graph = graph
    app.run(port=args.port, debug=True)
    return 0
Example #14
def main(argv):
    args = argparser().parse_args(argv[1:])
    print('loading model from {} ...'.format(args.model),
          file=sys.stderr, flush=True)
    clf, vecf = load_model(args.model)
    print('loaded model from {} ...'.format(args.model),
          file=sys.stderr, flush=True)
    for fn in args.data:
        print('processing {} ...'.format(os.path.basename(fn)),
              file=sys.stderr, flush=True)
        process(fn, clf, vecf, args)
        print('completed {}.'.format(os.path.basename(fn)),
              file=sys.stderr, flush=True)
    return 0
Example #15
def run_evaluations(model_file: str, model_name: str, data_file: str,
                    feature_columns: List[str], summary_file: str):

    model = common.load_model(model_file)

    _, X, y = common.read_data_from_file(data_file, "home_win",
                                         feature_columns)

    eval_results = evaluate.evaluate(f"{model_name}", model, X, y)

    add_to_json_summary(summary_file, eval_results)

    accuracy, _ = evaluate.calculate_accuracy(model, X, y)

    return accuracy
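Example #16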
def saveData2(file_name, out_file_name):
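    # Embed the pickled question pairs with the pretrained "google" word
    # vectors and cache the resulting embedding matrix to HDF5.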
    model = common.load_model("google")
    time1 = time.time()
    train_qn_pairs = pickle.load(open(file_name, "rb"))
    time2 = time.time()
    print("Loaded Pickle : %f min" % ((time2 - time1) / 60))
    train_data = get_embedded_sentence(train_qn_pairs, model)
    time3 = time.time()
    model = None
    train_qn_pairs = None
    gc.collect()
    print("Obtained Embeddings: %f min" % ((time3 - time2) / 60))
    h5f = h5py.File(out_file_name, 'w')
    h5f.create_dataset('embeddings', train_data.shape, data=train_data)
    h5f.close()
Example #17
def main(args=sys.argv[1:]):
    args = parse_args(args)
    data_dict = pickle_from_file(args.data_file)
    test_data, _ = data_dict["data_gen"].create_data(args.num_test, args.seed)
    args.num_p = test_data.x.shape[1]

    fitted_model = load_model(args.fitted_file)

    # Look at the region we accepted
    plot_accepted_rejected_region(data_dict, fitted_model, args)

    # Look at how good the density estimates are in the
    # accept vs reject region
    plot_densities(test_data, fitted_model, args)

    recalibrated_dict = pickle_from_file(args.recalibrated_file)
    check_recalibration_covered(fitted_model, recalibrated_dict, test_data)
Example #18
def run_on_test_data():
	cnn_model = keras.models.load_model('cnn_model.h5')
	test_questions = common.read_qp_dump('data/test_qn_pairs.p')
	model = common.load_model("google")
	predictions = np.zeros(len(test_questions))
	submission = open('cnn-predictions.csv', 'w')
	print("test_id,is_duplicate", file = submission)
	for ind, question_pair in enumerate(test_questions):
		combined = np.zeros((1,2*maxlen,embedding_dims))
		v1 = sentence2vec(question_pair.question_1, model)
		v2 = sentence2vec(question_pair.question_2, model)
		if(len(v1)>0 and len(v2)>0):
			combined[0,:min(maxlen,len(v1)),:] = v1[:maxlen,:]
			combined[0, maxlen:(maxlen+min(maxlen,len(v2))),:] = v2[:maxlen,:]
			print("%s,%f"%(question_pair.id, cnn_model.predict_proba(combined, verbose = False)[0][0]), file = submission)
		else:
			print("%s,0.37"%(question_pair.id, ), file = submission)
	submission.close()
Example #19
def calibration(data_file: str, model_file: str, feature_columns: List[str]):

    model = common.load_model(model_file)

    data, X, y = common.read_data_from_file(data_file, "home_win",
                                            feature_columns)

    predictions = model.predict(X)
    probabilities = model.predict_proba(X)

    calibration_map: Dict = {}

    for idx, val in enumerate(predictions):
        true_outcome = y[idx]
        predicted_outcome = predictions[idx]
        confidence = float(max(probabilities[idx]))

        # Bucket the confidence into 5% bins, e.g. a confidence of 0.87
        # maps to key 85 (int(0.87 * 100) = 87, and 87 - 87 % 5 = 85)
        calibration_key = int(confidence * 100)
        calibration_key = calibration_key - (calibration_key % 5)

        if calibration_key not in calibration_map:
            calibration_map[calibration_key] = (0, 0)

        wins_losses = calibration_map[calibration_key]
        if predicted_outcome == true_outcome:
            wins_losses = (wins_losses[0] + 1, wins_losses[1])
        else:
            wins_losses = (wins_losses[0], wins_losses[1] + 1)
        calibration_map[calibration_key] = wins_losses
        # end calibration

    with open("calibration.csv", "w", newline='') as o:
        writer = csv.writer(o)
        writer.writerow(["index", "predicted", "actual", "number_of_games"])
        for pct in calibration_map:
            wins_losses = calibration_map[pct]
            number_of_games = wins_losses[0] + wins_losses[1]
            true_pct = wins_losses[0] / number_of_games
            true_pct = int(true_pct * 100)

            # don't bother with small sample size
            if number_of_games > 20:
                writer.writerow([pct, pct, true_pct, number_of_games])
Example #20
def main():

    if edgetpu == 1:
        mdl = model_edgetpu
    else:
        mdl = model

    interpreter, labels = cm.load_model(model_dir, mdl, lbl, edgetpu)

    fps = 1
    while True:
        start_time=time.time()
        
        #----------------Capture Camera Frame-----------------
        ret, frame = cap.read()
        if not ret:
            break
        
        cv2_im = frame
        cv2_im = cv2.flip(cv2_im, 0)
        cv2_im = cv2.flip(cv2_im, 1)

        cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
        pil_im = Image.fromarray(cv2_im_rgb)
       
        #-------------------Inference---------------------------------
        cm.set_input(interpreter, pil_im)
        interpreter.invoke()
        objs = cm.get_output(interpreter, score_threshold=threshold, top_k=top_k)
        
        #-----------------other------------------------------------
        track_object(objs, labels)  # object tracking
       
        fps = round(1.0 / (time.time() - start_time),1)
        print("*********FPS: ",fps,"************")

        #-----------------------------------------------------
        

    cap.release()
    cv2.destroyAllWindows()
Example #21
def main(args=sys.argv[1:]):
    args = parse_args(args)
    np.random.seed(args.seed)

    # Read all data
    orig_data_dict = pickle_from_file(args.data_file)
    # Get the appropriate datasplit
    split_dict = pickle_from_file(args.data_split_file)
    recalib_data = orig_data_dict["train"].subset(
        split_dict["recalibrate_idxs"])
    args.num_p = recalib_data.x.shape[1]

    # Load models
    fitted_models = [
        load_model(fitted_file) for fitted_file in args.fitted_files
    ]

    # Do all the plotting
    if args.num_p == 2:
        plot_accepted_rejected_region(orig_data_dict, fitted_models, args)
Example #22
def daily_evaluation(data_file: str, model_file: str,
                     feature_columns: List[str], summary_file: str):

    model = common.load_model(model_file)

    data, X, y = common.read_data_from_file(data_file, "home_win",
                                            feature_columns)

    predictions = model.predict(X)
    probabilities = model.predict_proba(X)

    strat = strategies.all_strategies()

    for idx, val in enumerate(predictions):
        true_outcome = y[idx]
        predicted_outcome = predictions[idx]
        confidence = float(max(probabilities[idx]))

        away = data.iloc[idx]["away"]
        home = data.iloc[idx]["home"]
        date = data.iloc[idx]["date"]

        if true_outcome == 1:
            winner = home
        else:
            winner = away

        if predicted_outcome == 1:
            predicted_winner = home
        else:
            predicted_winner = away

        for s in strat:
            s.evaluate(data.iloc[idx])

        add_to_json_summary(
            summary_file,
            [date, away, home, winner, predicted_winner, confidence])

    for s in strat:
        s.get_results()
Example #23
def main(argv):
    args = argument_parser('test').parse_args(argv[1:])

    model, tokenizer, labels, config = load_model(args.model_dir)
    test_labels, test_texts = load_tsv_data(args.test_data, args)

    max_seq_len = config['max_seq_length']
    replace_span = config['replace_span']

    label_map = {t: i for i, t in enumerate(labels)}
    inv_label_map = {v: k for k, v in label_map.items()}

    test_tok = tokenize_texts(test_texts, tokenizer)
    test_x = encode_tokenized(test_tok, tokenizer, max_seq_len, replace_span)
    test_y = [label_map[l] for l in test_labels]

    probs = model.predict(test_x, batch_size=args.batch_size)
    preds = np.argmax(probs, axis=-1)
    correct, total = sum(g == p for g, p in zip(test_y, preds)), len(test_y)
    print('Test accuracy: {:.1%} ({}/{})'.format(correct / total, correct,
                                                 total))

    return 0
Example #24
		output.append(f"{df.iloc[idx]['away_streak']:.2f}")
		output.append(f"{df.iloc[idx]['home_streak']:.2f}")
		output.append(f"{df.iloc[idx]['away_pct']:.2f}")
		output.append(f"{df.iloc[idx]['home_pct']:.2f}")
		output.append(f"{df.iloc[idx]['away_diff']:.2f}")
		output.append(f"{df.iloc[idx]['home_diff']:.2f}")
		
		summary.append(output)
	
	return summary

stats = importer.generate_stats(f"input\\nba\\2018.csv")

model_file, feature_columns = common.read_model_definition("nba_model.csv")

model = common.load_model(model_file)

data = []
games = []

dt = datetime.datetime.now()

lines = scraper.get_gameday_lines(dt)
index = domain.GameLineIndex(lines)

for g in scraper.get_games(dt):

	game_info = domain.NBAGame(1, date = dt)
	game_info.away = g[0]
	game_info.home = g[1]
Example #25
def main(args=sys.argv[1:]):
    args = parse_args(args)
    np.random.seed(args.seed)
    logging.basicConfig(format="%(message)s",
                        filename=args.log_file,
                        level=logging.DEBUG)
    logging.info(args)

    data_dict = pickle_from_file(args.data_file)
    test_data, _ = data_dict["data_gen"].create_data(args.num_test)
    fitted_models = []
    agg_dict = {}
    for fitted_file, coverage_file in zip(args.fitted_files,
                                          args.coverage_files):
        fitted_model = load_model(fitted_file)
        fitted_models.append(fitted_model)
        coverage_dict = pickle_from_file(coverage_file)
        for pi_alpha, inference_dict in coverage_dict.items():
            if pi_alpha not in agg_dict:
                agg_dict[pi_alpha] = []
            agg_dict[pi_alpha].append(inference_dict)

    unif_x = data_dict["support_sim_settings"].support_unif_rvs(args.num_test)
    unif_test_data = data_dict["data_gen"].create_data_given_x(unif_x)

    coverage_agg_results = {}
    for pi_alpha, inference_dicts in agg_dict.items():
        aggregator = DecisionIntervalAggregator(fitted_models, pi_alpha,
                                                inference_dicts)
        indiv_test_datas = [
            data_dict["data_gen"].create_data(args.num_test)[0]
            for _ in fitted_models
        ]
        indiv_test_inf_dicts = [
            DecisionIntervalRecalibrator(fitted_model,
                                         pi_alpha).recalibrate(indiv_test_data)
            for fitted_model, indiv_test_data in zip(fitted_models,
                                                     indiv_test_datas)
        ]
        individual_is_covereds = []
        for test_coverage_dict, inf_dict in zip(indiv_test_inf_dicts,
                                                inference_dicts):
            print(inf_dict)
            test_coverage = test_coverage_dict["cov_given_accept"]["mean"]
            test_coverage_ci = get_normal_ci(
                test_coverage_dict["cov_given_accept"], args.ci_alpha)
            individual_ci = get_normal_ci(inf_dict["cov_given_accept"],
                                          args.ci_alpha)
            indiv_covered = individual_ci[
                0] <= test_coverage and test_coverage <= individual_ci[1]
            logging.info("indiv est %f ci %s",
                         inf_dict["cov_given_accept"]["mean"], individual_ci)
            logging.info("true indiv %f ci %s", test_coverage,
                         test_coverage_ci)
            logging.info("indiv is covered? %s", indiv_covered)
            individual_is_covereds.append(indiv_covered)

        # Calculate the width of the individual CI diams for comparison
        individual_ci_diams = get_individual_ci_diams(inference_dicts,
                                                      args.ci_alpha)

        # Evaluate if the true coverage value is covered
        agg_cov_given_accept_dict = aggregator.calc_agg_cover_given_accept(
            args.ci_alpha)
        true_cov_given_accept_dict = aggregator.eval_cov_given_accept(
            test_data)["cov_given_accept"]
        true_cov_given_accept = true_cov_given_accept_dict["mean"]
        agg_ci = agg_cov_given_accept_dict["ci"]
        is_covered = true_cov_given_accept > agg_ci[
            0] and true_cov_given_accept < agg_ci[1]

        # Evaluate coverage if using independence assumption
        indpt_aggregator = DecisionIntervalIndptAggregator(
            fitted_models, pi_alpha, inference_dicts)
        indpt_agg_cov_given_accept_dict = indpt_aggregator.calc_agg_cover_given_accept(
            args.ci_alpha)
        indpt_ci = indpt_agg_cov_given_accept_dict["ci"]
        indpt_is_covered = true_cov_given_accept > indpt_ci[
            0] and true_cov_given_accept < indpt_ci[1]

        coverage_agg_results[pi_alpha] = {
            "is_covered": {
                "agg": [is_covered],
                "independent": [indpt_is_covered],
                "individual": individual_is_covereds
            },
            "ci_diams": {
                "agg": [agg_ci[1] - agg_ci[0]],
                "independent": [indpt_ci[1] - indpt_ci[0]],
                "individual": individual_ci_diams
            },
            "true_cov": {
                "agg": [true_cov_given_accept],
                "independent": [true_cov_given_accept],
                "individual": [
                    test_inf_dict["cov_given_accept"]["mean"]
                    for test_inf_dict in indiv_test_inf_dicts
                ]
            }
        }

        # Evaluate local coverage
        local_coverages = assess_local_agg_coverage_true(
            aggregator, test_data, data_dict["data_gen"])
        for key, val in local_coverages.items():
            coverage_agg_results[pi_alpha][key] = val

        logging.info("PI alpha %f", pi_alpha)
        logging.info("estimated agg cover given accept %f %s",
                     agg_cov_given_accept_dict["mean"], agg_ci)
        logging.info("indepttt estimated agg cover given accept %f %s",
                     indpt_agg_cov_given_accept_dict["mean"], indpt_ci)
        logging.info("true cov given accept %f, se %f", true_cov_given_accept,
                     true_cov_given_accept_dict["se"])
        logging.info("is  covered? %s", is_covered)
        logging.info("indept is  covered? %s", indpt_is_covered)

    logging.info(coverage_agg_results)
    pickle_to_file(coverage_agg_results, args.out_file)
Example #26
def run_main_loop(max_duration, replying_enabled):
    """The main monitor/reply loop for communicating with Twitter.
        max_duration: Maximum time to run this loop for (seconds)
        replying_enabled: Reply to tweets?
    """

    logging.info('max_duration=%d, replying_enabled=%s' % (max_duration, replying_enabled))
    start_time = time.time()
    # Delay between loops
    delay = 0.1
    
    def elapsed(): 
        """Elapsed time since start"""
        return time.time() - start_time
        
    # Load the classification model first
    # This is critical for classifying tweets for reply
    # It is helpful for seeing which tweets would be replied to 
    #  when we are running in non-replying mode
    # The model will not be available in early stage of development
    #  before tweets have been saved and labeled
    model = common.load_model() 
    if replying_enabled:
        assert model, 'Cannot reply without a classification model'
    logging.info('Using %s classifier' % model.__class__.__name__)
        
    # The latest tweet id (an integer) is stored as text in LATEST_FILE.
    # We use it to avoid re-reading tweets.
    latest_tweet_id = int(file(common.LATEST_FILE, 'rt').read().strip()) if os.path.exists(common.LATEST_FILE) else 0
    logging.info('latest_tweet_id=%d' % latest_tweet_id)
    
    # Load the tweets that have already been replied to
    replied_tweets = load_replied_tweets()

    # Credentials are stored in CREDENTIALS_FILE as text lines of key='value' 
    # The keys are: consumer_key, consumer_secret, access_token_key, access_token_secret 
    RE_CREDENTIALS = re.compile(r"(\w+)='([^']+)'")
    credentials = dict((m.group(1),m.group(2)) 
        for m in RE_CREDENTIALS.finditer(file(common.CREDENTIALS_FILE,'rt').read()))
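    # credentials now maps each key name to its quoted value, e.g.
    # {'consumer_key': '...', 'access_token_secret': '...'}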
    
    # Create an object that gives access to the Twitter APIs        
    api = twitter.Api(**credentials)
    
    # Create an Activity object for generating summary tweets
    activity = Activity(api)
    
    # The main loop. Runs for max_duration seconds with delay seconds
    #  between iterations.
    while elapsed() + delay < max_duration:
        time.sleep(delay)
        
        # Fetch tweets that were created since the last time we checked
        latest_tweet_id += 1
        scored_tweets = fetch_latest_scored_tweets(api, model, latest_tweet_id) 
        
        if scored_tweets:
            # Record all tweets
            scored_tweets.sort(key = lambda t: (not t._replyable, -t._score, t._id)) 
            latest_tweet_id = max([t._id for t in scored_tweets])
            file(common.LATEST_FILE, 'wt').write(str(latest_tweet_id))
            record_tweets(scored_tweets)

            replyable_tweets = [t for t in scored_tweets if t._replyable]
            # Reply to all the tweets that we should reply to
            if replying_enabled and replyable_tweets:
                reply_to_tweets(api, activity, replied_tweets, replyable_tweets)
        
        # Back off if there were no matching tweets
        delay = 10 if scored_tweets else delay * 2
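        # Clamp the polling delay: never below 10 seconds, and aim for at most
        # a tenth of the remaining run time.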
        delay = max(10, min((abs(elapsed() - max_duration)/10), delay))
        
        logging.info('Found %3d replyable of %3d relevant results, latest_id=%d, sleeping %5.1f sec, running %4d sec (%4d remaining)' % (
                    len([t for t in scored_tweets if t._replyable]), 
                    len(scored_tweets), latest_tweet_id,
                    delay,  elapsed(), max_duration - elapsed()))
Example #27
def predict(args, model, processor):
    model_path = args.output_dir / 'best-model.bin'
    model = load_model(model, model_path=str(model_path))
    test_data = []
    with open(str(args.data_dir / "test.json"), 'r') as f:
        idx = 0
        for line in f:
            json_d = {}
            line = json.loads(line.strip())
            text = line['text']
            words = list(text)
            labels = ['O'] * len(words)
            json_d['id'] = idx
            json_d['context'] = " ".join(words)
            json_d['tag'] = " ".join(labels)
            json_d['raw_context'] = "".join(words)
            idx += 1
            test_data.append(json_d)
    pbar = ProgressBar(n_total=len(test_data))
    results = []
    for step, line in enumerate(test_data):
        token_a = line['context'].split(" ")
        input_ids = [processor.vocab.to_index(w) for w in token_a]
        input_mask = [1] * len(token_a)
        input_lens = [len(token_a)]
        model.eval()
        with torch.no_grad():
            input_ids = torch.tensor([input_ids], dtype=torch.long)
            input_mask = torch.tensor([input_mask], dtype=torch.long)
            input_lens = torch.tensor([input_lens], dtype=torch.long)
            input_ids = input_ids.to(args.device)
            input_mask = input_mask.to(args.device)
            features = model.forward_loss(input_ids, input_mask, input_lens, input_tags=None)
            tags, _ = model.crf._obtain_labels(features, args.id2label, input_lens)
        label_entities = get_entities(tags[0], args.id2label)
        json_d = {}
        json_d['id'] = step
        json_d['tag_seq'] = " ".join(tags[0])
        json_d['entities'] = label_entities
        results.append(json_d)
        pbar(step=step)
    print(" ")
    output_predic_file = str(args.output_dir / "test_prediction.json")
    output_submit_file = str(args.output_dir / "test_submit.json")
    with open(output_predic_file, "w") as writer:
        for record in results:
            writer.write(json.dumps(record) + '\n')
    test_text = []
    with open(str(args.data_dir / 'test.json'), 'r') as fr:
        for line in fr:
            test_text.append(json.loads(line))
    test_submit = []
    for x, y in zip(test_text, results):
        json_d = {}
        json_d['id'] = x['id']
        json_d['label'] = {}
        entities = y['entities']
        words = list(x['text'])
        if len(entities) != 0:
            for subject in entities:
                tag = subject[0]
                start = subject[1]
                end = subject[2]
                word = "".join(words[start:end + 1])
                if tag in json_d['label']:
                    if word in json_d['label'][tag]:
                        json_d['label'][tag][word].append([start, end])
                    else:
                        json_d['label'][tag][word] = [[start, end]]
                else:
                    json_d['label'][tag] = {}
                    json_d['label'][tag][word] = [[start, end]]
        test_submit.append(json_d)
    json_to_text(output_submit_file, test_submit)
Example #28
def update_class_file():
    """We store list of labelled tweets in *.cls files in the format
            class label | message
        e.g.
            n | If I see one more back to school commercial I'm giving my eyes a paper cut.
            y | i got lemon on my finger and it stings .-. stupid paper cut -.-
            
        (Only the tweet text is stored here. The other tweet information
         is not stored as we use only the tweet text for classification)

        common.LATEST_CLASS_FILE (see common.py for actual name) is the
            file where we keep our most up-to-date list of labelled tweets
         
        This function updates common.LATEST_CLASS_FILE using common.TWEETS_FILE,
        which contains all the tweets that have undergone simple screening and
        we have saved.
        
        It also guesses labels for each new tweet using the latest classification
        model.
        
        (We track the tweet id of the latest tweet in common.LATEST_CLASS_FILE)
    """
    
    # The latest labelled tweet id (an integer) is stored as text in LATEST_CLASS_FILE
    latest_labelled_tweet_id = int(file(common.LATEST_CLASS_FILE, 'rt').read().strip()) if os.path.exists(common.LATEST_CLASS_FILE) else 0    
    previous_tweet_id = latest_labelled_tweet_id
      
    # Read the classification model. This will be used to guess tweet classifications
    model = common.load_model()
         
    # Read the tweets from TWEETS_FILE, label them and store them
    # in labelled_messages
    labelled_messages = []
    fp = open(common.TWEETS_FILE, 'rt')
    for line in fp:
        line = line.strip(' \n')
        
        # Skip empty lines
        if not line:
            continue
            
        # Skip incorrectly formatted lines
        try:    
            id_s,_,_,message = [pt.strip() for pt in line.split('|')]
            id = int(id_s)
        except ValueError:
            print 'ValueError', line
            continue
        
        # Skip tweets we have already saved
        if id <= latest_labelled_tweet_id:
            continue
            
        # Filter out messages that are not even allowed for training    
        if not filters.is_allowed_for_training(message):
            continue
        cls,_ = model.classify(message)    
                   
        labelled_messages.append([definitions.AUTO_CLASSES_LABELS[cls], message])
        latest_labelled_tweet_id = max(id, latest_labelled_tweet_id)
    fp.close()

    print 'found %d new tweets' % len(labelled_messages)
    print 'before: latest_labelled_tweet_id=%d' % previous_tweet_id
    print 'after:  latest_labelled_tweet_id=%d' % latest_labelled_tweet_id

    if latest_labelled_tweet_id == previous_tweet_id:
        print 'Nothing to do'
        exit()
     
    # Save the current labelled data file
    shutil.copyfile(common.CLASS_FILE, '%s.%d' % (common.CLASS_FILE, previous_tweet_id))

    # Add the new entries to the labelled data file
    fp = open(common.CLASS_FILE, 'at')
    for i,t in enumerate(labelled_messages):
        fp.write('%s | %s\n' % (t[0], t[1]))
    fp.close()
    
    print 'Added %d new labelled messages to %s' % (len(labelled_messages), common.CLASS_FILE)

    # Update the latest labelled entry id
    file(common.LATEST_CLASS_FILE, 'wt').write(str(latest_labelled_tweet_id))
Example #29
def main():

    if (edgetpu == 1):
        mdl = model_edgetpu
    else:
        mdl = model

    interpreter, labels = cm.load_model(model_dir, mdl, lbl, edgetpu)

    fps = 1
    arr_dur = [0, 0, 0]
    #while cap.isOpened():
    while True:
        start_time = time.time()

        #----------------Capture Camera Frame-----------------
        start_t0 = time.time()
        ret, frame = cap.read()
        if not ret:
            break

        cv2_im = frame
        cv2_im = cv2.flip(cv2_im, 0)
        cv2_im = cv2.flip(cv2_im, 1)

        cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
        pil_im = Image.fromarray(cv2_im_rgb)

        arr_dur[0] = time.time() - start_t0
        #cm.time_elapsed(start_t0,"camera capture")
        #----------------------------------------------------

        #-------------------Inference---------------------------------
        start_t1 = time.time()
        cm.set_input(interpreter, pil_im)
        interpreter.invoke()
        objs = cm.get_output(interpreter,
                             score_threshold=threshold,
                             top_k=top_k)

        arr_dur[1] = time.time() - start_t1
        #cm.time_elapsed(start_t1,"inference")
        #----------------------------------------------------

        #-----------------other------------------------------------
        start_t2 = time.time()
        track_object(objs, labels)  # object tracking

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

        cv2_im = append_text_img1(cv2_im, objs, labels, arr_dur,
                                  arr_track_data)

        ret, jpeg = cv2.imencode('.jpg', cv2_im)
        pic = jpeg.tobytes()

        #Flask streaming
        yield (b'--frame\r\n'
               b'Content-Type: image/jpeg\r\n\r\n' + pic + b'\r\n\r\n')

        arr_dur[2] = time.time() - start_t2

        fps = round(1.0 / (time.time() - start_time), 1)
        print("*********FPS: ", fps, "************")

    cap.release()
    cv2.destroyAllWindows()
Example #30
def step2(model, data, args):
    print '### STEP 2: Train for ordinal regression task'

    pretrained_snapshot_fname = 'model_best_loss.th'

    train_loader, val_loader, test_loader = to_ordinal_data(data, args)

    n_samples_train = len(train_loader.dataset)
    n_samples_val = len(val_loader.dataset)
    n_samples_test = len(test_loader.dataset)

    best_val_acc = None
    test_acc = None

    model.to_ordinal()
    saved_model = load_model(model,
                             pretrained_snapshot_fname,
                             args,
                             subdir='snapshots_2')
    if saved_model is not None:
        print 'Loading pretrained model:', pretrained_snapshot_fname
        model = saved_model
        model.cuda()
    else:
        logfile = open(os.path.join(args.workDir, 'log_2.txt'), 'wb')
        model.cuda()
        optimizer = torch.optim.Adam(model.parameters(), args.learningRate2)

        since = time.time()
        for epoch in trange(1, args.nEpochs2 + 1, desc='Epochs'):
            avg_loss = train(model, optimizer, epoch, train_loader, logfile,
                             args)
            val_loss, val_acc, n_correct = evaluate(model, val_loader, args)
            train_loader.dataset.sample_tuples()
            val_loader.dataset.sample_tuples()

            if best_val_acc is None or best_val_acc < val_acc:
                best_val_acc = val_acc
                tqdm.write('Snapshotting best model: ' +
                           pretrained_snapshot_fname)
                save_model(model,
                           pretrained_snapshot_fname,
                           args,
                           subdir='snapshots_2')

            logline = 'Epoch {:3d}/{}] train_avg_loss = {:.4f}, val_avg_loss = {:.4f}, val_accuracy = {}/{} ({:.2f}%, Best: {:.2f}%)'
            tqdm.write(
                logline.format(epoch, args.nEpochs2, avg_loss, val_loss,
                               n_correct, n_samples_val, val_acc,
                               best_val_acc))

        time_elapsed = time.time() - since
        print 'Training complete in {:.0f}m {:.0f}s'.format(
            time_elapsed // 60, time_elapsed % 60)
        model = load_model(model,
                           pretrained_snapshot_fname,
                           args,
                           subdir='snapshots_2')
        model.cuda()

    # RANK TESTING ------------
    if not args.skipTest:
        test_loss, test_acc, n_correct = evaluate(model, test_loader, args)
        logline = 'TEST] test_avg_loss = {:.4f}, test_accuracy = {}/{} ({:.2f}%)'
        print logline.format(test_loss, n_correct, n_samples_test, test_acc)

    return model, {
        'Best Val Rank Accuracy': best_val_acc,
        'Test Rank Accuracy': test_acc
    }
Example #31
                    help='use pre-trained model')
parser.add_argument('--host',
                    default="localhost",
                    metavar='PATH',
                    help="Location of crayon server")
parser.add_argument('--port',
                    default=8899,
                    metavar='N',
                    help="Port of Crayon server (Default:8899)")

if __name__ == "__main__":

    args = parser.parse_args()
    cudnn.benchmark = True

    model, unfreeze = load_model(args.arch, args.classes, args.pretrained)
    criterion = nn.CrossEntropyLoss()

    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

    client = None
    logger = None
    try:
        client = CrayonClient(args.host, args.port)
        client.remove_experiment("pytorch_logging")
        logger = client.create_experiment("pytorch_logging")
Example #32
def update_class_file():
    """We store list of labelled tweets in *.cls files in the format
            class label | message
        e.g.
            n | If I see one more back to school commercial I'm giving my eyes a paper cut.
            y | i got lemon on my finger and it stings .-. stupid paper cut -.-
            
        (Only the tweet text is stored here. The other tweet information
         is not stored as we use only the tweet text for classification)

        common.LATEST_CLASS_FILE (see common.py for actual name) is the
            file where we keep our most up-to-date list of labelled tweets
         
        This function updates common.LATEST_CLASS_FILE using common.TWEETS_FILE,
        which contains all the tweets that have undergone simple screening and
        we have saved.
        
        It also guesses labels for each new tweet using the latest classification
        model.
        
        (We track the tweet id of the latest tweet in common.LATEST_CLASS_FILE)
    """

    # The latest labelled tweet id (an integer) is stored as text in LATEST_CLASS_FILE
    latest_labelled_tweet_id = int(
        file(common.LATEST_CLASS_FILE, 'rt').read().strip()) if os.path.exists(
            common.LATEST_CLASS_FILE) else 0
    previous_tweet_id = latest_labelled_tweet_id

    # Read the classification model. This will be used to guess tweet classifications
    model = common.load_model()

    # Read the tweets from TWEETS_FILE, label them and store them
    # in labelled_messages
    labelled_messages = []
    fp = open(common.TWEETS_FILE, 'rt')
    for line in fp:
        line = line.strip(' \n')

        # Skip empty lines
        if not line:
            continue

        # Skip incorrectly formatted lines
        try:
            id_s, _, _, message = [pt.strip() for pt in line.split('|')]
            id = int(id_s)
        except ValueError:
            print 'ValueError', line
            continue

        # Skip tweets we have already saved
        if id <= latest_labelled_tweet_id:
            continue

        # Filter out messages that are not even allowed for training
        if not filters.is_allowed_for_training(message):
            continue
        cls, _ = model.classify(message)

        labelled_messages.append(
            [definitions.AUTO_CLASSES_LABELS[cls], message])
        latest_labelled_tweet_id = max(id, latest_labelled_tweet_id)
    fp.close()

    print 'found %d new tweets' % len(labelled_messages)
    print 'before: latest_labelled_tweet_id=%d' % previous_tweet_id
    print 'after:  latest_labelled_tweet_id=%d' % latest_labelled_tweet_id

    if latest_labelled_tweet_id == previous_tweet_id:
        print 'Nothing to do'
        exit()

    # Save the current labelled data file
    shutil.copyfile(common.CLASS_FILE,
                    '%s.%d' % (common.CLASS_FILE, previous_tweet_id))

    # Add the new entries to the labelled data file
    fp = open(common.CLASS_FILE, 'at')
    for i, t in enumerate(labelled_messages):
        fp.write('%s | %s\n' % (t[0], t[1]))
    fp.close()

    print 'Added %d new labelled messages to %s' % (len(labelled_messages),
                                                    common.CLASS_FILE)

    # Update the latest labelled entry id
    file(common.LATEST_CLASS_FILE, 'wt').write(str(latest_labelled_tweet_id))