import warnings

import numpy as np
from sklearn.metrics import classification_report, average_precision_score

# `args` (CLI-argument helpers) and `dataset_loader` are assumed to be
# provided by the surrounding project; their modules are not shown here.


def main():
    warnings.filterwarnings('ignore')

    arg_obj = args.get_input()
    args.print_args(arg_obj)
    crc_task_number = int(arg_obj.number) - 1

    ## Set seed to replicate experiments
    seed = 172
    np.random.seed(seed)

    train_features, train_labels = dataset_loader('train')
    print(train_features.shape, train_labels.shape)
    validation_features, validation_labels = dataset_loader('validation')
    print(validation_features.shape, validation_labels.shape)
    test_features, test_labels = dataset_loader('test')
    print(test_features.shape, test_labels.shape)

    all_features = np.vstack(
        (train_features, validation_features, test_features))
    all_labels = np.hstack((train_labels, validation_labels, test_labels))
    print(all_labels.shape)

    # Model names whose saved k-fold predictions will be evaluated
    models = ['LR', 'LDA', 'KNN', 'CART', 'RF', 'NB', 'SVM']

    for model in models:
        print(model)
        preds = np.load('preds/%s_kfold_preds.npy' % model)
        print(classification_report(all_labels, preds, digits=4))
        ave_prec = average_precision_score(all_labels, preds)
        acc = (preds == all_labels).sum() / len(preds)
        # Confusion-matrix counts from boolean masks (binary 0/1 labels)
        tp = ((preds == 1) & (preds == all_labels)).sum()
        fp = ((preds == 1) & (preds != all_labels)).sum()
        tn = ((preds == 0) & (preds == all_labels)).sum()
        fn = ((preds == 0) & (preds != all_labels)).sum()
        # Derived metrics (these divisions assume neither class is empty)
        prec = tp / (tp + fp)
        rec = tp / (tp + fn)
        f1 = 2 * ((prec * rec) / (prec + rec))
        spec = tn / (tn + fp)  # specificity: true-negative rate
        npv = tn / (tn + fn)   # negative predictive value
        print('Acc: ', acc)
        print('Prec: ', prec)
        print('Rec: ', rec)
        print('F1: ', f1)
        print('Spec: ', spec)
        print('NPV: ', npv)
        print('Ave Prec: ', ave_prec)
        print('Sum preds: ', np.sum(preds))
        print('Sum labels: ', np.sum(all_labels))
        print()
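
As a sanity check on the hand-rolled counts above, the same tp/fp/tn/fn values can be read straight off sklearn's confusion_matrix. A minimal sketch with made-up arrays standing in for `preds` and `all_labels`:

import numpy as np
from sklearn.metrics import confusion_matrix

y_true = np.array([1, 0, 0, 1, 1, 0])  # toy stand-in for all_labels
y_pred = np.array([1, 0, 1, 1, 0, 0])  # toy stand-in for preds

# For labels=[0, 1], ravel() yields the counts in the order tn, fp, fn, tp
tn, fp, fn, tp = confusion_matrix(y_true, y_pred, labels=[0, 1]).ravel()

# Identical to the boolean-mask arithmetic used in the loop above
assert tp == ((y_pred == 1) & (y_pred == y_true)).sum()
assert tn == ((y_pred == 0) & (y_pred == y_true)).sum()
print(tp, fp, tn, fn)  # 2 1 2 1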
Example #2
import warnings

import numpy as np
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_predict
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

# `args` and `dataset_loader` are assumed to be provided by the
# surrounding project.


def main():
    warnings.filterwarnings('ignore')

    arg_obj = args.get_input()
    args.print_args(arg_obj)
    crc_task_number = int(arg_obj.number) - 1

    ## Set seed to replicate experiments
    seed = 172
    np.random.seed(seed)

    train_features, train_labels = dataset_loader('train')
    print(train_features.shape, train_labels.shape)
    validation_features, validation_labels = dataset_loader('validation')
    print(validation_features.shape, validation_labels.shape)
    test_features, test_labels = dataset_loader('test')
    print(test_features.shape, test_labels.shape)

    # Parameters that can be changed
    # ------------------------------
    # Number of trees for the Random Forest model
    trees = 2

    # Fraction of the dataset to assign to the test split when doing a plain
    # train/test split; 10 percent here. (Unused below: this script scores
    # with 5-fold cross-validation instead.)
    test_size = 0.10

    # Seed for the pseudo-random number generator used when shuffling the
    # data (e.g. by LogisticRegression). Note that this overwrites the
    # seed = 172 set above.
    seed = 1

    # Create the machine learning models 
    models = []
    models.append(('LR', LogisticRegression(random_state=seed)))
    models.append(('LDA', LinearDiscriminantAnalysis()))
    models.append(('KNN', KNeighborsClassifier()))
    models.append(('CART', DecisionTreeClassifier(random_state=seed)))
    models.append(('RF', RandomForestClassifier(n_estimators=trees, random_state=seed)))
    models.append(('NB', GaussianNB()))
    models.append(('SVM', SVC(random_state=seed)))

    ## Select model based on task array index
    models = [models[crc_task_number]]

    all_features = np.vstack((train_features, validation_features, test_features))
    all_labels = np.hstack((train_labels, validation_labels, test_labels))
    print(all_features.shape, all_labels.shape)

    for name, model in models:
        print('Cross validating: ', name)
        model_preds = cross_val_predict(model, all_features, all_labels, cv=5)
        np.save('preds/%s_kfold_subset_preds.npy' % name, model_preds)
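
For reference, cross_val_predict returns exactly one out-of-fold prediction per sample, which is why the saved array can later be scored 1:1 against all_labels (as in the evaluation snippet above). A self-contained sketch on synthetic data; the dataset here is illustrative, not the script's:

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_predict

X, y = make_classification(n_samples=100, n_features=8, random_state=1)

# Each sample is predicted by the fold-model that did NOT train on it
oof_preds = cross_val_predict(LogisticRegression(random_state=1), X, y, cv=5)

assert oof_preds.shape == y.shape
print('out-of-fold accuracy: %.3f' % (oof_preds == y).mean())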
Example #3

import os
import time

import numpy as np

# `args`, `get_paths`, and `spectral_features` are assumed to be provided
# by the surrounding project.


def main():
    start_time = time.time()

    arg_obj = args.get_input()
    args.print_args(arg_obj)
    number = int(arg_obj.number) - 1

    #split='train'
    split='validation'
    #split='test'
    vid_paths, lmrk_paths, pulse_paths = get_paths(split=split, part_number=number)

    ## Pick the path component (indexed from the end) to replace with
    ## 'spectrums'; the train layout apparently has one extra directory
    ## level, so the component sits one step further from the filename.
    if split == 'train':
        spect_idx = -4
    else:
        spect_idx = -3

    print(pulse_paths.shape)
    print()

    for i in range(len(pulse_paths)):
        pulse_path = pulse_paths[i]
        splits = pulse_path.split('/')
        splits[spect_idx] = 'spectrums'
        spectrum_dir = '/'.join(splits[:-1])
        spectrum_file = splits[-1]
        if not os.path.isdir(spectrum_dir):
            os.makedirs(spectrum_dir)
        spectrum_path = os.path.join(spectrum_dir, spectrum_file)
        print(pulse_path)
        print(spectrum_path)
        pulse = np.load(pulse_path)
        freq, density = spectral_features(pulse)
        print(density.shape)
        np.save(spectrum_path, density)
        print()
    end_time = time.time()
    print('Took %.3f seconds.' % (end_time - start_time))
    return
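
The loop above swaps a single directory component ('spectrums' in place of the pulse folder) while keeping the filename. A tiny standalone illustration with a hypothetical path and the validation/test index of -3:

import os

pulse_path = 'data/pulses/subject01/chunk00.npy'  # hypothetical layout
spect_idx = -3

splits = pulse_path.split('/')
splits[spect_idx] = 'spectrums'               # 'pulses' -> 'spectrums'
spectrum_dir = '/'.join(splits[:-1])          # data/spectrums/subject01
spectrum_path = os.path.join(spectrum_dir, splits[-1])
print(spectrum_path)                          # data/spectrums/subject01/chunk00.npy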
Example #4
import math
import time

import paddle.distributed as dist
import paddle.nn.functional as F

# `parse_args`, `print_args`, and `main` are assumed to come from the
# surrounding project; the APIs used here (dist.spawn, loss.numpy())
# match PaddlePaddle.

# The original snippet begins mid-function; a plausible enclosing
# signature (hypothetical) is restored here so the code parses.
def evaluation(model, data_loader):
    model.eval()
    total_tokens = 0
    total_loss = 0.0
    start_time = time.time()
    step = 0
    for inputs in data_loader:
        step += 1
        token_ids, type_ids, pos_ids, generation_mask, tgt_label, tgt_pos = inputs

        logits = model(token_ids, type_ids, pos_ids, generation_mask, tgt_pos)
        loss = F.cross_entropy(logits, tgt_label, reduction='sum')

        total_loss += loss.numpy()[0]       # accumulate summed token loss
        total_tokens += tgt_label.shape[0]  # number of target tokens in batch

    avg_loss = total_loss / total_tokens
    ppl = math.exp(avg_loss)
    avg_speed = (time.time() - start_time) / step
    print('loss: %.4f - ppl: %.4f - %.3fs/step\n' % (avg_loss, ppl, avg_speed))
    model.train()


if __name__ == '__main__':
    args = parse_args()
    print_args(args)

    if args.n_gpus > 1:
        dist.spawn(main, args=(args, ), nprocs=args.n_gpus)
    else:
        main(args)
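
The perplexity printed above is simply exp of the average per-token cross-entropy: the loss is summed per batch (reduction='sum') and divided by the total token count at the end. A framework-free sketch of the same bookkeeping with made-up numbers:

import math

# (summed cross-entropy, token count) per batch -- values are made up
batches = [(52.1, 24), (47.8, 22), (60.3, 30)]

total_loss = sum(loss for loss, _ in batches)
total_tokens = sum(n for _, n in batches)

avg_loss = total_loss / total_tokens  # mean cross-entropy per token
ppl = math.exp(avg_loss)              # perplexity
print('loss: %.4f - ppl: %.4f' % (avg_loss, ppl))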
Example #5
import os
import time

import numpy as np
import torch

# `args`, `HRCNN`, `get_paths`, `read_lmrks`, `clean_lmrks`, `prep_video`,
# `CNN3D_waveform`, and the constant `IN_FPS` are assumed to come from the
# surrounding project.


def main():
    start_time = time.time()

    arg_obj = args.get_input()
    args.print_args(arg_obj)
    number = int(arg_obj.number) - 1
    print('Using number: ', number)

    tk = int(arg_obj.tk)
    sk = int(arg_obj.sk)
    model_load_path = arg_obj.model_load_path
    arg_obj.fps = IN_FPS

    ## Path to the facial shape-predictor file (read here but not used
    ## further in this snippet)
    shape_predictor = arg_obj.shape_predictor_path

    if model_load_path is None:
        if sk > 1:
            model_load_path = 'model_weights/3dcnn_sk%d' % sk
        else:
            model_load_path = 'model_weights/3dcnn_tk%d' % tk

    ## Make sure a valid model path was given
    if not os.path.exists(model_load_path):
        print('Incorrect path to model weights for 3dcnn. Make sure sk is in '
              '[1,20] and tk is in [3,5,7,...,25]. Exiting.')
        return -1

    ## Use GPU if CUDA is configured and load model to correct device
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    hrcnn = HRCNN(drop_p=0, t_kern=tk).float().to(device)

    ## Load model specified by the path
    checkpoint = torch.load(model_load_path, map_location=device)
    hrcnn.load_state_dict(checkpoint['model_state_dict'])
    hrcnn.eval()

    split = 'train'
    #split='val'
    #split='test'
    vid_paths, lmrk_paths, out_paths = get_paths(split=split,
                                                 part_number=number)

    out_dir = os.path.dirname(out_paths[0])
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    print(vid_paths.shape)
    print(lmrk_paths.shape)
    print(out_paths.shape)
    print()

    for i in range(len(vid_paths)):
        vid_path = vid_paths[i]
        lmrk_path = lmrk_paths[i]
        out_path = out_paths[i]
        lmrks = read_lmrks(lmrk_path)
        all_bad, lmrks = clean_lmrks(lmrks)
        video = prep_video(vid_path, lmrks, all_bad=all_bad)
        waveform = CNN3D_waveform(hrcnn, video, IN_FPS, sk, device=device)
        np.save(out_path, waveform)
    end_time = time.time()
    print('Took %.3f seconds.' % (end_time - start_time))
    return
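
The checkpoint handling above follows the common PyTorch pattern of saving a dict keyed by 'model_state_dict'. A minimal round-trip sketch with a toy module standing in for HRCNN (names and file path are illustrative):

import torch
import torch.nn as nn

net = nn.Linear(4, 2)  # toy stand-in for HRCNN

# Save in the same dict format the script expects
torch.save({'model_state_dict': net.state_dict()}, 'toy_ckpt.pt')

# Load onto whichever device is available, restore weights, switch to eval
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
checkpoint = torch.load('toy_ckpt.pt', map_location=device)
net.load_state_dict(checkpoint['model_state_dict'])
net.to(device).eval()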