Example #1
def load_datasets(FLAGS):

    # Load train data
    print("-"*42)
    print("Loading and preprocessing training data...")
    print("-"*42)
    imgs_train, msks_train = load_data(FLAGS.data_path,"_train")
    imgs_train, msks_train = update_channels(imgs_train, msks_train,
                                             settings.NUM_IN_CHANNELS,
                                             settings.NUM_OUT_CHANNELS,
                                             settings.MODE)

    # Load test data
    print("-"*38)
    print("Loading and preprocessing test data...")
    print("-"*38)
    imgs_test, msks_test = load_data(FLAGS.data_path,"_test")
    imgs_test, msks_test = update_channels(imgs_test, msks_test,
                                           settings.NUM_IN_CHANNELS,
                                           settings.NUM_OUT_CHANNELS,
                                           settings.MODE)

    print("Training images shape: {}".format(imgs_train.shape))
    print("Training masks shape:  {}".format(msks_train.shape))
    print("Testing images shape:  {}".format(imgs_test.shape))
    print("Testing masks shape:   {}".format(msks_test.shape))

    """
    Iterator for Dataset
    """
    # train_data = (imgs_train, msks_train)
    # train_dataset = tf.data.Dataset.from_tensor_slices(train_data).\
    #             shuffle(32000, reshuffle_each_iteration=True).repeat().batch(FLAGS.batch_size)
    # test_data = (imgs_test, msks_test)
    # test_dataset = tf.data.Dataset.from_tensor_slices(test_data).\
    #             batch(FLAGS.batch_size).repeat()
    #
    # train_iterator = train_dataset.make_initializable_iterator()
    # test_iterator = test_dataset.make_initializable_iterator()

    # return train_iterator, test_iterator

    return imgs_train, msks_train, imgs_test, msks_test
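The commented-out iterator block above can be made runnable as a TF 1.x pipeline. A minimal sketch, assuming the arrays fit in memory and a batch size is passed in (the helper name make_iterators is an assumption, not part of the original project):

import tensorflow as tf

def make_iterators(imgs_train, msks_train, imgs_test, msks_test, batch_size):
    # Shuffled, repeating training pipeline and a plain batched test pipeline.
    train_dataset = tf.data.Dataset.from_tensor_slices((imgs_train, msks_train)) \
        .shuffle(32000, reshuffle_each_iteration=True).repeat().batch(batch_size)
    test_dataset = tf.data.Dataset.from_tensor_slices((imgs_test, msks_test)) \
        .batch(batch_size).repeat()

    # TF 1.x initializable iterators; run iterator.initializer inside a session.
    train_iterator = train_dataset.make_initializable_iterator()
    test_iterator = test_dataset.make_initializable_iterator()
    return train_iterator, test_iterator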
Example #2
def main(args):
    train_loader, val_loader = load_data(args)
    args.weight_labels = torch.tensor(
        calculate_weigths_labels('cityscape', train_loader,
                                 args.n_classes)).float()
    if args.cuda:
        # args.weight_labels = args.weight_labels.cuda()
        pass

    model = DeepLabV3Plus()
    if args.cuda:
        model = model.cuda()
    if args.evaluation:
        checkpoint = torch.load('./checkpoint/checkpoint_model_6000.pth',
                                map_location='cpu')
        model.load_state_dict(checkpoint['model_state_dict'])
        eval(val_loader, model, args)
    else:
        # criterion = SegmentationLosses(weight=args.weight_labels, cuda=True)
        criterion = nn.CrossEntropyLoss(ignore_index=args.ignore_mask,
                                        weight=None).cuda()

        backbone_params = nn.ParameterList()
        decoder_params = nn.ParameterList()

        for name, param in model.named_parameters():
            if 'backbone' in name:
                backbone_params.append(param)
            else:
                decoder_params.append(param)

        params_list = [{
            'params': backbone_params
        }, {
            'params': decoder_params,
            'lr': args.lr * 10
        }]

        optimizer = optim.SGD(params_list,
                              lr=args.lr,
                              momentum=args.momentum,
                              weight_decay=args.weight_decay,
                              nesterov=True)
        scheduler = PolyLr(optimizer,
                           gamma=args.gamma,
                           max_iteration=args.max_iteration,
                           warmup_iteration=args.warmup_iteration)

        global_step = 0
        if not os.path.isdir('checkpoint'):
            os.mkdir('checkpoint')
        while global_step < args.max_iteration:
            global_step = train(global_step, train_loader, model, optimizer,
                                criterion, scheduler, args)
        eval(val_loader, model, args)
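PolyLr above is project-specific, not part of torch.optim. A rough stand-in, under the assumption that it does linear warmup followed by polynomial decay, can be sketched with LambdaLR; make_poly_lr is a hypothetical helper name:

import torch.optim as optim

def make_poly_lr(optimizer, gamma, max_iteration, warmup_iteration):
    # Linear warmup for warmup_iteration steps, then (1 - progress)**gamma decay.
    def poly_lambda(step):
        if warmup_iteration and step < warmup_iteration:
            return float(step) / float(max(1, warmup_iteration))
        progress = float(step - warmup_iteration) / float(max(1, max_iteration - warmup_iteration))
        return max(0.0, 1.0 - progress) ** gamma
    return optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=poly_lambda)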
Example #3
def main(args):
    model = Network()

    train_loader, test_loader = load_data(args)

    if args.flag:
        _eval(model, test_loader, args)
    else:
        for epoch in range(1, args.epochs + 1):
            train(model, train_loader, epoch, args)
        _eval(model, test_loader, args)
Example #4
def main(args, logger):
    train_loader, test_loader = load_data(args)
    if args.dataset == 'CIFAR10':
        num_classes = 10
    elif args.dataset == 'CIFAR100':
        num_classes = 100
    elif args.dataset == 'IMAGENET':
        num_classes = 1000

    print('img_size: {}, num_classes: {}, stem: {}'.format(
        args.img_size, num_classes, args.stem))
    if args.model_name == 'ResNet26':
        print('Model Name: {0}'.format(args.model_name))
        model = ResNet26(num_classes=num_classes,
                         stem=args.stem,
                         dataset=args.dataset)
    elif args.model_name == 'ResNet38':
        print('Model Name: {0}'.format(args.model_name))
        model = ResNet38(num_classes=num_classes,
                         stem=args.stem,
                         dataset=args.dataset)
    elif args.model_name == 'ResNet50':
        print('Model Name: {0}'.format(args.model_name))
        model = ResNet50(num_classes=num_classes,
                         stem=args.stem,
                         dataset=args.dataset)

    if args.pretrained_model:
        filename = 'best_model_' + str(args.dataset) + '_' + str(
            args.model_name) + '_' + str(args.stem) + '_ckpt.tar'
        print('filename :: ', filename)
        file_path = os.path.join(args.checkpoint_dir, filename)
        checkpoint = torch.load(file_path)

        model.load_state_dict(checkpoint['state_dict'])
        start_epoch = checkpoint['epoch']
        best_acc = checkpoint['best_acc']
        model_parameters = checkpoint['parameters']
        print('Load model, Parameters: {0}, Start_epoch: {1}, Acc: {2}'.format(
            model_parameters, start_epoch, best_acc))
        logger.info(
            'Load model, Parameters: {0}, Start_epoch: {1}, Acc: {2}'.format(
                model_parameters, start_epoch, best_acc))
    else:
        start_epoch = 1
        best_acc = 0.0

    if args.cuda:
        if torch.cuda.device_count() > 1:
            model = nn.DataParallel(model)
        model = model.cuda()

    eval(model, test_loader, args)
Example #5
def model_whole_train():
    conf_outmodelpath = get_config()['model_path']
    out_model_dir = conf_outmodelpath['out_model_dir']

    logger.info('loading whole training data (without split)...')
    model_whole_train_test_data = os.path.join(out_model_dir, conf_outmodelpath['model_whole_train_test_data'])
    if os.path.exists(model_whole_train_test_data):
        tt_data = np.load(model_whole_train_test_data)
        x_train, y_train, feat_name = tt_data['x_train'], tt_data['y_train'], tt_data['feat_name']
    else:
        x, y_label, _, y_stage_val, feat_name = load_data('train_data')  # HARD CODE here

        # training data splitting and transformation
        x_train, y_train = data_whole_transform(x, y_stage_val)
        np.savez(model_whole_train_test_data,
                 x_train=x_train, y_train=y_train, feat_name=feat_name)

    logger.info('selecting features by model...')
    supp, x_train_new, y_train_new = feature_sel(x_train,
                                                 y_train,
                                                 k=15,
                                                 estimator=ExtraTreesClassifier(n_estimators=40, max_depth=5,
                                                                                max_features='auto'),
                                                 sel_method='estimator')

    model_sel_feature_re_fname = os.path.join(out_model_dir,
                                              conf_outmodelpath['model_whole_select_feature_results'])
    logger.info('saving the selected features...')
    if os.path.exists(model_sel_feature_re_fname):
        os.remove(model_sel_feature_re_fname)
    np.savez(model_sel_feature_re_fname, original_feat=feat_name, supp=supp)

    logger.info('training whole model...')
    cls_weight = get_class_weight(y_train_new)
    # clf, _ = model_train_run(x_train_new, y_train_new, cls_weight=cls_weight, cv_score=False)

    clf, _ = single_model_train_run(x_train_new, y_train_new, cv_score=False, cls_weight=cls_weight)
    y_pred = clf.predict_proba(x_train_new)

    risk = np.dot(y_pred, np.array([[0], [1], [4], [8], [10]]))

    logger.info(clf.classes_)
    logger.info(list(zip(y_train_new, y_pred, risk.flatten())))

    logger.info('saving whole model...')
    model_whole_model_dump_file = os.path.join(out_model_dir,
                                               conf_outmodelpath['model_whole_model_dump_file'])
    if os.path.exists(model_whole_model_dump_file):
        os.remove(model_whole_model_dump_file)
    joblib.dump(clf, model_whole_model_dump_file)

    logger.info('for model evaluation, see the corresponding .ipynb file')
    logger.info('DONE--training model based on the whole data')
Example #6
def load_test_data():

    # Load test data
    tf.logging.info('-' * 38)
    tf.logging.info('Loading and preprocessing test data...')
    tf.logging.info('-' * 38)
    imgs_test, msks_test = load_data(settings_dist.OUT_PATH, "_test")
    imgs_test, msks_test = update_channels(imgs_test, msks_test,
                                           settings_dist.IN_CHANNEL_NO,
                                           settings_dist.OUT_CHANNEL_NO,
                                           settings_dist.MODE)

    return imgs_test, msks_test
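load_data and update_channels are project helpers that are not shown in these snippets. A minimal sketch of what a load_data(path, suffix) reading pre-saved NumPy arrays might look like; the file names imgs<suffix>.npy / msks<suffix>.npy are an assumption:

import os
import numpy as np

def load_data(data_path, suffix):
    # Assumed layout: imgs_test.npy / msks_test.npy (etc.) under data_path.
    imgs = np.load(os.path.join(data_path, "imgs{}.npy".format(suffix)))
    msks = np.load(os.path.join(data_path, "msks{}.npy".format(suffix)))
    return imgs, msks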
Example #7
def main_big():

    #-- Load data --#
    '''
        word_number : dictionary mapping each word to an index, built from the training set
        length : maximum length of a sequence
    '''
    coeff = 1.1
    lemmatisation = True
    output_name = 'conv2_bigDataset_lem'
    #X_train, Y_train, X_test, Y_test, length, word_number = load_data('data/twb_clean.txt', 'data/twb-dev_clean.txt', threshold = 10, seq = 60)
    X_train, Y_train, X_test, Y_test, length, word_number = load_data(
        'Sentiment_Analysis_Dataset_correct_train_preproces.txt',
        'Sentiment_Analysis_Dataset_correct_test_preproces.txt',
        threshold=5,
        seq=-1,
        coeff=coeff,
        lemmatisation=lemmatisation)
    #print(X_test)

    # vocabulary size
    size_vocab = max(list(word_number.values()))
    print(' ### Parameters ###')
    print('lemmatisation : ', lemmatisation)
    print('name saved : ', output_name)
    print('size vocabulary :', size_vocab)
    print('negative :', list(Y_train).count(0))
    #print('neutral :' ,list(Y_train).count(1))
    print('positive :', list(Y_train).count(1))

    print('negative :', list(Y_test).count(0))
    #print('neutral :' ,list(Y_test).count(1))
    print('positive :', list(Y_test).count(1))
    # split train into train and valid data
    #X_train, X_val, y_train, y_val = train_test_split(data_train, etq_train, test_size=0.2)

    ## Save the dictionary that maps each word to its index.
    ##  Warning: the dictionary changes on every new training run; to evaluate the model later, remember to load this dictionary (i.e. use the load3 function).
    pickle.dump(word_number, open('word_number_conv2_glove_lem.pkl', 'wb'))
    model = train_model(X_train,
                        Y_train,
                        X_test,
                        Y_test,
                        int(coeff * length),
                        size_vocab,
                        output_dim=300,
                        batch_size=128,
                        dic_word=word_number,
                        embedding_glove=True,
                        lemmatisation=lemmatisation,
                        output_name=output_name)
Example #8
def load_test_data():

    # Load test data
    print('-' * 38)
    print('Loading and preprocessing test data...')
    print('-' * 38)
    imgs_test, msks_test = load_data(
        "/home/bduser/data_test/{0}/data/".format(sample), "_test")
    imgs_test, msks_test = update_channels(imgs_test, msks_test,
                                           settings_dist.IN_CHANNEL_NO,
                                           settings_dist.OUT_CHANNEL_NO,
                                           settings_dist.MODE)

    return imgs_test, msks_test
Example #9
def main():

    inputs, labels = load_data('inputs.npy', 'labels.npy')
    group_num = 10
    group_size = len(inputs) // group_num
    acc = 0
    for j in range(group_num):
        # don't want to do this until it actually starts working...
        # for cross validation
        if j == 1:
            break
        print('test set is: {} out of {}'.format(j, group_num))
        model = Model()
        # create train/test sets by excluding the current test set

        if j == 0:
            train_inputs = inputs[group_size * (j + 1):]
            train_labels = labels[group_size * (j + 1):]
        elif j == group_num - 1:
            train_inputs = inputs[:group_size * j]
            train_labels = labels[:group_size * j]
        else:
            # NumPy arrays: use np.concatenate, since '+' would add element-wise
            train_inputs = np.concatenate(
                (inputs[:group_size * j], inputs[group_size * (j + 1):]))
            train_labels = np.concatenate(
                (labels[:group_size * j], labels[group_size * (j + 1):]))

        test_inputs = inputs[group_size * j:group_size * (j + 1)]
        test_labels = labels[group_size * j:group_size * (j + 1)]
        print(train_inputs.shape)
        for i in range(model.epochs):
            print('epoch #{}'.format(i + 1))
            train(model, train_inputs, train_labels)
            prob = model.call(train_inputs)
            acc = model.accuracy(prob, train_labels)
            print('train acc: {}'.format(acc))
            prob = model.call(test_inputs)
            acc = model.accuracy(prob, test_labels)
            print('test acc: {}'.format(acc))
        curr_acc = test(model, test_inputs, test_labels)
        acc += curr_acc
        print('test group {} acc: {}'.format(j, curr_acc))
    # acc = test(model, test_inputs, test_labels)
    # print('accuracy of model: {}'.format(acc))

    # overall_acc = acc / float(group_num)
    overall_acc = acc
    print('overall acc: {}'.format(overall_acc))

    return
Example #10
def main():
    args = init_args()
    if args.list_models:
        print('\n'.join(models.get_model_names()))
        exit()
    m = args.model.split(',')
    dict_m = models.get_models(m)
    x, y = preprocess.load_data(args.train_datesets)
    for model_name in dict_m:
        model = dict_m[model_name]
        print('Training model %s' % model_name)
        model.fit(x, y)
        models.save_model(model, model_name, args.model_dir)
        print('Train finished, save to %s' % args.model_dir)
Example #11
def main(args):
    train_loader, test_loader = load_data(args)

    GeneratorA2B = CycleGAN()
    GeneratorB2A = CycleGAN()

    DiscriminatorA = Discriminator()
    DiscriminatorB = Discriminator()

    if args.cuda:
        GeneratorA2B = GeneratorA2B.cuda()
        GeneratorB2A = GeneratorB2A.cuda()

        DiscriminatorA = DiscriminatorA.cuda()
        DiscriminatorB = DiscriminatorB.cuda()

    optimizerG = optim.Adam(itertools.chain(GeneratorA2B.parameters(), GeneratorB2A.parameters()), lr=args.lr, betas=(0.5, 0.999))
    optimizerD = optim.Adam(itertools.chain(DiscriminatorA.parameters(), DiscriminatorB.parameters()), lr=args.lr, betas=(0.5, 0.999))

    if args.training:
        path = 'E:/cyclegan/checkpoints/model_{}_{}.pth'.format(285, 200)

        checkpoint = torch.load(path)
        GeneratorA2B.load_state_dict(checkpoint['generatorA'])
        GeneratorB2A.load_state_dict(checkpoint['generatorB'])
        DiscriminatorA.load_state_dict(checkpoint['discriminatorA'])
        DiscriminatorB.load_state_dict(checkpoint['discriminatorB'])
        optimizerG.load_state_dict(checkpoint['optimizerG'])
        optimizerD.load_state_dict(checkpoint['optimizerD'])

        start_epoch = 285
    else:
        init_net(GeneratorA2B, init_type='normal', init_gain=0.02, gpu_ids=[0])
        init_net(GeneratorB2A, init_type='normal', init_gain=0.02, gpu_ids=[0])

        init_net(DiscriminatorA, init_type='normal', init_gain=0.02, gpu_ids=[0])
        init_net(DiscriminatorB, init_type='normal', init_gain=0.02, gpu_ids=[0])
        start_epoch = 1

    if args.evaluation:
        evaluation(test_loader, GeneratorA2B, GeneratorB2A, args)
    else:
        cycle = nn.L1Loss()
        gan = nn.BCEWithLogitsLoss()
        identity = nn.L1Loss()

        for epoch in range(start_epoch, args.epochs):
            train(train_loader, GeneratorA2B, GeneratorB2A, DiscriminatorA, DiscriminatorB, optimizerG, optimizerD, cycle, gan, identity, args, epoch)
        evaluation(test_loader, GeneratorA2B, GeneratorB2A, args)
Example #12
def main(args):
    train_loader, test_loader = load_data(args)

    if not os.path.isdir('checkpoints'):
        os.mkdir('checkpoints')

    args.vocab_len = len(args.vocab['stoi'].keys())

    model = BERT(args.vocab_len, args.max_len, args.heads, args.embedding_dim,
                 args.N)
    if args.cuda:
        model = model.cuda()

    if args.task:
        print('Start Down Stream Task')
        args.epochs = 3
        args.lr = 3e-5

        state_dict = torch.load(args.checkpoints)
        model.load_state_dict(state_dict['model_state_dict'])

        criterion = {'mlm': None, 'nsp': nn.CrossEntropyLoss()}

        optimizer = optim.Adam(model.parameters(),
                               lr=args.lr,
                               weight_decay=args.weight_decay)

        for epoch in range(1, args.epochs + 1):
            train_mlm_loss, train_nsp_loss, train_loss, train_mlm_acc, train_nsp_acc = _train(
                epoch, train_loader, model, optimizer, criterion, args)
            test_mlm_loss, test_nsp_loss, test_loss, test_mlm_acc, test_nsp_acc = _eval(
                epoch, test_loader, model, criterion, args)
            save_checkpoint(model, optimizer, args, epoch)
    else:
        print('Start Pre-training')
        criterion = {
            'mlm': nn.CrossEntropyLoss(ignore_index=0),
            'nsp': nn.CrossEntropyLoss()
        }
        optimizer = optim.Adam(model.parameters(),
                               lr=args.lr,
                               weight_decay=args.weight_decay)

        for epoch in range(1, args.epochs):
            train_mlm_loss, train_nsp_loss, train_loss, train_mlm_acc, train_nsp_acc = _train(
                epoch, train_loader, model, optimizer, criterion, args)
            test_mlm_loss, test_nsp_loss, test_loss, test_mlm_acc, test_nsp_acc = _eval(
                epoch, test_loader, model, criterion, args)
            save_checkpoint(model, optimizer, args, epoch)
Example #13
def load_train_set():
    dataset = pp.load_data(settings.datadir, exclude=["nonbullying"])
    nb_dataset = pp.load_data(settings.datadir, include=["nonbullying"])
    images, labels = [], []
    label_map = []
    print("Load Train Data:")
    for lab, im in dataset.items():
        images.extend(im)
        labels.extend([len(label_map)] * len(im))
        label_map.append(lab)
        print("   ", lab, len(im))
    nb_images = random.choices(nb_dataset["nonbullying"],
                               k=min(len(nb_dataset["nonbullying"]),
                                     int(1.5 * len(images))))
    label_map.append("nonbullying")
    print("   ", "nonbullying", len(nb_images))
    print("Total", len(images) + len(nb_images))
    print("Label Map:")
    for i, name in enumerate(label_map):
        print("   ", name, i)

    with tf.name_scope("db"):
        data = (images + nb_images,
                labels + [label_map.index("nonbullying")] * len(nb_images))
        db = tf.data.Dataset.from_tensor_slices(data)
        db = db.map(lambda _, __: (load_im(_), __), num_parallel_calls=4)
        db = db.cache()
        db = db.map(lambda _, __: (preprocess(_, True), __),
                    num_parallel_calls=4)
        db = db.shuffle(
            buffer_size=(len(images) +
                         len(nb_images))).batch(BATCH_SIZE).prefetch(
                             BATCH_SIZE * 4)
        it = db.make_initializable_iterator()

    return label_map, db, it
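load_im and preprocess are assumed helpers for the pipeline above. A minimal TF 1.x sketch of what they might look like for image file paths; the 224x224 size and the flip augmentation are assumptions:

import tensorflow as tf

def load_im(path):
    # Read and decode an image file into a float tensor in [0, 1].
    raw = tf.read_file(path)
    img = tf.image.decode_jpeg(raw, channels=3)
    return tf.image.convert_image_dtype(img, tf.float32)

def preprocess(img, training):
    # Resize to a fixed size; apply a cheap flip augmentation when training.
    img = tf.image.resize_images(img, (224, 224))
    if training:
        img = tf.image.random_flip_left_right(img)
    return img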
Example #14
def main():
    global LEARNING_RATE
    global REG_RATE
    global tr_p, tr_f, tr_y, va_p, va_f, va_y, label_encoder, tr_label, va_label
    tr_p, tr_f, tr_y, va_p, va_f, va_y, label_encoder = preprocess.load_data('data/train.csv', 'train')

    tr_label = to_categorical(tr_y, CLASS_NUMBER)
    va_label = to_categorical(va_y, CLASS_NUMBER)
    #test = load_data('data/test.csv', 'test')

    lr = 0.001
    reg = (-4,-8)
    iteration_time = 10
    model_path = 'model_withoutDA_trained.h5'
    train_model(model_path, iteration_time, 300, reg, lr_rate=lr)

    # choose the best model from the 10 trained models
    filter_model_path = 'model_withoutDA_filtered.h5'
    model = filter_models(iteration_time, filter_model_path)

    te_p, te_f, te_ids = preprocess.load_data('data/test.csv', 'test')
    prob = model.predict_proba([te_p, te_f])
    MyPredict.predict(prob, te_ids, 'cnn_pred.csv')
    return model, te_p, te_f
Example #15
def main(args):
    train_loader, test_loader, down_train_loader, down_test_loader, = load_data(
        args)

    if not os.path.isdir('checkpoints'):
        os.mkdir('checkpoints')

    model = Model(args)
    down_model = DownStreamModel(args)
    if args.cuda:
        model = model.cuda()
        down_model = down_model.cuda()

    optimizer = optim.SGD(model.parameters(),
                          lr=args.lr,
                          weight_decay=args.weight_decay,
                          momentum=args.momentum)
    lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer,
                                                              T_max=800)

    if args.pretrain:
        train_losses, epoch_list = [], []
        for epoch in range(1, args.epochs + 1):
            train_loss = pre_train(epoch, train_loader, model, optimizer, args)
            if epoch % 25 == 0:
                save_checkpoint(model, optimizer, args, epoch)
            lr_scheduler.step()
            train_losses.append(train_loss)
            epoch_list.append(epoch)
            print(' Cur lr: {0:.5f}'.format(lr_scheduler.get_last_lr()[0]))
            plt.plot(epoch_list, train_losses)
            plt.xlabel('epoch')
            plt.ylabel('training loss')
            plt.savefig('test.png', dpi=300)

    down_optimizer = optim.SGD(down_model.parameters(),
                               lr=0.1,
                               weight_decay=5e-4,
                               momentum=args.momentum)
    down_criterion = nn.CrossEntropyLoss()
    down_lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
        down_optimizer, T_0=10, T_mult=2, eta_min=0.0001)
    for epoch in range(1, 11):
        _train(epoch, down_train_loader, down_model, down_optimizer,
               down_criterion, args)
        _eval(epoch, down_test_loader, down_model, down_criterion, args)
        down_lr_scheduler.step()
Example #16
    def __init__(self):
        self.stddev = 0.02
        self.batch_size = 160

        self.data = load_data()
        self.d_a = 50
        self.d_b = 5
        self.d_c = 230
        self.d = self.d_a + self.d_b * 2
        self.l = 3
        self.n_r = len(self.data["relation_map"])

        self.word_map = self.data["word_map"]
        self.word_matrix = self.data["word_matrix"]
        self.train_list = self.data["train_list"]
        self.num_positions = 2 * self.data["limit"] + 1
        self.bags_list = self.data["bags_list"]
        print("Number of bags:", len(self.bags_list))
        self.num_epochs = 50
        self.max_length = self.data["max_length"]

        self.sentences_placeholder = tf.placeholder(tf.int32,
                                                    [None, self.max_length, 3])
        self.sentences = tf.expand_dims(self.sentences_placeholder, -1)
        self.sentence_vectors = self.train_sentence(self.sentences)

        self.flat_sentences = tf.squeeze(self.sentence_vectors, [1, 2])
        self.bag_indices = tf.placeholder(tf.int32, [self.batch_size])
        self.avg_sentences = self.avg_bags(self.bag_indices,
                                           self.flat_sentences)

        self.logits = self.fully_connected(self.avg_sentences, self.d_c,
                                           self.n_r, "logits")

        self.labels_placeholder = tf.placeholder(tf.int32, [self.batch_size])

        self.cost = tf.reduce_sum(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=self.labels_placeholder, logits=self.logits))

        self.optimizer = tf.train.AdamOptimizer(0.01).minimize(self.cost)

        tf.summary.scalar("cost", self.cost)

        # merge all summaries into a single "operation" which we can execute in a session
        self.summary_op = tf.summary.merge_all()
Example #17
def main_SemEval():

    #-- Load data --#
    '''
        word_number : dictionary mapping each word to an index, built from the training set
        length : maximum length of a sequence
    '''
    coeff = 1.0
    lemmatisation = False
    output_name = 'lstmTF_semeval'
    X_train, Y_train, X_test, Y_test, length, word_number = load_data(
        'data/twb_cleanv5.txt',
        'data/twb-dev_cleanv5.txt',
        threshold=5,
        seq=40,
        coeff=coeff,
        lemmatisation=lemmatisation)
    #print(X_test)
    word_pkl = 'word_number_lstmTF_semeval.pkl'  #'word_number_cbowdataset2_noglove.pkl'
    # vocabulary size
    size_vocab = max(list(word_number.values()))
    print('size vocabulary :', size_vocab)
    print('negative :', list(Y_train).count(0))
    #print('neutral :' ,list(Y_train).count(1))
    print('positive :', list(Y_train).count(1))

    print('negative :', list(Y_test).count(0))
    #print('neutral :' ,list(Y_test).count(1))
    print('positive :', list(Y_test).count(1))

    ## Save the dictionary that maps each word to its index.
    ##  Warning: the dictionary changes on every new training run; to evaluate the model later, remember to load this dictionary (i.e. use the load3 function).
    pickle.dump(word_number, open(word_pkl, 'wb'))

    model = train_model(X_train,
                        Y_train,
                        X_test,
                        Y_test,
                        int(coeff * length),
                        size_vocab,
                        output_dim=300,
                        batch_size=32,
                        dic_word=word_number,
                        embedding_glove=True,
                        lemmatisation=lemmatisation,
                        output_name=output_name)
Example #18
File: evaluation.py  Project: mfkiwl/CLIP
def main(args):
    if args.cuda:
        device = 'cuda'
    else:
        device = 'cpu'
    model, preprocess = clip.load('ViT-B/32', device)
    args.input_resolution = 224

    test_data, test_loader = load_data(args)

    text_inputs = torch.cat([
        clip.tokenize(f"a photo of a {c}") for c in test_data.classes
    ]).to(device)
    if args.cuda:
        text_inputs = text_inputs.cuda()

    _eval(model, text_inputs, test_loader, args)
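_eval is project code that is not shown here. A minimal sketch of zero-shot CLIP classification with the tokenized prompts built above; this is an assumption about what _eval does, not the project's implementation:

import torch

@torch.no_grad()
def _eval(model, text_inputs, test_loader, args):
    # Encode the class prompts once, then rank classes by cosine similarity per image.
    text_features = model.encode_text(text_inputs)
    text_features = text_features / text_features.norm(dim=-1, keepdim=True)

    correct, total = 0, 0
    for images, labels in test_loader:
        if args.cuda:
            images, labels = images.cuda(), labels.cuda()
        image_features = model.encode_image(images)
        image_features = image_features / image_features.norm(dim=-1, keepdim=True)
        preds = (image_features @ text_features.t()).argmax(dim=-1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)
    print('zero-shot accuracy: {:.4f}'.format(correct / total))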
Example #19
def train():
    with tf.Graph().as_default():
        features, labels = model.placeholder_inputs(BATCH_SIZE, NUM_FEATURES)

        pred = model.get_model(features)
        # with tf.name_scope('loss') as scope:
        loss = model.get_loss(pred, labels)
        tf.summary.scalar('loss', loss)

        total, count = tf.metrics.accuracy(labels=tf.to_int64(labels),
                                           predictions=tf.argmax(pred, 1),
                                           name='accuracy')
        tf.summary.scalar('accuracy', count)

        # Get training operator
        optimizer = tf.train.AdamOptimizer(LEARNING_RATE)
        train_op = optimizer.minimize(loss)

        # Add ops to save and restore all the variables.
        saver = tf.train.Saver()

        # Create a session
        sess = tf.Session()

        # Add summary writers
        merged = tf.summary.merge_all()
        train_writer = tf.summary.FileWriter(os.path.join(LOG_DIR, 'train'),
                                             sess.graph)

        # Init variables
        init = tf.global_variables_initializer()
        local = tf.local_variables_initializer()
        sess.run(init)
        sess.run(local)

        for epoch in range(NUM_EPOCHS):
            data, label = preprocess.load_data()

            feed_dict = {features: data, labels: label}
            summary, _, loss_val, pred_val, accurate = sess.run(
                [merged, train_op, loss, pred, count], feed_dict=feed_dict)
            train_writer.add_summary(summary, epoch)
            print(accurate)

            save_path = saver.save(sess, os.path.join(LOG_DIR, "model.ckpt"))
    return
Example #20
    def __init__(self,
                 phase,
                 set_name,
                 save_counter=False,
                 max_doc_len=100,
                 max_num_lines=None,
                 save=False,
                 wordict=None):
        preserved_words = ("<pad>", "<unk>")

        if phase not in ['train', 'dev', 'test']:
            raise AssertionError('Invalid phase')
        # find all dataset zip files
        file_list = os.listdir()
        file_pattern = re.compile(
            r'(2018-)?EI-([a-zA-Z]+)-En-([a-zA-Z]+)\.zip')
        file_info_list = list(map(file_pattern.search, file_list))
        file_info_list = list(
            filter(
                lambda x: x is not None and x.groups()[1] == set_name and x.
                groups()[2] == phase, file_info_list))
        total_zipfile_count = len(file_info_list)
        for index, file_info in enumerate(file_info_list):
            print('ZipFile: %d / %d' % (index + 1, total_zipfile_count))
            self.data, self.intensity = load_data(file_info[0],
                                                  max_num_lines=max_num_lines)
        # get word_dict & label_dict
        if wordict is None:
            self.wordict = build_dict(self.data,
                                      'word_' + set_name,
                                      save,
                                      save_counter,
                                      preserved_words=preserved_words)
        else:
            self.wordict = wordict
        # word2idx
        tmp_data = {}
        for emotion, data in self.data.items():
            tmp_data[emotion] = np.array([[
                self.wordict[emotion][word] if word in self.wordict[emotion]
                else preserved_words.index('<unk>') for word in entry
            ][:max_doc_len] + [0] * (max_doc_len - len(entry))
                                          for entry in data], np.int32)
        self.data = tmp_data
        self.emotion = None
Example #21
def train(args):
    dataset = load_data(args.dirname)
    dataset.split_data(test_config)
    adj = dataset.adj.to(device)
    pre_emb = dataset.word_pre_emb.to(device)
    lookup = dataset.lookup_index.to(device)

    logger.info('start training on dataset user:{}, item:{}, other:{}'.format(
        dataset.nb_user, dataset.nb_item, dataset.nb_proj + dataset.nb_class))
    get_calls_distribution(dataset)

    model = GCNRec(dataset.nb_user, dataset.nb_item,
                   dataset.nb_proj + dataset.nb_class, adj, dataset.vocab_sz,
                   lookup, pre_emb).to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.0005, weight_decay=1e-5)
    num_params = sum([p.numel() for p in model.parameters()])
    logger.info('total model parameters: {}'.format(num_params))

    batch_sz = args.batch_sz
    neg_sz = args.neg_sz
    save_round = args.save_round
    nb_epoches = args.epoch_num
    for i in range(nb_epoches):
        dataset.shuffle_train()
        model.train()
        epoch_loss = 0
        for user, pos_item, neg_item in tqdm(
                dataset.gen_batch(batch_sz, neg_sz),
                total=len(dataset.train) // batch_sz):
            label = np.concatenate(
                (np.ones(batch_sz), np.zeros(batch_sz * neg_sz)))
            loss = model(gvar(user), gvar(pos_item), gvar(neg_item),
                         gvar(label))
            epoch_loss += loss.item()
            if np.isnan(epoch_loss):
                logger.error(epoch_loss)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        print('epoch: {} loss:{}'.format(i, epoch_loss))
        if (i + 1) % save_round == 0:
            save_model(model, args.dirname, i + 1)
            print('saved model dict')
            eval2(model, dataset)
Example #22
def main():
    with fluid.dygraph.guard(place=fluid.CPUPlace()):

        model = MNIST_CNN('mnist_model')
        model.train()
        train_loader = load_data('train', BATCH_SIZE)

        # Define the learning-rate schedule and create the optimizer over the model parameters
        total_steps = (int(60000 // BATCH_SIZE) + 1) * EPOCH_NUM
        lr = fluid.dygraph.PolynomialDecay(0.01, total_steps, 0.001)
        optimizer = fluid.optimizer.AdamOptimizer(
            learning_rate=lr, parameter_list=model.parameters())

        for epoch_id in range(EPOCH_NUM):

            for batch_id, data in enumerate(train_loader()):

                image_data, label_data = data
                image = fluid.dygraph.to_variable(image_data)
                label = fluid.dygraph.to_variable(label_data)

                # forward propagation
                predict, acc = model(image, label)
                loss = fluid.layers.cross_entropy(predict, label)
                avg_loss = fluid.layers.mean(loss)
                avg_acc = fluid.layers.mean(acc)
                if batch_id % 200 == 0:
                    print(
                        "epoch: {}, batch: {}, loss is: {}, acc is {}".format(
                            epoch_id, batch_id, avg_loss.numpy(),
                            avg_acc.numpy()))

                # backward propagation
                avg_loss.backward()
                optimizer.minimize(avg_loss)
                model.clear_gradients()

        # save model and optimizer parameters
        fluid.save_dygraph(model.state_dict(),
                           './checkpoint/epoch{}'.format(epoch_id))
        fluid.save_dygraph(optimizer.state_dict(),
                           './checkpoint/epoch{}'.format(epoch_id))
Example #23
def main(args):
    train_loader, test_loader = load_data(args)
    model = LambdaResNet18()
    print('Model Parameters: {}'.format(get_n_params(model)))

    optimizer = optim.SGD(model.parameters(),
                          lr=args.lr,
                          weight_decay=args.weight_decay,
                          momentum=args.momentum)

    if not os.path.isdir('checkpoints'):
        os.mkdir('checkpoints')

    if args.checkpoints is not None:
        checkpoints = torch.load(os.path.join('checkpoints', args.checkpoints))
        model.load_state_dict(checkpoints['model_state_dict'])
        optimizer.load_state_dict(checkpoints['optimizer_state_dict'])
        start_epoch = checkpoints['global_epoch']
    else:
        start_epoch = 1

    if args.cuda:
        model = model.cuda()

    if not args.evaluation:
        criterion = nn.CrossEntropyLoss()
        lr_scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(
            optimizer, T_0=10, T_mult=2, eta_min=0.00001)

        global_acc = 0.
        for epoch in range(start_epoch, args.epochs + 1):
            _train(epoch, train_loader, model, optimizer, criterion, args)
            best_acc = _eval(epoch, test_loader, model, args)
            if global_acc < best_acc:
                global_acc = best_acc
                save_checkpoint(best_acc, model, optimizer, args, epoch)

            lr_scheduler.step()
            print('Current Learning Rate: {}'.format(
                lr_scheduler.get_last_lr()))
    else:
        _eval(start_epoch, test_loader, model, args)
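save_checkpoint is assumed project code. A minimal sketch consistent with the keys this script loads back (model_state_dict, optimizer_state_dict, global_epoch); the checkpoint file name is an assumption:

import os
import torch

def save_checkpoint(best_acc, model, optimizer, args, epoch):
    # Persist enough state for the resume path above to work.
    state = {
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'global_epoch': epoch,
        'best_acc': best_acc,
    }
    torch.save(state, os.path.join('checkpoints', 'checkpoint_epoch_{}.pth'.format(epoch)))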
Example #24
def main():
    args = init_args()
    if args.list_models:
        print('\n'.join(models.get_model_names()))
        exit()
    m = args.model.split(',')
    x, y = preprocess.load_data(args.input_csv)
    for model_name in m:
        model = models.load_model(model_name, args.model_dir)
        if model is None:
            if not args.quiet:
                print('Invalid model %s' % model_name)
            continue
        if not args.quiet:
            print('Load model %s' % model_name)
        pre_y = model.predict(x)
        if not args.quiet:
            print('Predict by model %s: %.2f' % (model_name, pre_y[0]))
        else:
            print('%.2f' % pre_y[0])
Example #25
def main(args):
    train_loader, val_loader, test_loader = load_data(args)
    g = Graph(args.dilation, args.m, args.hidden_size, args.max_level,
              args.per_n_nodes)
    model = Model(graph=g.graph,
                  max_level=args.max_level,
                  per_n_node=args.per_n_nodes)

    if args.cuda:
        model = model.cuda()
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(),
                          lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)
    start_epoch = 1

    for epoch in range(start_epoch, args.epochs + 1):
        _train(epoch, train_loader, model, criterion, optimizer, args)
        _eval(epoch, val_loader, model, criterion, args)
Example #26
File: cnn_test.py  Project: benkha/nre_tf
    def __init__(self):
        self.stddev = 0.02
        self.batch_size = 160

        self.data = load_data()
        self.d_a = 50
        self.d_b = 5
        self.d_c = 230
        self.d = self.d_a + self.d_b * 2
        self.l = 3
        self.n_r = len(self.data["relation_map"])

        self.word_map = self.data["word_map"]
        self.word_matrix = self.data["word_matrix"]
        self.train_list = self.data["test_list"]
        self.num_positions = 2 * self.data["limit"] + 1
        self.bags_list = self.data["bags_list"]
        print("Number of bags:", len(self.bags_list))
        self.num_epochs = 50
        self.max_length = self.data["max_length"]

        self.sentences_placeholder = tf.placeholder(tf.float32,
                                                    [None, self.max_length, 3])
        self.sentences = tf.expand_dims(self.sentences_placeholder, -1)
        self.sentence_vectors = self.train_sentence(self.sentences)

        self.flat_sentences = tf.squeeze(self.sentence_vectors, [1, 2])
        self.bag_indices = tf.placeholder(tf.int32, [self.batch_size])
        self.avg_sentences = self.avg_bags(self.bag_indices,
                                           self.flat_sentences)

        self.logits = self.fully_connected(self.avg_sentences, self.d_c,
                                           self.n_r, "logits")

        self.labels_placeholder = tf.placeholder(tf.int32, [self.batch_size])

        self.cost = tf.reduce_sum(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=self.labels_placeholder, logits=self.logits))
Example #27
File: main.py  Project: BordiaS/SimSiam-1
def main(args):
    train_loader, test_loader, down_train_loader, down_test_loader, = load_data(
        args)

    if not os.path.isdir('checkpoints'):
        os.mkdir('checkpoints')

    model = Model(args)
    down_model = DownStreamModel(args)
    if args.cuda:
        model = model.cuda()
        down_model = down_model.cuda()

    if args.pretrain:
        optimizer = optim.SGD(model.parameters(),
                              lr=args.lr,
                              weight_decay=args.weight_decay,
                              momentum=args.momentum)
        lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer,
                                                                  T_max=800)

        train_losses, epoch_list = [], []
        for epoch in range(1, args.epochs + 1):
            train_loss = pre_train(epoch, train_loader, model, optimizer, args)
            if epoch % args.print_intervals == 0:
                save_checkpoint(model, optimizer, args, epoch)
                args.down_epochs = 1
                train_eval_down_task(down_model, down_train_loader,
                                     down_test_loader, args)
            lr_scheduler.step()
            train_losses.append(train_loss)
            epoch_list.append(epoch)
            print(' Cur lr: {0:.5f}'.format(lr_scheduler.get_last_lr()[0]))
        plt.plot(epoch_list, train_losses)
        plt.savefig('test.png', dpi=300)
    else:
        args.down_epochs = 810
        train_eval_down_task(down_model, down_train_loader, down_test_loader,
                             args)
Example #28
import numpy as np

from sklearn.cross_validation import KFold
from sklearn.grid_search import GridSearchCV
from sklearn.svm.sparse import LinearSVC

import preprocess

if __name__ == '__main__':
    X_sg, y_sg = preprocess.load_data('data/singular.txt')
    X_pl, y_pl = preprocess.load_data('data/plural.txt')
    X_sg_n_all = preprocess.load_data('data/singular_n.txt', labels=False)
    X_pl_n_all = preprocess.load_data('data/plural_n.txt', labels=False)
    X_sg_n, X_pl_n = [], []
    for sg, pl in zip(X_sg_n_all, X_pl_n_all):
        sg = sg.strip()
        pl = pl.strip()
        if pl.endswith('i') and not pl.endswith('uri'):
            X_sg_n.append(sg)
            X_pl_n.append(pl)
    X_sg_n = np.array(X_sg_n)
    X_pl_n = np.array(X_pl_n)
    scores_sg = np.empty((5, 2, 2))
    predict_sg = np.empty((5, 2, 2))
    best_C_sg = np.empty((5, 2, 2))
    scores_pl = np.empty((5, 2, 2))
    best_C_pl = np.empty((5, 2, 2))
    predict_pl = np.empty((5, 2, 2))

    for i, n in enumerate((2, 3, 4, 5, 6)):
        for j, suffix in enumerate(('', '$')):
Example #29
                    help='GPU ID (negative value indicates CPU)')
parser.add_argument('--image_path', '-i', default=None, type=str)
parser.add_argument('--weight', '-w', default="weight/chainer_fcn.weight", type=str)
parser.add_argument('--classes', default=21, type=int)
parser.add_argument('--clop', "-c", default=True, type=bool)
parser.add_argument('--clopsize', "-s", default=224, type=int)
args = parser.parse_args()

img_name = args.image_path.split("/")[-1].split(".")[0]

color_map = make_color_map()
FCN = FullyConvolutionalNetwork()
model = FCN.create_model()
model.load_weights('weights/fcn_params')

o = load_data(args.image_path, size=args.clopsize, mode="original")
x = load_data(args.image_path, size=args.clopsize, mode="data")

start = time()
pred = model.predict(x)
pred = pred[0].argmax(axis=0)
print(pred.max())

row, col = pred.shape
dst = np.ones((row, col, 3))
for i in range(21):
    dst[pred == i] = color_map[i]
img = Image.fromarray(np.uint8(dst))

b,g,r = img.split()
img = Image.merge("RGB", (r, g, b))
Example #30
from config import *
import sys



def translate(sentence, index_to_word):
    return ' '.join(map(lambda x: index_to_word[x], sentence))

if __name__ == '__main__':
    global index_to_word, word_to_index  

    word_dim = 3000
    hidden_dim = 100
    sc = 10000

    x, y, index_to_word = load_data(vocabulary_size = word_dim, sentences_count = sc)
    word_to_index = dict([(w,i) for i,w in enumerate(index_to_word)])
    sys.stdout.flush()
    
    rnn = GRU(word_dim, dictionary=index_to_word)
    rnn.load('./models/rnn-theano-100-3000-2016-01-16-09-31-26.npz')

    while True:
        current_sent = [word_to_index[sentence_start_token]]
        print 'reset sentence'
        while current_sent[-1] != word_to_index[sentence_end_token]:
            print '='*100
            print '\tCurrent sentence: {}'.format(translate(current_sent, index_to_word))

            prob = rnn.forward(current_sent)[-1]
            ind = prob.argsort()[-1:-10:-1]
Example #31
                max_acc = sum_acc / j
                save_path = "saved_model/model-" + str(i)
                saver.save(sess, save_path=save_path)
                print("Model saved to " + save_path)

        i += 1
    print("maximum accuracy acheived is " + str(max_acc))
    return sess


word_vecs = word_embedings(debug=False)
batch_size = 60
embedding_size = 300

res_train = load_data(
    path=
    r'C:\Users\pravi\PycharmProjects\Sentence_similarity\data\sts\sick2014\SICK_train.txt'
)

train_data_1 = res_train['data_1']
train_data_2 = res_train['data_2']
train_length1 = res_train['length1']
train_length2 = res_train['length2']
labels = res_train['label']
word2Id = res_train['word2Id']
Id2Word = res_train['Id2Word']
max_sequence_length = res_train['max_sequence_length']
vocab_size = res_train['vocab_size']
total_classes = res_train['total_classes']

res_test = load_test_data(
    r'C:\Users\pravi\PycharmProjects\Sentence_similarity\data\sts\sick2014\SICK_test_annotated.txt',
Example #32
#!/usr/bin/python
  
import sys
from time import time
import preprocess


### features_train and features_test are the features for the training
### and testing datasets, respectively
### labels_train and labels_test are the corresponding item labels
features_train, labels_train, features_test, labels_test = preprocess.load_data()

dataset_size = len(features_train)
features_train = features_train.reshape(dataset_size,-1)

dataset_size_test = len(features_test)
features_test = features_test.reshape(dataset_size_test,-1)





#########################################################


from sklearn import tree

print len(features_train), len(labels_train)

clf = tree.DecisionTreeClassifier(min_samples_split=50)
clf = clf.fit(features_train, labels_train)
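A short usage note: the fitted tree can then be scored on the held-out split, for example:

from sklearn.metrics import accuracy_score

pred = clf.predict(features_test)
print(accuracy_score(labels_test, pred))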
Example #33
    pl.errorbar(scores[:, 1] + 0.02, scores[:, 4], yerr=scores[:, 7], c="0.5", marker="o")
    if scores2 is not None:
        pl.errorbar(scores2[:, 1] + 0.02, scores2[:, 4], yerr=scores2[:, 7], c="0.5", marker="s")
    pl.ylabel("Plural and combined acc.")
    # ax3.yaxis.set_major_formatter(FuncFormatter(percentages))
    # pl.setp(ax3.get_xticklabels(), visible=False)

    # pl.show()

    for ext in ("pdf", "svg", "png"):
        pl.savefig("train_size-i.%s" % ext)


if __name__ == "__main__":
    print "Loading training and test data..."
    X_sg_all, y_sg_all = preprocess.load_data("data/singular.txt")
    X_pl_all, y_pl_all = preprocess.load_data("data/plural.txt")

    X_sg, y_sg, X_pl, y_pl = [], [], [], []
    for sg, this_y_sg, pl, this_y_pl in zip(X_sg_all, y_sg_all, X_pl_all, y_pl_all):
        # get rid of balauri
        sg = sg.strip()
        pl = pl.strip()
        if not (pl.endswith("uri") and sg.endswith("ur")):
            X_sg.append(sg)
            y_sg.append(this_y_sg)
            X_pl.append(pl)
            y_pl.append(this_y_pl)
    X_sg = np.array(X_sg)
    y_sg = np.array(y_sg)
    X_pl = np.array(X_pl)
Example #34

import sys

import numpy as np

from sklearn.svm.sparse import LinearSVC
from preprocess import get_clf, load_data, preprocess_data
from sklearn.metrics import classification_report
from sklearn.cross_validation import KFold, LeaveOneOut
from sklearn.grid_search import GridSearchCV

if __name__ == '__main__':
	filename = 'inf-all-labeled.txt'

	X, y = load_data(filename)
	n = len(X)
	scores = np.empty((5, 2, 2), dtype=np.float)
	best_C = np.empty((5, 2, 2), dtype=np.float)
	for i, ngrams in enumerate((2, 3, 4, 5, 6)):
		for j, suffix in enumerate(('', '$')):
			for k, binarize in enumerate((True, False)):
				print "ngrams=%d, suffix=%s, binarize=%s" % (ngrams, suffix, binarize)
				X_new = preprocess_data(X, n=ngrams, suffix=suffix, binarize=binarize)
				grid = GridSearchCV(estimator=LinearSVC(), n_jobs=4, verbose=False,
							    	param_grid={'C': (0.01, 0.03, 0.1, 0.3, 1, 1.3)},
									cv=LeaveOneOut(n, indices=True))
				grid.fit(X_new, y)
				scores[i, j, k] = grid.best_score
				best_C[i, j, k] = grid.best_estimator.C
Example #35
target_colors = "rgb"
    
# Classification settings
pipeline = Pipeline([
    ('extr', InfinitivesExtractor()),
    ('svc', LinearSVC(multi_class=True))
])
parameters = {
    'extr__count': (True,False),
    'extr__n': (3, 4, 5, 6),
    'svc__C': (1e-1, 1e-2, 1e9)
}
grid_search = GridSearchCV(pipeline, parameters)

print "Loading data..."
X, y = load_data()
print "Searching for the best model..."
t0 = time()
grid_search.fit(X, y)
print "Done in %0.3f" % (time() - t0)
print "Best score: %0.3f" % grid_search.best_score
clf = grid_search.best_estimator
print clf
yp = clf.predict(X)
print classification_report(y, yp, targets, target_names)

#pl.figure()
#pl.title("Classification rate for 3-fold stratified CV")
#pl.xlabel("n-gram maximum size")
#pl.ylabel("successful classification rate")
#ns = range(1, 11)
Example #36
import preprocess as pp
from const import DTYPES, FEATURES

# Configure logging
logger = logging.getLogger('model')
logger.setLevel(logging.DEBUG)
handler = logging.StreamHandler()
handler.setLevel(logging.DEBUG)
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
handler.setFormatter(formatter)
logger.addHandler(handler)


use_cols = ['Agencia_ID', 'Canal_ID', 'Ruta_SAK', 'Cliente_ID', 'Demanda_uni_equil']
RAW_TRAIN = pp.load_data(path='raw_data/train.csv', use_cols=use_cols)
RAW_TEST = pp.load_data(path='raw_data/test.csv', use_cols=['id', 'Agencia_ID', 'Canal_ID', 'Ruta_SAK', 'Cliente_ID'])


class ClusterXgbModel(object):

    def __init__(self, n_clusters=3, batch_size=1.0, cv_size=0.2):
        self._n_clusters = n_clusters
        self._batch_size = batch_size
        self._cv_size = cv_size
        self._cluster_model = None
        self._U = None
        self._xgb_params = None
        self._xgb_boosters = []

    @staticmethod
Example #37
        os.makedirs(config['exp_folder'])


if __name__ == "__main__":
    args = parse_args()
    config = eval(args.prototype)()
    print 'Beginning...'
    # Set up experiment directory.
    create_experiment(config)
    # This will load our training data.
    if os.path.exists(config['exp_folder'] + '/dataset.pkl'):
        print 'Loading from existed data...'
        data = cPickle.load(open(config['exp_folder'] + '/dataset.pkl', 'rb'))
    else:
        print 'Loading from new data...'
        data = load_data(config)

    # Train our model.
    if args.model == 'supervised_simple':
        adem = ADEM(config)
        print 'Training...'
        adem.train_eval(data, config, use_saved_embeddings=False)
        print 'Trained!'
        adem.save()
    else:
        params = params()
        params.LW = config['LW']
        params.LC = config['LC']
        params.outfile = config['outfile']
        params.batchsize = config['batchsize']
        params.hiddensize = config['emb_dim']
Example #38
File: popescu.py  Project: vene/misc-nlp
# Approximately reproducing M. Popescu et al (see paper)
# They use a 3-class SVM with a string kernel, we use a linear
# SVM preceded by feature expansion for similar results

import numpy as np

from sklearn.svm import LinearSVC
from sklearn.grid_search import GridSearchCV
from sklearn.cross_validation import StratifiedKFold, StratifiedShuffleSplit
import preprocess


if __name__ == "__main__":
    print "Loading training and test data..."
    X_sg, y_sg = preprocess.load_data("data/singular.txt")
    X_sg_n_clean = preprocess.load_data("data/singular_n.txt", labels=False)
    X_sg = np.r_[X_sg, X_sg_n_clean]
    y_sg = np.r_[y_sg, 2 * np.ones(len(X_sg_n_clean))]
    X_sg_p = preprocess.preprocess_data(X_sg, suffix="$", n=5, return_vect=False, binarize=False)

    train_split, test_split = iter(StratifiedShuffleSplit(y_sg, 1, test_size=0.1, random_state=0)).next()

    X_train, y_train = X_sg[train_split], y_sg[train_split]
    X_test, y_test = X_sg[test_split], y_sg[test_split]
    raise Exception
    scores = np.empty((5, 2, 2))
    best_C = np.empty((5, 2, 2))
    vectorizers = np.empty((5, 2, 2), dtype=np.object)
    for i, n in enumerate((2, 3, 4, 5, 6)):
        for j, suffix in enumerate(("", "$")):
            for k, binarize in enumerate((True, False)):
Example #39
from keras.models import Sequential
from keras.layers import Input
from keras.layers.core import Flatten, Dense, Dropout
from keras.layers.convolutional import Conv2D, MaxPooling2D, ZeroPadding2D
from keras.optimizers import SGD, Adadelta, Adam
import numpy as np
from keras.applications import VGG16
from keras.callbacks import ModelCheckpoint
from keras.models import Model
from keras.preprocessing.image import ImageDataGenerator

# Import data
from preprocess import load_data
Xtrain, ytrain, Xval, yval, Xtest, ytest = load_data()
Xtrain = Xtrain.astype('float') / 255.0
Xtest = Xtest.astype('float') / 255.0
Xval = Xval.astype('float') / 255.0


def VGG_16(weights_path=None):
    img_input = Input(shape=(224, 224, 3))
    # Block 1
    x = Conv2D(64, (3, 3),
               activation='relu',
               padding='same',
               name='block1_conv1')(img_input)
    x = Conv2D(64, (3, 3),
               activation='relu',
               padding='same',
               name='block1_conv2')(x)
    x = MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x)
Example #40
## Process video
for epoch_id in epoch_ids:
    print('---------- processing video for epoch {} ----------'.format(epoch_id))
    vid_path = utils.join_dir(params.data_dir, 'epoch{:0>2}_front.mkv'.format(epoch_id))
    assert os.path.isfile(vid_path)
    cap = cv2.VideoCapture(vid_path)


    # frame_count = utils.frame_count(vid_path) 
    # this line goes wrong, instead with below code
    ############ New codes added by student ############
    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    cap.release()
    # import test data: epoch 10
    imgs_test, wheels_test = preprocess.load_data('test')
    imgs_test = np.array(imgs_test)
    imgs_test = imgs_test.astype('float32')
    imgs_test /= 255
    ####################################################

    machine_steering = []

    print('performing inference...')
    time_start = time.time()

    #for frame_id in range(frame_count):
    #    ret, img = cap.read()
    #    assert ret
    # ## you can modify here based on your model
    #    img = img_pre_process(img)