Example #1
def add_to_training_data(posts):
    """ Given posts (a list of dicts extracted from StackExchange JSON data), 
    add posts to the training data stored in the database. The model is then
    retrained using all available data.

    Note: If a post ID is already in the training database, it is updated with
    the newly-extracted measurements.
    """

    query = "INSERT INTO trainingdata ("
    query += ', '.join(fields) + ") VALUES "
    
    datavecs = [str(tuple(extract_data_vector(item, True, True))) for item in posts]
    
    query += ",\n".join(datavecs)
    query += " ON DUPLICATE KEY UPDATE "
    query += ','.join(["{0}=VALUES({0})".format(field) for field in fields[1:]])
    query += ';\n'

    with open('dbase.conf', 'r') as f:
        dbase, user, passwd = f.readline().rstrip().split(',')
    conn = pymysql.connect(user=user, passwd=passwd, db=dbase)
    cur = conn.cursor()
    count = cur.execute(query)
    conn.commit()
    print("Successfully merged {} entries!".format(count))

    cur.close()
    conn.close()

    model.build_model()
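
The query above serializes each row with str(tuple(...)), which breaks on values containing quotes and is open to SQL injection. Below is a hedged sketch using pymysql's parameter binding, assuming the same module-level fields list and extract_data_vector helper as in the example:

import pymysql

def add_to_training_data_parameterized(posts, fields, conf_path='dbase.conf'):
    # One parameterized INSERT ... ON DUPLICATE KEY UPDATE; the driver escapes every value.
    placeholders = ', '.join(['%s'] * len(fields))
    sql = ("INSERT INTO trainingdata (" + ', '.join(fields) + ") "
           "VALUES (" + placeholders + ") ON DUPLICATE KEY UPDATE "
           + ', '.join("{0}=VALUES({0})".format(f) for f in fields[1:]))
    rows = [tuple(extract_data_vector(item, True, True)) for item in posts]

    with open(conf_path) as f:
        dbase, user, passwd = f.readline().rstrip().split(',')
    conn = pymysql.connect(user=user, passwd=passwd, db=dbase)
    with conn.cursor() as cur:
        count = cur.executemany(sql, rows)
    conn.commit()
    conn.close()
    print("Successfully merged {} entries!".format(count))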
Example #2
def main():
    import model

    models = [model.build_model('cpu') for _ in range(5)]
    for i, m in enumerate(models):
        m.load_weights('weights-' + str(i) + '.09.hdf5')

    cluster_to_data = load_testing_data()

    for cluster, data in cluster_to_data.items():
        X, flops, y = data

        new_flops = [np.zeros((flop.shape[0], INPUT_LENGTH, flop.shape[1])) for flop in flops]

        # flops: (player, hand, board)
        # new_flops: (player, hand, actions, board)
        # X: (player, hand, actions, action)
        for i, player in enumerate(zip(flops, new_flops, X)):
            for j, (flop, new_flop, X_hand) in enumerate(zip(*player)):
                for k, v in enumerate(X_hand):
                    if v[15] == 1:   # determine if flop has been reached
                        break

                new_flops[i][j] = np.concatenate((np.zeros((k, flop.shape[0])),
                                                  np.tile(np.expand_dims(flop, 0),
                                                          (INPUT_LENGTH - k, 1))))

        flops = [x.astype(int) for x in new_flops]

        losses, individual_losses = evaluate(models, X, flops, y)
        print ("Total Cluster Loss for {n} players: {val}"
               .format(n=len(X), val=losses))
        print "Total Individual Losses:", individual_losses
Example #3
def load_model(model_dir, model_weights=None):
    config_json = json.load(open(model_dir + '/config.json'))
    model_json = json.load(open(model_dir + '/model.json'))
    model_json.update(config_json)

    if 'model_cfg' in model_json:
        for k,v in model_json['model_cfg']:
            model_json[k] = v

    if model_weights:
        if isinstance(model_weights, bool) and model_weights is True:
            # Original behavior: only one weights file exists and it has
            # the weights from the best epoch.
            model_json['model_weights'] = model_dir + '/model.h5'
        elif isinstance(model_weights, str):
            # New behavior: multiple weights files may exist.
            if os.path.exists(model_dir + '/' + model_weights):
                model_json['model_weights'] = model_dir + '/' + model_weights
            else:
                model_json['model_weights'] = model_weights
        else:
            raise ValueError('unexpected type for "model_weights" %s (%s)' %
                    (model_weights, type(model_weights)))

    # Re-instantiate ModelConfig using the updated JSON.
    sys.path.append(model_dir)
    from model import build_model
    model_cfg = ModelConfig(**model_json)
    model = build_model(model_cfg)

    return model, model_cfg
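
A minimal usage sketch; the directory name is hypothetical, and model_weights=True falls back to the single model.h5 file as described in the comments above:

# Hypothetical experiment directory containing config.json, model.json and model.h5.
model, model_cfg = load_model('experiments/example-run', model_weights=True)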
Example #4
def main():
    print 'Loading model...'
    labels_df = pd.read_csv('labels.csv')
    num_labels = len(labels_df['label'].unique())

    mdl = build_model(num_labels)
    mdl.initialize()
    mdl.load_params_from('models/cnn_handle_frac_last.pkl')


    test_df = pd.read_csv('test_imgs.csv')

    preds = []
    for filename in test_df['filename']:
        latex = Latex2Code(mdl, labels_df, verbose=False)
        print 'Predicting', filename
        img = cv2.imread(filename)
        preds.append(latex.to_latex(img))

    y_true = [s.replace(' ', '') for s in test_df['equation']]
    preds_new = [s.replace(' ', '') for s in preds]

    y_true = np.array(y_true)
    preds_new = np.array(preds_new)

    print 'Accuracy =', 1. * sum(y_true == preds_new) / len(y_true)

    test_df['preds'] = preds
    test_df.to_csv('out.csv')
Example #5
def joblib_wrapper(historical_games_trunc, all_stats, bet_info, historical_games_by_tuple, tunable_param_list):
  (moving_averages, transform_params, n_estimators, min_samples_split, min_samples_leaf, bet_threshold) = tunable_param_list

  print 'Building model...'
  X, y = model.build_model_inputs(historical_games_trunc, all_stats, moving_averages, transform_params)
  the_model = model.build_model(X, y, n_estimators=n_estimators, min_samples_split=min_samples_split, min_samples_leaf=min_samples_leaf)
  print 'Evaluating model...'
  winnings = evaluator.evaluate_model(the_model, all_stats, bet_info, historical_games_by_tuple, moving_averages, transform_params, bet_threshold)

  return winnings
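
The name suggests this wrapper is meant to be fanned out with joblib; a minimal sketch, assuming param_grid is an iterable of tunable_param_list tuples and the remaining arguments are already in memory:

from joblib import Parallel, delayed

# Score every parameter combination in parallel; each call returns its winnings.
all_winnings = Parallel(n_jobs=-1)(
    delayed(joblib_wrapper)(historical_games_trunc, all_stats, bet_info,
                            historical_games_by_tuple, params)
    for params in param_grid)
best_winnings, best_params = max(zip(all_winnings, param_grid), key=lambda t: t[0])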
Example #6
def work():
    config_dict = yaml.load(open(sys.argv[1], 'r'))
    print config_dict

    if config_dict['working_mode'] == 'train_new':
        train, valid, alphabet = build_datasets(config_dict)
        generator, cost = build_model(len(alphabet), config_dict)
        algorithm = build_algorithm(generator, cost, config_dict)
        extensions = build_extensions(cost, algorithm, valid, config_dict)
        main_loop = MainLoop(algorithm=algorithm, data_stream=train,
                             model=Model(cost), extensions=extensions)
        main_loop.run()

    elif config_dict['working_mode'] == 'train_resume':
        # TODO
        pass
Example #7
    def __init__(self, config_dict):
        print config_dict
        train, valid, alphabet = build_datasets(config_dict)
        generator, cost = build_model(len(alphabet), config_dict)
        algorithm = build_algorithm(generator, cost, config_dict)
        extensions = build_extensions(cost, algorithm, valid, config_dict)
        main_loop = MainLoop(algorithm=algorithm, data_stream=train,
                             model=Model(cost), extensions=extensions)
        ml = Load(config_dict['checkpoint_path'], load_log=True)
        ml.load_to(main_loop)
        generator = main_loop.model.get_top_bricks()[-1]
        
        self.numbers_from_text = pickle.load(open(config_dict['dict_path']))

        x = tensor.lmatrix('sample')
        cost_cg = generator.cost(x)
        self.cost_f = theano.function([x], cost_cg)
Example #8
def test(X_data, Y_data, activation, job_dir, device_name):

    with tf.Graph().as_default() as v_graph:
        (x, y_), _, cost, error_rate, saver = \
                    build_model(activation,
                                is_learning=False, enable_bn=True,
                                device_name=device_name)

    with tf.Session(graph=v_graph) as sess:
        saver.restore(sess, job_dir)

        cost, acc = sess.run(fetches=[cost, error_rate],
                             feed_dict={
                                 x: X_data,
                                 y_: Y_data
                             })

        print("Test error_rate: %g" % (acc))
        print("Test cost: %g" % (cost))
Example #9
def test_learning_rate(lrs=[0.1, 0.05, 0.01, 0.005, 0.001, 0.0005, 0.0001]):
    X_train, X_valid, X_test, y_train, y_valid, y_test = get_datasets()
    results = {}
    for i in range(len(lrs)):
        temp_results = []
        for j in range(20):
            model = build_model(layers_neurons=[32, 4, 1], lr=lrs[i])
            history = model.fit(X_train, y_train, epochs=20, batch_size=10)
            temp_results.append(history.history["loss"][-1])
        results[lrs[i]] = mean(temp_results)

    with open('results.json', "r") as file:
        data = json.load(file)

    data["learning_rate"] = results

    with open('results.json', "w") as file:
        json.dump(data, file, indent=4)
    return results
Example #10
def test_second_layer_size(max_size=6):
    X_train, X_valid, X_test, y_train, y_valid, y_test = get_datasets()
    results = {}
    for i in range(max_size):
        temp_results = []
        for j in range(20):
            model = build_model(layers_neurons=[2**(max_size - 1), 2**i, 1])
            history = model.fit(X_train, y_train, epochs=20, batch_size=10)
            temp_results.append(history.history["loss"][-1])
        results[2**i] = mean(temp_results)

    with open('results.json', "r") as file:
        data = json.load(file)

    data["second_layer_size"] = results

    with open('results.json', "w") as file:
        json.dump(data, file, indent=4)
    return results
Example #11
def plot_latent_space(weightsfile):
    print('building model')
    layers = model.build_model()
    batch_size = 128
    decoder_func = theano_funcs.create_decoder_func(layers)

    print('loading weights from %s' % (weightsfile))
    model.load_weights([
        layers['l_decoder_out'],
        layers['l_discriminator_out'],
    ], weightsfile)

    # regularly-spaced grid of points sampled from p(z)
    Z = np.mgrid[2:-2.2:-0.2, -2:2.2:0.2].reshape(2, -1).T[:, ::-1].astype(np.float32)

    reconstructions = []
    print('generating samples')
    for idx in get_batch_idx(Z.shape[0], batch_size):
        Z_batch = Z[idx]
        X_batch = decoder_func(Z_batch)
        reconstructions.append(X_batch)

    X = np.vstack(reconstructions)
    X = X.reshape(X.shape[0], 28, 28)

    fig = plt.figure(1, (12., 12.))
    ax1 = plt.axes(frameon=False)
    ax1.get_xaxis().set_visible(False)
    ax1.get_yaxis().set_visible(False)
    plt.title('samples generated from latent space of autoencoder')
    grid = ImageGrid(
        fig, 111, nrows_ncols=(21, 21),
        share_all=True)

    print('plotting latent space')
    for i, x in enumerate(X):
        img = (x * 255).astype(np.uint8)
        grid[i].imshow(img, cmap='Greys_r')
        grid[i].get_xaxis().set_visible(False)
        grid[i].get_yaxis().set_visible(False)
        grid[i].set_frame_on(False)

    plt.savefig('latent_train_val.png', bbox_inches='tight')
Example #12
def test():

    logger = logging.getLogger('MobileNetReID.test')

    # prepare dataloader
    train_loader, val_loader, num_query, num_class = make_data_loader(cfg)
    # prepare model
    model = build_model(cfg, num_class)

    # load param
    ckpt_path = cfg.OUTPUT.ROOT_DIR + cfg.OUTPUT.CKPT_DIR + cfg.TEST.BEST_CKPT
    if os.path.isfile(ckpt_path):
        model.load_param(ckpt_path)
    else:
        logger.info("file: {} is not found".format(ckpt_path))
        exit(1)

    use_gpu = cfg.MODEL.DEVICE == 'cuda'
    device = cfg.MODEL.DEVICE_ID

    if use_gpu:
        model = nn.DataParallel(model)
        model.to(device)

    model.eval()
    metrics = R1_mAP(num_query, use_gpu=use_gpu)

    with torch.no_grad():
        for batch in val_loader:
            imgs, pids, camids = batch

            if use_gpu:
                imgs = imgs.to(device)
            feats = model(imgs)
            metrics.update(feats, pids, camids)

        cmc, mAP = metrics.compute()
        logger.info("test result as follows")
        logger.info("mAP:{:2%}".format(mAP))
        for r in [1, 5, 10]:
            logger.info("CMC cure, Rank-{:<3}:{:2%}".format(r, cmc[r - 1]))

        print("test is endding")
Example #13
def train(cfg):
    # prepare dataset
    train_loader, val_loader, test_loader, classes_list = make_data_loader(
        cfg, for_train=True)

    # build model and load parameter
    model = build_model(cfg)
    if cfg.SOLVER.SCHEDULER.RETRAIN_FROM_HEAD:
        if cfg.TRAIN.TRICK.PRETRAINED:
            model.load_param("Base", cfg.TRAIN.TRICK.PRETRAIN_PATH)
    else:
        if cfg.TRAIN.TRICK.PRETRAINED:
            model.load_param("Overall", cfg.TRAIN.TRICK.PRETRAIN_PATH)

    train_loader.dataset.batch_converter = model.backbone_batch_converter
    val_loader.dataset.batch_converter = model.backbone_batch_converter
    test_loader.dataset.batch_converter = model.backbone_batch_converter

    # build loss function
    loss_func, loss_class = build_loss(cfg)
    print('Train with losses:', cfg.LOSS.TYPE)

    # build optimizer (based on model)
    optimizer = build_optimizer(cfg, model,
                                bias_free=cfg.MODEL.BIAS_FREE)  # the loss may also contain parameters
    print("Model Bias-Free:{}".format(cfg.MODEL.BIAS_FREE))
    print('Train with the optimizer type is', cfg.SOLVER.OPTIMIZER.NAME)

    # build scheduler (based on optimizer)
    scheduler, start_epoch = build_scheduler(cfg, optimizer)

    # build and launch engine for training
    do_train(
        cfg,
        model,
        train_loader,
        val_loader,
        classes_list,
        optimizer,
        scheduler,
        loss_func,
        start_epoch,
    )
Example #14
def translate(src_text, config, model_path, beam=5):
    """
    Translate from a source language to a target language using
    the model at `model_path` whose config is described by the config
    at `config`.

    The translation uses a beam search of width `beam`.
    """
    params, project_dir = \
        parse_config(config, batch_size=1)

    # Tokenize the sentence.
    p = Popen([
        'perl', 'data/moses/tokenizer/tokenizer.perl', '-threads', '8', '-a',
        '-l', 'fr'
    ],
              stdin=PIPE,
              stdout=PIPE,
              stderr=PIPE)
    stdout, stderr = p.communicate(src_text.encode('utf-8'))
    stdout = stdout.decode('utf-8')

    # Build PyTorch model.
    model, src_vocab, tgt_vocab = build_model(params, project_dir)

    # Load saved model.
    if params['cpu']:
        device = torch.device('cpu')
    else:
        device = torch.device(params['gpu_ids'][0])
    load_model(model, model_path, device)

    # Prepare input vector.
    src_toks = src_vocab.to_ints(stdout)[:MAX_LENGTH]
    src_data = torch.tensor([src_toks]).to(device)
    max_tgt_length = min(MAX_LENGTH,
                         int(max(len(src_toks) * 1.5,
                                 len(src_toks) + 3)))

    # Beam search.
    out_data = beam_search(model, src_data, beam, max_tgt_length)
    out_text = tgt_vocab.to_text(out_data)[0]
    print(out_text)
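
A minimal call sketch; the config and checkpoint paths are hypothetical (the tokenizer call above is hard-wired to French source text):

# Translate one French sentence with a beam of width 5.
translate("Le chat est sur la table.",
          config='configs/fr-en.yaml',
          model_path='checkpoints/fr-en/best.pt',
          beam=5)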
Example #15
def run(item_fp,
        bs=512,
        save_dir=None,
        fold=None,
        model_path=None,
        num_epochs=10,
        st_epoch=0,
        stop_window=3,
        test=False,
        gene_em=False):
    items = pd.read_csv(item_fp)
    num_class = items['label'].nunique()
    lg.info('item shape: %s, num_class:%s', items.shape, num_class)

    if save_dir is None:
        save_dir = '{}_{}'.format(
            'result',
            datetime.datetime.now().strftime('%Y%m%d-%H%M%S'))
    if not os.path.exists(save_dir):
        os.mkdir(save_dir)

    seed = 42
    random.seed(seed)
    np.random.seed(seed)
    model_ft = build_model(num_class, embedding_dim)

    if model_path is not None:
        model_ft.load_state_dict(torch.load('{}'.format(model_path)))

    train_model(items,
                model_ft,
                save_dir,
                num_epochs=num_epochs,
                st_epoch=st_epoch,
                stop_window=stop_window,
                bs=bs,
                test=test)
    # reset for next fold
    model_path = None
    st_epoch = 0
    if test:
        lg.info('just test, return')
Example #16
def plot_latent_space(weightsfile):
    print('building model')
    layers = model.build_model()
    batch_size = 128
    decoder_func = theano_funcs.create_decoder_func(layers)

    print('loading weights from %s' % (weightsfile))
    model.load_weights([
        layers['l_decoder_out'],
        layers['l_discriminator_out'],
    ], weightsfile)

    # regularly-spaced grid of points sampled from p(z)
    Z = np.mgrid[2:-2.2:-0.2,
                 -2:2.2:0.2].reshape(2, -1).T[:, ::-1].astype(np.float32)

    reconstructions = []
    print('generating samples')
    for idx in get_batch_idx(Z.shape[0], batch_size):
        Z_batch = Z[idx]
        X_batch = decoder_func(Z_batch)
        reconstructions.append(X_batch)

    X = np.vstack(reconstructions)
    X = X.reshape(X.shape[0], 28, 28)

    fig = plt.figure(1, (12., 12.))
    ax1 = plt.axes(frameon=False)
    ax1.get_xaxis().set_visible(False)
    ax1.get_yaxis().set_visible(False)
    plt.title('samples generated from latent space of autoencoder')
    grid = ImageGrid(fig, 111, nrows_ncols=(21, 21), share_all=True)

    print('plotting latent space')
    for i, x in enumerate(X):
        img = (x * 255).astype(np.uint8)
        grid[i].imshow(img, cmap='Greys_r')
        grid[i].get_xaxis().set_visible(False)
        grid[i].get_yaxis().set_visible(False)
        grid[i].set_frame_on(False)

    plt.savefig('latent_train_val.png', bbox_inches='tight')
Example #17
def query(image_name,
          username="******",
          dataset="market1501",
          model_name="ssnetv4"):

    path = data_root + "{}/{}/query/".format(username, dataset)

    #merge the config file
    config_file = root + "/configs/{}".format(configs[model_name])
    cfg.merge_from_file(config_file)

    # update the checkpoint path
    cfg.MODEL.PRETRAIN_PATH = cfg.MODEL.PRETRAIN_PATH + "{}/{}.pth".format(
        dataset, model_name)

    img_path = path + image_name
    print("query:", img_path)
    # 1. prepare the data
    query = make_batch_data(cfg, [img_path])  # return a list of data

    # print(len(res))
    # 2. build the model
    model = build_model(cfg, numids[dataset])
    # print(model)
    imgs, pids, camids, paths = query
    # img = img.unsqueeze(0)
    img = imgs[0]

    # print(img.size())
    # switch to eval mode
    model.eval()
    result = model(img)

    query[0] = result
    # create the ranker
    ranker = Top5(username, data_root)
    # set the data
    ranker.set_gallery(dataset, model_name)
    ranker.set_query(query)

    result = ranker.compute()
    return result
Example #18
def main(cfg):
    torch.cuda.empty_cache()
    torch.manual_seed(cfg.param.seed)

    # Training settings
    cwd = Path(hydra.utils.get_original_cwd())
    wsi_dir = cwd/cfg.dir.wsi
    patch_dir = cwd/cfg.dir.patch
    ckpt = Checkpoint(
        cwd, cfg.gpus, cfg.dir.resume, cfg.dir.save_to, cfg.log.save_model)

    device = torch.device(
        f"cuda:{cfg.gpus[0]}" if cfg.gpus[0] != -1 else "cpu")

    model = build_model(gpus=cfg.gpus)
    optimizer = RAdam(model.parameters(), lr=cfg.param.lr)
    scheduler = StepLR(optimizer, step_size=1, gamma=cfg.param.gamma)
    if cfg.dir.resume:
        model, optimizer, scheduler = ckpt.load_state(
            model, optimizer, scheduler)
    criterion = get_loss_fn()

    train_wsi, test_wsi = split_wsi(
        wsi_dir, ckpt.save_to, cwd, ratio=cfg.data.ratio,
        projects=cfg.data.projects, strategies=cfg.data.strategies,
        limit=cfg.data.limit)
    for epoch in range(ckpt.start_epoch, cfg.param.epochs + 1):
        split_data(
            patch_dir, ckpt.save_to, train_wsi, test_wsi, cfg.data.chunks,
            epoch, cfg.dir.resume)
        for chunk in range(ckpt.start_chunk, cfg.data.chunks):
            data_loader = get_loaders(
                cfg.param.batch_size, ckpt.save_to, chunk, cfg.gpus)
            train(
                model, device, data_loader, optimizer, scheduler, criterion,
                epoch, cfg.param.epochs, chunk, cfg.data.chunks, ckpt)

        ckpt.start_chunk = 0
        scheduler.step()
        ckpt.save(model, optimizer, scheduler, epoch, chunk, loss=False)

    ckpt.close_writer()
Example #19
def model_train(bertvec, y):
    model = build_model(maxlen)
    model.summary()
    best_model_path = 'model/keras_bert.h5'
    adlearningRate = ReduceLROnPlateau(monitor='val_loss',
                                       factor=0.1,
                                       patience=10,
                                       verbose=0,
                                       mode='min',
                                       epsilon=0.0001,
                                       cooldown=0,
                                       min_lr=0)
    earlyStopping = EarlyStopping(monitor='val_acc',
                                  patience=10,
                                  verbose=1,
                                  mode='max')
    saveBestModel = ModelCheckpoint(best_model_path,
                                    save_weights_only=True,
                                    monitor='val_acc',
                                    verbose=1,
                                    save_best_only=True,
                                    mode='max')
    tensorboard = TensorBoard(log_dir='tensorboard',
                              histogram_freq=0,
                              write_graph=True,
                              write_grads=False,
                              write_images=True)
    model.fit(
        bertvec,
        y,
        batch_size=64,
        epochs=1,
        validation_split=0.2,
        shuffle=True,
        callbacks=[tensorboard, earlyStopping, saveBestModel, adlearningRate])
    # serialize the model architecture to JSON
    model_json = model.to_json()
    with open("model/weight/model.json", "w") as json_file:
        json_file.write(model_json)
    # serialize weights to HDF5
    model.save_weights("model/weight/model.h5")
    print("Saved model to disk")
Example #20
def evaluation(cfg, dataset='val'):
    model = build_model(cfg)
    device = torch.device(cfg.MODEL.DEVICE)
    model.to(device)

    # load last checkpoint
    assert cfg.MODEL.WEIGHTS != ""
    model.load_state_dict(torch.load(cfg.MODEL.WEIGHTS))

    # build the dataloader
    dataloader = make_data_loader(cfg, dataset)

    # start the inferring procedure
    do_evaluation(
        cfg,
        model,
        dataloader,
        device,
        verbose=True
    )
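
If the checkpoint was written on a GPU and evaluation later runs on a CPU-only machine, torch.load needs a map_location; a small sketch using the same cfg fields and device as above:

# Map stored tensors straight onto the target device while loading.
state_dict = torch.load(cfg.MODEL.WEIGHTS, map_location=device)
model.load_state_dict(state_dict)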
Example #21
    def pre_load_model(self, ml_mode=ML_MODE_COLORIZE):
        if (ml_mode == ML_MODE_COLORIZE):
            model_weights_path = 'models/model.06-2.5489.hdf5'
            self.model = build_model()
            self.model.load_weights(model_weights_path)

            print(self.model.summary())

            # Load the array of quantized ab value
            self.q_ab = np.load("data/pts_in_hull.npy")
            self.nb_q = self.q_ab.shape[0]

            # Fit a NN to q_ab
            self.nn_finder = nn.NearestNeighbors(n_neighbors=nb_neighbors,
                                                 algorithm='ball_tree').fit(
                                                     self.q_ab)
        elif (ml_mode == ML_MODE_SUPER_RES):
            print("Super res not yet implimented in TF_COLORISE class")
        else:
            print("Invalid ML Mode: ", ml_mode)
Example #22
def train(posFastaFile, negFastaFile, posValFasta, negValFasta, parameters):
    print "Reading input files..."
    positives = fasta.load_fasta(posFastaFile, parameters['min_length'])
    negatives = fasta.load_fasta(negFastaFile, parameters['min_length'])
    valpos = fasta.load_fasta(posValFasta, parameters['min_length'])
    valneg = fasta.load_fasta(negValFasta, parameters['min_length'])
    train = positives, negatives
    val = valpos, valneg
    print "Building new model..."
    mRNN = model.build_model(parameters['weights'],
                             parameters['embedding_size'],
                             parameters['recurrent_gate_size'], 5,
                             parameters['dropout'])
    print inspect.getmodule(mRNN.__class__)
    print "Training model..."
    mRNN = model.train_model(mRNN, train, val, parameters['epochs'],
                             parameters['output'], parameters['max_length'],
                             parameters['save_freq'],
                             parameters['early_stopping'])
    return mRNN
Example #23
def main():
    # select the GPU
    set_gpu()

    # load the data
    train, valid = load_train_data()
    test_images, test_visits = load_test_data()

    # build the model
    model = build_model(num_classes)

    # train the model
    callbacks = set_callbacks(model_path)
    train_model(model, train, valid, callbacks, batch_size, epochs)

    # evaluate the model
    eval_model(model, valid)

    # predict the results
    predict_model(model, test_images, test_visits, test_file_pre_npy_path, result_data_path)
Example #24
def main():
    args = get_arguments()
    model_weight_path = Path(args.path)
    if not model_weight_path.exists():
        raise FileNotFoundError(model_weight_path)
    output_path = Path(args.output_path)
    if not output_path.exists():
        raise FileNotFoundError(output_path)
    width, height = 224, 224
    num_channels = 3
    num_classes = len(USE_LABELS)
    input_shapes = (height, width, num_channels)
    base_model = resnet_v2.ResNet101V2(include_top=False,
                                       weights='imagenet',
                                       input_shape=input_shapes)
    model = build_model(base_model, n_classes=num_classes)
    model.load_weights(str(model_weight_path))

    tf.keras.backend.clear_session()
    model.save(str(output_path), save_format="tf")
Example #25
    def build_model(self):
        self.net_bone = build_model(base_model_cfg)
        if self.config.cuda:
            self.net_bone = self.net_bone.cuda()
            
        self.net_bone.eval()  # use_global_stats = True
        self.net_bone.apply(weights_init)
        if self.config.mode == 'train':
            if self.config.load_bone == '':
                if base_model_cfg == 'vgg':
                    self.net_bone.base.load_pretrained_model(torch.load(self.config.vgg))
                elif base_model_cfg == 'resnet':
                    self.net_bone.base.load_state_dict(torch.load(self.config.resnet))
            if self.config.load_bone != '': self.net_bone.load_state_dict(torch.load(self.config.load_bone))

        self.lr_bone = p['lr_bone']
        self.lr_branch = p['lr_branch']
        self.optimizer_bone = Adam(filter(lambda p: p.requires_grad, self.net_bone.parameters()), lr=self.lr_bone, weight_decay=p['wd'])

        self.print_network(self.net_bone, 'trueUnify bone part')
Example #26
def main():
    args = parse_args()

    callbacks = None
    if args.save:
        logdir = 'logdir/{}_{:03d}'.format("yelp_photos",
                                           len(glob.glob('logdir/*')))
        print('Saving to {}'.format(logdir))
        callbacks = [
            keras.callbacks.ModelCheckpoint(
                os.path.join(logdir, 'mobilenetv2.h5')),
            keras.callbacks.TensorBoard(log_dir=logdir)
        ]

    ds_train, train_info = build_yelp_dataset(split='train',
                                              image_shape=(args.res, args.res),
                                              rotate=True,
                                              batch_size=args.batch_size)
    ds_test, test_info = build_yelp_dataset(split='test',
                                            image_shape=(args.res, args.res),
                                            rotate=True,
                                            batch_size=args.batch_size)

    model = build_model(base_weights=args.base_model,
                        classes=train_info["classes"],
                        input_shape=(args.res, args.res, 3),
                        full_weights=args.full_model)

    model.compile(optimizer=keras.optimizers.Adam(learning_rate=args.lr),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    model.fit(ds_train,
              callbacks=callbacks,
              epochs=args.epochs,
              steps_per_epoch=train_info["length"],
              validation_data=ds_test,
              validation_steps=test_info["length"])

    if args.save:
        model.layers[0].save(os.path.join(logdir, 'mobilenetv2_base.h5'))
Example #27
File: eval.py Project: mydkzgj/esm
def eval(cfg, target_set_name="test"):
    # prepare dataset
    train_loader, val_loader, test_loader, classes_list = make_data_loader(cfg, for_train=False)
    num_classes = len(classes_list)

    # build model and load parameter
    model = build_model(cfg)
    model.load_param("Overall", cfg.TEST.WEIGHT)

    # build loss function
    loss_func, loss_class = build_loss(cfg)
    print('Eval with losses:', cfg.LOSS.TYPE)

    # input data_loader
    if target_set_name == "train":
        input_data_loader = train_loader
    elif target_set_name == "valid":
        input_data_loader = val_loader
    elif target_set_name == "test":
        input_data_loader = test_loader
    else:
        raise Exception("Wrong Dataset Name!")

    # build and launch engine for evaluation
    metrics = do_inference(cfg,
                           model,
                           input_data_loader,
                           classes_list,
                           loss_func,
                           target_set_name=target_set_name,
                           plotFlag=True)

    # logging with tensorboard summaryWriter
    model_epoch = cfg.TEST.WEIGHT.split('/')[-1].split('.')[0].split('_')[-1]
    model_iteration = len(train_loader) * int(model_epoch) if model_epoch.isdigit() else 0

    writer_test = SummaryWriter(cfg.SOLVER.OUTPUT_DIR + "/summary/eval_" + target_set_name)

    writer_test.add_scalar("MSE", metrics["mse"], model_iteration)

    writer_test.close()
Example #28
def train(config, experiment_name=None):
    num_classes = config.MODEL.NUM_CLASSES

    # dataloader for training
    train_period = 'train'
    train_loader = build_dataloader(cfg=config,
                                    period=train_period,
                                    loader_type='train')
    val_loader = build_dataloader(cfg=config,
                                  period=train_period,
                                  loader_type='val')

    # prepare model
    model = build_model(cfg=config)

    print('The loss type is', config.MODEL.LOSS_TYPE)
    loss_func = build_loss(config, num_classes)
    optimizer = build_optimizer(config, model)

    # Add for using self trained model
    if config.MODEL.PRETRAIN_CHOICE == 'self':
        start_epoch = eval(
            config.MODEL.PRETRAIN_PATH.split('/')[-1].split('.')[0].split('_')
            [-1])
        print('Start epoch:', start_epoch)
        path_to_optimizer = config.MODEL.PRETRAIN_PATH.replace(
            'model', 'optimizer')
        print('Path to the checkpoint of optimizer:', path_to_optimizer)
        model.load_state_dict(torch.load(config.MODEL.PRETRAIN_PATH))
        optimizer.load_state_dict(torch.load(path_to_optimizer))

    scheduler = WarmUpMultiStepLR(optimizer, config.SOLVER.STEPS,
                                  config.SOLVER.GAMMA,
                                  config.SOLVER.WARMUP_FACTOR,
                                  config.SOLVER.WARMUP_ITERS,
                                  config.SOLVER.WARMUP_METHOD)

    print('------------------ Start Training -------------------')
    do_train(config, model, train_loader, val_loader, optimizer, scheduler,
             loss_func, experiment_name)
    print('---------------- Training Completed ---------------- ')
Example #29
def main():
    # set mode
    try:
        mode = sys.argv[1]
        assert mode in ('dnn', 'cnn')
    except (IndexError, AssertionError):
        print('Error: Model mode not found')
        exit()

    # set parameters
    # TODO choose good numbers of batch size and epoch
    batch = 32
    epoch = 20

    # load data
    tr_feats, te_feats, tr_labels, te_labels = read_dataset()

    # data augmentation
    # TODO set up the parameters for 'ImageDataGenerator'
    augment_gen = ImageDataGenerator()
    origin_gen = ImageDataGenerator()

    # build model
    emotion_classifier = model.build_model(mode)

    # start training
    emotion_classifier.fit_generator(augment_gen.flow(tr_feats,
                                                      tr_labels,
                                                      batch_size=batch,
                                                      seed=0),
                                     steps_per_epoch=len(tr_feats) // batch,
                                     validation_data=origin_gen.flow(
                                         te_feats,
                                         te_labels,
                                         batch_size=batch,
                                         seed=0),
                                     validation_steps=len(te_feats) // batch,
                                     epochs=epoch)

    # save model
    emotion_classifier.save_weights(mode + '.h5')
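
For the ImageDataGenerator TODO in this example, a hedged sketch of augmentation settings often used for small face-image datasets; the exact values are assumptions, not taken from the original project:

augment_gen = ImageDataGenerator(rotation_range=15,
                                 width_shift_range=0.1,
                                 height_shift_range=0.1,
                                 zoom_range=0.1,
                                 horizontal_flip=True)
origin_gen = ImageDataGenerator()  # validation data stays un-augmented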
Example #30
def train(notes, char_to_idx, uniqueNotesLen, epochs=100, save_freq=10):

    #model_architecture
    model = build_model(BATCH_SIZE, SEQ_LENGTH, vocab_size)
    model.summary()
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

    #Train data generation
    T = np.asarray(
        [char_to_idx[c] for c in notes],
        dtype=np.int32)  #convert complete text into numerical indices
    #T_norm = T / float(uniqueNotesLen)
    print("Length of text:" + str(T.size))
    print("Length of unique test: ,", uniqueNotesLen)

    steps_per_epoch = (len(notes) / BATCH_SIZE - 1) / SEQ_LENGTH
    print('Steps per epoch : ', steps_per_epoch)

    log = TrainLogger('training_log.csv')

    for epoch in range(epochs):
        print('\nEpoch {}/{}'.format(epoch + 1, epochs))

        losses, accs = [], []
        msg = ""
        for i, (X, Y) in enumerate(read_batches(T, vocab_size)):

            print(X)

            loss, acc = model.train_on_batch(X, Y)
            print('Batch {}: loss = {}, acc = {}'.format(i + 1, loss, acc))
            losses.append(loss)
            accs.append(acc)

        log.add_entry(np.average(losses), np.average(accs))

        if (epoch + 1) % save_freq == 0:
            save_weights(epoch + 1, model)
            print('Saved checkpoint to', 'weights.{}.h5'.format(epoch + 1))
Example #31
def main():
    rmtree('out')
    makedirs('out')

    parser = argparse.ArgumentParser()
    parser.add_argument('content_image')
    parser.add_argument('style_image')
    args = parser.parse_args()

    content_input = matrix_from_image_file(args.content_image, 0.1)
    style_input = matrix_from_image_file(args.style_image)

    model = build_model(layers, content_input, style_input)
    wp = WeightProvider()

    for i in range(10000):
        content_weights, style_weights = wp.get_weights()

        print('Using weights:\n{}\n{}'.format(content_weights, style_weights))
        outfile = f'out/run_{i:0>3}.png'

        rel_content_weights = [
            w * CONTENT_WEIGHT_MULTIPLIER for w in content_weights
        ]

        img, losses = run_model(model, RUNS_PER_EPOCH, content_input.shape,
                                rel_content_weights, style_weights)
        img.save(outfile)

        r = {
            'img': outfile,
            'layers': package_layers(
                zip(layers, content_weights, losses[:8], style_weights,
                    losses[8:])),
            'parts': ['content', 'style'],
        }

        with open(RESULTS_FILE, 'w') as fh:
            dump(r, fh)
Example #32
def plot_autoencoder(weightsfile):
    print('building model')
    layers = model.build_model()

    batch_size = 128

    print('compiling theano function')
    encoder_func = theano_funcs.create_encoder_func(layers)

    print('loading weights from %s' % (weightsfile))
    model.load_weights([
        layers['l_decoder_out'],
        layers['l_discriminator_out'],
    ], weightsfile)

    print('loading data')
    X_train, y_train, X_test, y_test = utils.load_mnist()

    train_datapoints = []
    print('transforming training data')
    for train_idx in get_batch_idx(X_train.shape[0], batch_size):
        X_train_batch = X_train[train_idx]
        train_batch_codes = encoder_func(X_train_batch)
        train_datapoints.append(train_batch_codes)

    test_datapoints = []
    print('transforming test data')
    for test_idx in get_batch_idx(X_test.shape[0], batch_size):
        X_test_batch = X_test[test_idx]
        test_batch_codes = encoder_func(X_test_batch)
        test_datapoints.append(test_batch_codes)

    Z_train = np.vstack(train_datapoints)
    Z_test = np.vstack(test_datapoints)

    plot(Z_train,
         y_train,
         Z_test,
         y_test,
         filename='adversarial_train_val.png',
         title='projected onto latent space of autoencoder')
Example #33
def test_eval():
    data_root = "data_dir"
    dataset = AudiobookDataset(data_root)
    if hp.input_type == 'raw':
        collate_fn = raw_collate
    elif hp.input_type == 'bits':
        collate_fn = discrete_collate
    else:
        raise ValueError("input_type:{} not supported".format(hp.input_type))
    data_loader = DataLoader(dataset,
                             collate_fn=collate_fn,
                             shuffle=True,
                             num_workers=0,
                             batch_size=hp.batch_size)
    device = torch.device("cuda" if use_cuda else "cpu")
    print("using device:{}".format(device))

    # build model, create optimizer
    model = build_model().to(device)

    evaluate_model(model, data_loader)
Example #34
File: main.py Project: c0d3d/SAD
def main(f=None):
    args = parser.parse_args()
    if args.cmd_type == "embeddings":
        build_embeddings(args.data, args.output)
    elif args.cmd_type == "train":
        print("Loading embeddings ...")
        embeddings = EmbeddingsData.load(args.embeddings_data_file)
        print("Loading the data ...")
        the_data = Data.make_data(args.train_file, args.dev_file,
                                  args.batch_size)
        print("Building the model ...")
        model = build_model(embeddings, args.batch_size)
        train_sess = Training.make_training(model, the_data, args.epoch_count)
        while train_sess.has_more_epochs():
            print("Next epoch ...")
            train_sess.next_epoch()
        # TODO save model
    else:
        # ?
        exit(1)
    return 0
Example #35
def test_space(spaces, remove_bad_topologies=True):
    pp = pprint.PrettyPrinter(indent=4)
    good_spaces = []
    for i, space in enumerate(spaces):
        logm(f'Testing space [{i+1} of {len(spaces)}]',
             cur_frame=currentframe(),
             mtype='I')
        pp.pprint(space)
        try:
            K.clear_session()
            model = build_model(conf,
                                space,
                                input_shape=(SPEC_SHAPE_HEIGTH,
                                             SPEC_SHAPE_WIDTH, CHANNELS))
        except ValueError as err:
            logm(f'Failed when building the model: {str(err)} ',
                 cur_frame=currentframe(),
                 mtype='I')
            if remove_bad_topologies:
                # drop this space from the returned list
                continue
        good_spaces.append(space)
    return good_spaces
Example #36
def test_momentum(m=[0.99, 0.97, 0.95, 0.93, 0.91]):
    X_train, X_valid, X_test, y_train, y_valid, y_test = get_datasets()
    results = {}
    for i in range(len(m)):
        temp_results = []
        for j in range(20):
            model = build_model(layers_neurons=[32, 4, 1],
                                lr=0.001,
                                momentum=m[i])
            history = model.fit(X_train, y_train, epochs=20, batch_size=10)
            temp_results.append(history.history["loss"][-1])
        results[m[i]] = mean(temp_results)

    with open('results.json', "r") as file:
        data = json.load(file)

    data["momentum"] = results

    with open('results.json', "w") as file:
        json.dump(data, file, indent=4)
    return results
Example #37
    def train(self):
        '''Train the model.'''
        # number_of_epoch = len(self.files_content) // self.config.batch_size
        number_of_epoch = 50
        builded_model = model.build_model(self.config, self.num2word,
                                          self.words)
        self.model = builded_model

        self.model.summary()

        history = self.model.fit_generator(
            generator=self.data_generator(),
            verbose=True,
            steps_per_epoch=self.config.batch_size,
            epochs=number_of_epoch,
            callbacks=[
                keras.callbacks.ModelCheckpoint(self.config.weight_file,
                                                save_weights_only=False),
                LambdaCallback(on_epoch_end=self.generate_sample_result)
            ])
        utils.result_image(history)
Example #38
    def test_train_model(self):
        """
        Test if function returns trained model
        """
        texts, labels = preprocess_labels(data_dir_path="data/mock_aclImdb",
                                          dataset="train")
        vectorized_texts, word_index = tokenize_data(texts)
        mock_X_train, mock_y_train, mock_X_val, mock_y_val = split_data(
            vectorized_texts, labels)

        mock_embedding_matrix = pickle.load(
            open("models/mock_glove.6B/mock_embedding_matrix.p", "rb"))
        mock_model = build_model(mock_embedding_matrix)

        mock_trained_model = train_model(mock_model,
                                         (mock_X_train, mock_y_train),
                                         (mock_X_val, mock_y_val))

        self.assertIsNotNone(mock_trained_model[1], "no model trained")
        self.assertIsNotNone(mock_trained_model[0],
                             "history dict doesn't exist")
Example #39
def plot_autoencoder(weightsfile):
    print('building model')
    layers = model.build_model()

    batch_size = 128

    print('compiling theano function')
    encoder_func = theano_funcs.create_encoder_func(layers)

    print('loading weights from %s' % (weightsfile))
    model.load_weights([
        layers['l_decoder_out'],
        layers['l_discriminator_out'],
    ], weightsfile)

    print('loading data')
    X_train, y_train, X_test, y_test = utils.load_mnist()

    train_datapoints = []
    print('transforming training data')
    for train_idx in get_batch_idx(X_train.shape[0], batch_size):
        X_train_batch = X_train[train_idx]
        train_batch_codes = encoder_func(X_train_batch)
        train_datapoints.append(train_batch_codes)

    test_datapoints = []
    print('transforming test data')
    for test_idx in get_batch_idx(X_test.shape[0], batch_size):
        X_test_batch = X_test[test_idx]
        test_batch_codes = encoder_func(X_test_batch)
        test_datapoints.append(test_batch_codes)

    Z_train = np.vstack(train_datapoints)
    Z_test = np.vstack(test_datapoints)

    plot(Z_train, y_train, Z_test, y_test,
         filename='adversarial_train_val.png',
         title='projected onto latent space of autoencoder')
Example #40
def main(args):
    model_id = build_model_id(args)
    model_path = build_model_path(args, model_id)
    setup_model_dir(args, model_path)

    rng = np.random.RandomState(args.seed)

    json_cfg = load_model_json(args, x_train=None, n_classes=None)
    model_cfg = ModelConfig(**json_cfg)
    if args.verbose:
        print("model_cfg " + str(model_cfg))

    sys.path.append(args.model_dir)
    import model
    from model import build_model, fit_model, load_train, load_validation

    train_data = load_train(args, model_cfg)
    validation_data = load_validation(args, model_cfg)

    if args.verbose:
        print("loading model")
    model = build_model(model_cfg, train_data, validation_data)
    fit_model(model, train_data, validation_data, args)
Example #41
def get_prediction_function(feature_layer = None):
    '''
    Get prediction function (C3D and Video2GIF combined)
    @param feature_layer: a layer name (see model.py). If provided, pred_fn returns (score, and the activations at feature_layer)
    @return: theano function that scores sniplets
    '''
    print('Load weights and compile model...')

    # Build model
    net= model.build_model(batch_size=2)

    # Set the weights (takes some time)
    model.set_weights(net['score'],config.get('paths','c3d_weight_file'),config.get('paths','video2gif_weight_file'))
    layer='score'
    prediction = lasagne.layers.get_output(net[layer], deterministic=True)
    if feature_layer:
        features = lasagne.layers.get_output(net[feature_layer], deterministic=True)
        pred_fn = theano.function([net['input'].input_var], [prediction, features], allow_input_downcast = True)
    else:
        pred_fn = theano.function([net['input'].input_var], prediction, allow_input_downcast = True)


    return pred_fn
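
A hedged usage sketch; the snippet tensor layout assumes the standard C3D input of two 16-frame RGB crops at 112x112 resolution (matching batch_size=2 above), which model.py may define differently:

import numpy as np

pred_fn = get_prediction_function()
# Two 16-frame, channels-first RGB snippets, as C3D models usually expect.
snips = np.zeros((2, 3, 16, 112, 112), dtype=np.float32)
scores = pred_fn(snips)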
Example #42
def trainer(data='coco',  #f8k, f30k, coco
            margin=0.2,
            dim=1024,
            dim_image=4096,
            dim_word=300,
            encoder='gru',  # gru OR bow
            max_epochs=15,
            dispFreq=10,
            decay_c=0.,
            grad_clip=2.,
            maxlen_w=100,
            optimizer='adam',
            batch_size = 128,
            saveto='/ais/gobi3/u/rkiros/uvsmodels/coco.npz',
            validFreq=100,
            lrate=0.0002,
            reload_=False):

    # Model options
    model_options = {}
    model_options['data'] = data
    model_options['margin'] = margin
    model_options['dim'] = dim
    model_options['dim_image'] = dim_image
    model_options['dim_word'] = dim_word
    model_options['encoder'] = encoder
    model_options['max_epochs'] = max_epochs
    model_options['dispFreq'] = dispFreq
    model_options['decay_c'] = decay_c
    model_options['grad_clip'] = grad_clip
    model_options['maxlen_w'] = maxlen_w
    model_options['optimizer'] = optimizer
    model_options['batch_size'] = batch_size
    model_options['saveto'] = saveto
    model_options['validFreq'] = validFreq
    model_options['lrate'] = lrate
    model_options['reload_'] = reload_

    print model_options

    # reload options
    if reload_ and os.path.exists(saveto):
        print 'reloading...' + saveto
        with open('%s.pkl'%saveto, 'rb') as f:
            model_options = pkl.load(f)

    # Load training and development sets
    print 'Loading dataset'
    train, dev = load_dataset(data)[:2]

    # Create and save dictionary
    print 'Creating dictionary'
    worddict = build_dictionary(train[0]+dev[0])[0]
    n_words = len(worddict)
    model_options['n_words'] = n_words
    print 'Dictionary size: ' + str(n_words)
    with open('%s.dictionary.pkl'%saveto, 'wb') as f:
        pkl.dump(worddict, f)

    # Inverse dictionary
    word_idict = dict()
    for kk, vv in worddict.iteritems():
        word_idict[vv] = kk
    word_idict[0] = '<eos>'
    word_idict[1] = 'UNK'

    print 'Building model'
    params = init_params(model_options)
    # reload parameters
    if reload_ and os.path.exists(saveto):
        params = load_params(saveto, params)

    tparams = init_tparams(params)

    trng, inps, cost = build_model(tparams, model_options)

    # before any regularizer
    print 'Building f_log_probs...',
    f_log_probs = theano.function(inps, cost, profile=False)
    print 'Done'

    # weight decay, if applicable
    if decay_c > 0.:
        decay_c = theano.shared(numpy.float32(decay_c), name='decay_c')
        weight_decay = 0.
        for kk, vv in tparams.iteritems():
            weight_decay += (vv ** 2).sum()
        weight_decay *= decay_c
        cost += weight_decay

    # after any regularizer
    print 'Building f_cost...',
    f_cost = theano.function(inps, cost, profile=False)
    print 'Done'

    print 'Building sentence encoder'
    trng, inps_se, sentences = build_sentence_encoder(tparams, model_options)
    f_senc = theano.function(inps_se, sentences, profile=False)

    print 'Building image encoder'
    trng, inps_ie, images = build_image_encoder(tparams, model_options)
    f_ienc = theano.function(inps_ie, images, profile=False)

    print 'Building f_grad...',
    grads = tensor.grad(cost, wrt=itemlist(tparams))
    f_grad_norm = theano.function(inps, [(g**2).sum() for g in grads], profile=False)
    f_weight_norm = theano.function([], [(t**2).sum() for k,t in tparams.iteritems()], profile=False)

    if grad_clip > 0.:
        g2 = 0.
        for g in grads:
            g2 += (g**2).sum()
        new_grads = []
        for g in grads:
            new_grads.append(tensor.switch(g2 > (grad_clip**2),
                                           g / tensor.sqrt(g2) * grad_clip,
                                           g))
        grads = new_grads

    lr = tensor.scalar(name='lr')
    print 'Building optimizers...',
    # (compute gradients), (updates parameters)
    f_grad_shared, f_update = eval(optimizer)(lr, tparams, grads, inps, cost)

    print 'Optimization'

    # Each sentence in the minibatch has the same length (for the encoder)
    train_iter = homogeneous_data.HomogeneousData([train[0], train[1]], batch_size=batch_size, maxlen=maxlen_w)

    uidx = 0
    curr = 0.
    n_samples = 0
    
    for eidx in xrange(max_epochs):

        print 'Epoch ', eidx

        for x, im in train_iter:
            n_samples += len(x)
            uidx += 1

            x, mask, im = homogeneous_data.prepare_data(x, im, worddict, maxlen=maxlen_w, n_words=n_words)

            if x is None:
                print 'Minibatch with zero sample under length ', maxlen_w
                uidx -= 1
                continue

            # Update
            ud_start = time.time()
            cost = f_grad_shared(x, mask, im)
            f_update(lrate)
            ud = time.time() - ud_start

            if numpy.isnan(cost) or numpy.isinf(cost):
                print 'NaN detected'
                return 1., 1., 1.

            if numpy.mod(uidx, dispFreq) == 0:
                print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost, 'UD ', ud

            if numpy.mod(uidx, validFreq) == 0:

                print 'Computing results...'
                curr_model = {}
                curr_model['options'] = model_options
                curr_model['worddict'] = worddict
                curr_model['word_idict'] = word_idict
                curr_model['f_senc'] = f_senc
                curr_model['f_ienc'] = f_ienc

                ls = encode_sentences(curr_model, dev[0])
                lim = encode_images(curr_model, dev[1])

                (r1, r5, r10, medr) = i2t(lim, ls)
                print "Image to text: %.1f, %.1f, %.1f, %.1f" % (r1, r5, r10, medr)
                (r1i, r5i, r10i, medri) = t2i(lim, ls)
                print "Text to image: %.1f, %.1f, %.1f, %.1f" % (r1i, r5i, r10i, medri)

                currscore = r1 + r5 + r10 + r1i + r5i + r10i
                if currscore > curr:
                    curr = currscore

                    # Save model
                    print 'Saving...',
                    params = unzip(tparams)
                    numpy.savez(saveto, **params)
                    pkl.dump(model_options, open('%s.pkl'%saveto, 'wb'))
                    print 'Done'

        print 'Seen %d samples'%n_samples
Example #43
import torchfile

import numpy as np
import time, sys

import tensorflow as tf

from model import build_model
from util import *

# constants
width = 128
loss_lambda = 0.1
checkpoint_dir = sys.argv[1]

# model
# grasp_class_prediction, depth_prediction, logit, grasp_image_ph, keep_prob_ph = build_model(width)
grasp_class_prediction, logit, grasp_image_ph, keep_prob_ph = build_model(width)
depth_image_ph =  tf.placeholder('float', [None, width, width, 1])
grasp_class_ph =  tf.placeholder('int64', [None])

# loss
grasp_class_loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logit, grasp_class_ph), name='grasp_class_loss')
# depth_loss = tf.reduce_mean(tf.square(depth_image_ph - depth_prediction), name='depth_loss')
# combined_loss = (1. - loss_lambda) * grasp_class_loss + loss_lambda * depth_loss
combined_loss = grasp_class_loss

# evaluation
batch = int(sys.argv[2])
correct_prediction = tf.equal(tf.argmax(grasp_class_prediction, 1), grasp_class_ph)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

saver = tf.train.Saver(max_to_keep=5, keep_checkpoint_every_n_hours=1)
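
The module stops after defining the graph; a minimal evaluation sketch, assuming a hypothetical load_validation_batches(batch) helper that yields (images, labels) numpy arrays:

def evaluate():
    with tf.Session() as sess:
        # Restore the newest checkpoint written by the training script.
        saver.restore(sess, tf.train.latest_checkpoint(checkpoint_dir))
        accs = []
        for images, labels in load_validation_batches(batch):  # hypothetical helper
            accs.append(sess.run(accuracy, feed_dict={grasp_image_ph: images,
                                                      grasp_class_ph: labels,
                                                      keep_prob_ph: 1.0}))
        print('validation accuracy: %.4f' % np.mean(accs))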
Example #44
def train_autoencoder():
    print('building model')
    layers = model.build_model()

    max_epochs = 5000
    batch_size = 128
    weightsfile = join('weights', 'weights_train_val.pickle')

    print('compiling theano functions for training')
    print('  encoder/decoder')
    encoder_decoder_update = theano_funcs.create_encoder_decoder_func(
        layers, apply_updates=True)
    print('  discriminator')
    discriminator_update = theano_funcs.create_discriminator_func(
        layers, apply_updates=True)
    print('  generator')
    generator_update = theano_funcs.create_generator_func(
        layers, apply_updates=True)

    print('compiling theano functions for validation')
    print('  encoder/decoder')
    encoder_decoder_func = theano_funcs.create_encoder_decoder_func(layers)
    print('  discriminator')
    discriminator_func = theano_funcs.create_discriminator_func(layers)
    print('  generator')
    generator_func = theano_funcs.create_generator_func(layers)

    print('loading data')
    X_train, y_train, X_test, y_test = utils.load_mnist()

    try:
        for epoch in range(1, max_epochs + 1):
            print('epoch %d' % (epoch))

            # compute loss on training data and apply gradient updates
            train_reconstruction_losses = []
            train_discriminative_losses = []
            train_generative_losses = []
            for train_idx in get_batch_idx(X_train.shape[0], batch_size):
                X_train_batch = X_train[train_idx]
                # 1.) update the encoder/decoder to min. reconstruction loss
                train_batch_reconstruction_loss =\
                    encoder_decoder_update(X_train_batch)

                # sample from p(z)
                pz_train_batch = np.random.uniform(
                    low=-2, high=2,
                    size=(X_train_batch.shape[0], 2)).astype(
                        np.float32)

                # 2.) update discriminator to separate q(z|x) from p(z)
                train_batch_discriminative_loss =\
                    discriminator_update(X_train_batch, pz_train_batch)

                # 3.)  update generator to output q(z|x) that mimic p(z)
                train_batch_generative_loss = generator_update(X_train_batch)

                train_reconstruction_losses.append(
                    train_batch_reconstruction_loss)
                train_discriminative_losses.append(
                    train_batch_discriminative_loss)
                train_generative_losses.append(
                    train_batch_generative_loss)

            # average over minibatches
            train_reconstruction_losses_mean = np.mean(
                train_reconstruction_losses)
            train_discriminative_losses_mean = np.mean(
                train_discriminative_losses)
            train_generative_losses_mean = np.mean(
                train_generative_losses)

            print('  train: rec = %.6f, dis = %.6f, gen = %.6f' % (
                train_reconstruction_losses_mean,
                train_discriminative_losses_mean,
                train_generative_losses_mean,
            ))

            # compute loss on test data
            test_reconstruction_losses = []
            test_discriminative_losses = []
            test_generative_losses = []
            for test_idx in get_batch_idx(X_test.shape[0], batch_size):
                X_test_batch = X_test[test_idx]
                test_batch_reconstruction_loss =\
                    encoder_decoder_func(X_test_batch)

                # sample from p(z)
                pz_test_batch = np.random.uniform(
                    low=-2, high=2,
                    size=(X_test.shape[0], 2)).astype(
                        np.float32)

                test_batch_discriminative_loss =\
                    discriminator_func(X_test_batch, pz_test_batch)

                test_batch_generative_loss = generator_func(X_test_batch)

                test_reconstruction_losses.append(
                    test_batch_reconstruction_loss)
                test_discriminative_losses.append(
                    test_batch_discriminative_loss)
                test_generative_losses.append(
                    test_batch_generative_loss)

            test_reconstruction_losses_mean = np.mean(
                test_reconstruction_losses)
            test_discriminative_losses_mean = np.mean(
                test_discriminative_losses)
            test_generative_losses_mean = np.mean(
                test_generative_losses)

            print('  test: rec = %.6f, dis = %.6f, gen = %.6f' % (
                test_reconstruction_losses_mean,
                test_discriminative_losses_mean,
                test_generative_losses_mean,
            ))

    except KeyboardInterrupt:
        print('caught ctrl-c, stopped training')
        weights = get_all_param_values([
            layers['l_decoder_out'],
            layers['l_discriminator_out'],
        ])
        print('saving weights to %s' % (weightsfile))
        model.save_weights(weights, weightsfile)
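# Note: get_batch_idx is an assumed helper that yields minibatch index arrays
# over a dataset of the given size; a minimal sketch (not necessarily the
# original implementation) could look like this:
def get_batch_idx(n, batch_size):
    for start in range(0, n, batch_size):
        yield np.arange(start, min(start + batch_size, n))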
Exemplo n.º 45
0
def main():
    rospy.init_node('execute')
    rs = baxter_interface.RobotEnable(baxter_interface.CHECK_VERSION)
    rs.enable()

    # retrieve images
    global current_image
    def update_image(msg):
        global current_image
        current_image = PIL_Image.frombytes('RGBA', (msg.width, msg.height), msg.data)
        # print msg.width, msg.height, msg.is_bigendian, msg.step, msg.encoding
    rospy.Subscriber('/cameras/left_hand_camera/image', Image, update_image)

    # model
    width = 128
    checkpoint_dir = 'checkpoints-dev-rgb-4-max'
    grasp_class_prediction, logit, grasp_image_ph, keep_prob_ph = build_model(width)
    saver = tf.train.Saver(max_to_keep=5, keep_checkpoint_every_n_hours=1)

    arm = baxter_interface.Limb('left')
    arm.move_to_neutral()
    gripper = baxter_interface.Gripper('left')
    gripper.calibrate()

    # grasp crop
    crop_center_x = 330
    crop_center_y = 160
    grasp_class_threshold = 0.5
    scale = 1.0
    crop_width = width * scale
    crop_box = (crop_center_x - crop_width/2, crop_center_y - crop_width/2, crop_center_x + crop_width/2, crop_center_y + crop_width/2)

    # grasp workspace
    x0 = 0.81
    y0 = 0.25
    delta = 0.04
    initial_z = 0.1
    bound_z = -0.165

    pub = rospy.Publisher('/robot/xdisplay', Image, queue_size=1)
    global force
    def display_gripper_state(msg):
        global force
        force = msg.force

    rospy.Subscriber('/robot/end_effector/left_gripper/state', EndEffectorState, display_gripper_state)

    with tf.Session() as sess:
        restore_vars(saver, sess, checkpoint_dir)

        attempt = 0
        while True:
            # sample a grasp
            dx = np.random.rand() * (2. * delta) - delta
            dy = np.random.rand() * (2. * delta) - delta
            target_theta = (np.random.rand() * 2. - 1.) * 3.059
            target_x = x0 + dx
            target_y = y0 + dy

            # move to the grasp location
            execute_linear(arm, target_x, target_y, initial_z, target_theta)

            # predict grasp
            crop = np.array(current_image.crop(crop_box).resize((width, width)))[:,:,:3]
            grasp_pred = grasp_class_prediction.eval(session=sess, feed_dict={
                grasp_image_ph: crop.reshape((1, width, width, 3)),
                keep_prob_ph: 1.,
            })

            # display image 
            draw = PIL_ImageDraw.Draw(current_image)
            draw.text(crop_box[:2], 'prob: %.5f' % grasp_pred[0, 1])
            draw.text((20, 20), 'grasp force: %.5f' % force)
            if grasp_pred[0, 1] > grasp_class_threshold:
                draw.rectangle(crop_box, outline=(0, 255, 0))
            else:
                draw.rectangle(crop_box, outline=(0, 0, 255))
            msg = Image(
                header=Header(
                    stamp=rospy.Time.now(),
                    frame_id='base',
                ),
                width=640,
                height=400,
                step=640 * 4,
                encoding='bgra8',
                is_bigendian=0,
                data=current_image.tobytes(),
            )
            pub.publish(msg)
            if grasp_pred[0, 1] > grasp_class_threshold:
                execute_planar_grasp(arm, gripper, initial_z, bound_z, target_theta, lower_to_drop=0.05)

            attempt += 1
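# restore_vars is an assumed helper that restores the newest checkpoint from
# checkpoint_dir into the session; a minimal TF1-style sketch (hypothetical,
# not necessarily the original implementation):
def restore_vars(saver, sess, checkpoint_dir):
    sess.run(tf.global_variables_initializer())
    ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
    if ckpt and ckpt.model_checkpoint_path:
        saver.restore(sess, ckpt.model_checkpoint_path)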
Exemplo n.º 46
0
    N = args.cube_size

    # The number of permutations from a finished cube to get the initial cube
    rand_nb = args.rand_nb
    eps = args.epsilon  # The probability of taking a random moves
    gamma = args.gamma  # The discount

    lr = args.learning_rate

    mb_size = args.mini_batch_size  # The minibatch size

    # Initialize the Replay_Memory:
    replay_memory = []

    # Initialize Q: function of the Neural Network
    Q, gradient_descent_step, params = build_model(args)
    max_action = max_action_Q(N, Q)

    # Printing
    current_episode_century = 0
    count = 0
    for episode in range(M):
        # Initialize a random cube
        env = Environment(N)

        moves = env.suffle(rand_nb=rand_nb)

        # Show good examples in the replay memory with probability
        # "good_examples"
        r = np.random.uniform(0., 1., 1)
        if r < args.good_examples:
Exemplo n.º 47
0
def main(args):
    model_id = build_model_id(args)
    model_path = build_model_path(args, model_id)
    setup_model_dir(args, model_path)
    sys.stdout, sys.stderr = setup_logging(args, model_path)

    x_train, y_train = load_model_data(args.train_file,
            args.data_name, args.target_name)
    x_validation, y_validation = load_model_data(
            args.validation_file,
            args.data_name, args.target_name)

    rng = np.random.RandomState(args.seed)

    if args.n_classes > -1:
        n_classes = args.n_classes
    else:
        n_classes = max(y_train)+1

    n_classes, target_names, class_weight = load_target_data(args, n_classes)

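    # "Balanced" class weights: weight_c = n_samples / (n_classes * count_c),
    # optionally raised to args.class_weight_exponent to soften or sharpen
    # the re-weighting.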
    if class_weight is None and args.class_weight_auto:
        n_samples = len(y_train)
        weights = float(n_samples) / (n_classes * np.bincount(y_train))
        if args.class_weight_exponent:
            weights = weights**args.class_weight_exponent
        class_weight = dict(zip(range(n_classes), weights))

    if args.verbose:
        logging.debug("n_classes {0} min {1} max {2}".format(
            n_classes, min(y_train), max(y_train)))

    y_train_one_hot = np_utils.to_categorical(y_train, n_classes)
    y_validation_one_hot = np_utils.to_categorical(y_validation, n_classes)

    if args.verbose:
        logging.debug("y_train_one_hot " + str(y_train_one_hot.shape))
        logging.debug("x_train " + str(x_train.shape))

    min_vocab_index = np.min(x_train)
    max_vocab_index = np.max(x_train)

    if args.verbose:
        logging.debug("min vocab index {0} max vocab index {1}".format(
            min_vocab_index, max_vocab_index))

    json_cfg = load_model_json(args, x_train, n_classes)

    if args.verbose:
        logging.debug("loading model")

    sys.path.append(args.model_dir)
    import model
    from model import build_model

    #######################################################################      
    # Subsetting
    #######################################################################      
    if args.subsetting_function:
        subsetter = getattr(model, args.subsetting_function)
    else:
        subsetter = None

    def take_subset(subsetter, path, x, y, y_one_hot, n):
        if subsetter is None:
            return x[0:n], y[0:n], y_one_hot[0:n]
        else:
            mask = subsetter(path)
            idx = np.where(mask)[0]
            idx = idx[0:n]
        return x[idx], y[idx], y_one_hot[idx]

    x_train, y_train, y_train_one_hot = take_subset(
            subsetter, args.train_file,
            x_train, y_train, y_train_one_hot,
            n=args.n_train)

    x_validation, y_validation, y_validation_one_hot = take_subset(
            subsetter, args.validation_file,
            x_validation, y_validation, y_validation_one_hot,
            n=args.n_validation)

    #######################################################################      
    # Preprocessing
    #######################################################################      
    if args.preprocessing_class:
        preprocessor = getattr(model, args.preprocessing_class)(seed=args.seed)
    else:
        preprocessor = modeling.preprocess.NullPreprocessor()

    if args.verbose:
        logging.debug("y_train_one_hot " + str(y_train_one_hot.shape))
        logging.debug("x_train " + str(x_train.shape))

    model_cfg = ModelConfig(**json_cfg)
    if args.verbose:
        logging.info("model_cfg " + str(model_cfg))
    net = build_model(model_cfg)
    setattr(net, 'stop_training', False)

    marshaller = None
    if isinstance(net, keras.models.Graph):
        marshaller = getattr(model, args.graph_marshalling_class)()

    logging.info('model has {n_params} parameters'.format(
        n_params=count_parameters(net)))

    if len(args.extra_train_file) > 1:
        callbacks = keras.callbacks.CallbackList()
    else:
        callbacks = []

    save_model_info(args, model_path, model_cfg)


    callback_logger = logging.info if args.log else callable_print

    #######################################################################      
    # Callbacks that need validation set predictions.
    #######################################################################      

    pc = PredictionCallback(x_validation, callback_logger,
            marshaller=marshaller, batch_size=model_cfg.batch_size)
    callbacks.append(pc)

    if args.classification_report:
        cr = ClassificationReport(x_validation, y_validation,
                callback_logger,
                target_names=target_names)
        pc.add(cr)
    
    if args.confusion_matrix:
        cm = ConfusionMatrix(x_validation, y_validation,
                callback_logger)
        pc.add(cm)

    def get_mode(metric_name):
        return {
                'val_loss': 'min',
                'val_acc': 'max',
                'val_f1': 'max',
                'val_f2': 'max',
                'val_f0.5': 'max'
                }[metric_name]

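    # Early stopping on a custom validation metric: DelegatingMetricCallback
    # computes the chosen metric from validation-set predictions each epoch
    # and forwards it to the wrapped EarlyStopping delegate.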
    if args.early_stopping or args.early_stopping_metric is not None:
        es = EarlyStopping(monitor=args.early_stopping_metric,
                mode=get_mode(args.early_stopping_metric),
                patience=model_cfg.patience,
                verbose=1)
        cb = DelegatingMetricCallback(
                x_validation, y_validation, callback_logger,
                delegate=es,
                metric_name=args.early_stopping_metric,
                marshaller=marshaller)
        pc.add(cb)

    if not args.no_save:
        if args.save_all_checkpoints:
            filepath = model_path + '/model-{epoch:04d}.h5'
        else:
            filepath = model_path + '/model.h5'
        mc = ModelCheckpoint(
            filepath=filepath,
            mode=get_mode(args.checkpoint_metric),
            verbose=1,
            monitor=args.checkpoint_metric,
            save_best_only=not args.save_every_epoch)
        cb = DelegatingMetricCallback(
                x_validation, y_validation, callback_logger,
                delegate=mc,
                metric_name=args.checkpoint_metric,
                marshaller=marshaller)
        pc.add(cb)

    if model_cfg.optimizer == 'SGD':
        callbacks.append(SingleStepLearningRateSchedule(patience=10))

    if len(args.extra_train_file) > 1:
        args.extra_train_file.append(args.train_file)
        logging.info("Using the following files for training: " +
                ','.join(args.extra_train_file))

        train_file_iter = itertools.cycle(args.extra_train_file)
        current_train = args.train_file

        callbacks._set_model(net)
        callbacks.on_train_begin(logs={})

        epoch = batch = 0

        while True:
            x_train, y_train_one_hot = preprocessor.fit_transform(
                    x_train, y_train_one_hot)
            x_validation, y_validation_one_hot = preprocessor.transform(
                    x_validation, y_validation_one_hot)

            iteration = batch % len(args.extra_train_file)

            logging.info("epoch {epoch} iteration {iteration} - training with {train_file}".format(
                    epoch=epoch, iteration=iteration, train_file=current_train))
            callbacks.on_epoch_begin(epoch, logs={})

            n_train = x_train.shape[0]

            callbacks.on_batch_begin(batch, logs={'size': n_train})

            index_array = np.arange(n_train)
            if args.shuffle:
                rng.shuffle(index_array)

            batches = keras.models.make_batches(n_train, model_cfg.batch_size)
            logging.info("epoch {epoch} iteration {iteration} - starting {n_batches} batches".format(
                    epoch=epoch, iteration=iteration, n_batches=len(batches)))

            avg_train_loss = avg_train_accuracy = 0.
            for batch_index, (batch_start, batch_end) in enumerate(batches):
                batch_ids = index_array[batch_start:batch_end]

                if isinstance(net, keras.models.Graph):
                    train_data = marshaller.marshal(
                            x_train[batch_ids], y_train_one_hot[batch_ids])
                    train_loss = net.train_on_batch(
                            train_data, class_weight=class_weight)
                    # It looks like train_on_batch returns a different
                    # type for graph than sequential models.
                    train_loss = train_loss[0]
                    train_accuracy = 0.
                else:
                    train_loss, train_accuracy = net.train_on_batch(
                            x_train[batch_ids], y_train_one_hot[batch_ids],
                            accuracy=True, class_weight=class_weight)

                batch_end_logs = {'loss': train_loss, 'accuracy': train_accuracy}

                avg_train_loss = (avg_train_loss * batch_index + train_loss)/(batch_index + 1)
                avg_train_accuracy = (avg_train_accuracy * batch_index + train_accuracy)/(batch_index + 1)

                callbacks.on_batch_end(batch,
                        logs={'loss': train_loss, 'accuracy': train_accuracy})

            logging.info("epoch {epoch} iteration {iteration} - finished {n_batches} batches".format(
                    epoch=epoch, iteration=iteration, n_batches=len(batches)))

            logging.info("epoch {epoch} iteration {iteration} - loss: {loss} - acc: {acc}".format(
                    epoch=epoch, iteration=iteration, loss=avg_train_loss, acc=avg_train_accuracy))

            batch += 1

            # Validation frequency (this if-block) doesn't necessarily
            # occur in the same iteration as beginning of an epoch
            # (next if-block), so net.evaluate appears twice here.
            kwargs = {
                    'batch_size': model_cfg.batch_size,
                    'verbose': 0 if args.log else 1 
                    }
            pargs = []
            validation_data = {}
            if isinstance(net, keras.models.Graph):
                validation_data = marshaller.marshal(
                        x_validation, y_validation_one_hot)
                pargs = [validation_data]
            else:
                pargs = [x_validation, y_validation_one_hot]
                kwargs['show_accuracy'] = True

            if (iteration + 1) % args.validation_freq == 0:
                if isinstance(net, keras.models.Graph):
                    val_loss = net.evaluate(*pargs, **kwargs)
                    y_hat = net.predict(validation_data, batch_size=model_cfg.batch_size)
                    val_acc = accuracy_score(y_validation, np.argmax(y_hat['output'], axis=1))
                else:
                    val_loss, val_acc = net.evaluate(
                            *pargs, **kwargs)
                logging.info("epoch {epoch} iteration {iteration} - val_loss: {val_loss} - val_acc: {val_acc}".format(
                        epoch=epoch, iteration=iteration, val_loss=val_loss, val_acc=val_acc))
                epoch_end_logs = {'iteration': iteration, 'val_loss': val_loss, 'val_acc': val_acc}
                callbacks.on_epoch_end(epoch, epoch_end_logs)

            if batch % len(args.extra_train_file) == 0:
                if isinstance(net, keras.models.Graph):
                    val_loss = net.evaluate(*pargs, **kwargs)
                    y_hat = net.predict(validation_data, batch_size=model_cfg.batch_size)
                    val_acc = accuracy_score(y_validation, np.argmax(y_hat['output'], axis=1))
                else:
                    val_loss, val_acc = net.evaluate(
                            *pargs, **kwargs)
                logging.info("epoch {epoch} iteration {iteration} - val_loss: {val_loss} - val_acc: {val_acc}".format(
                        epoch=epoch, iteration=iteration, val_loss=val_loss, val_acc=val_acc))
                epoch_end_logs = {'iteration': iteration, 'val_loss': val_loss, 'val_acc': val_acc}
                epoch += 1
                callbacks.on_epoch_end(epoch, epoch_end_logs)

            if net.stop_training:
                logging.info("epoch {epoch} iteration {iteration} - done training".format(
                    epoch=epoch, iteration=iteration))
                break

            current_train = next(train_file_iter)
            x_train, y_train = load_model_data(current_train,
                    args.data_name, args.target_name)
            y_train_one_hot = np_utils.to_categorical(y_train, n_classes)

            if epoch > args.n_epochs:
                break

        callbacks.on_train_end(logs={})
    else:
        x_train, y_train_one_hot = preprocessor.fit_transform(
                x_train, y_train_one_hot)
        x_validation, y_validation_one_hot = preprocessor.transform(
                x_validation, y_validation_one_hot)

        if isinstance(net, keras.models.Graph):
            train_data = marshaller.marshal(
                    x_train, y_train_one_hot)
            validation_data = marshaller.marshal(
                    x_validation, y_validation_one_hot)
            net.fit(train_data,
                shuffle=args.shuffle,
                nb_epoch=args.n_epochs,
                batch_size=model_cfg.batch_size,
                validation_data=validation_data,
                callbacks=callbacks,
                class_weight=class_weight,
                verbose=2 if args.log else 1)
        else:
            net.fit(x_train, y_train_one_hot,
                shuffle=args.shuffle,
                nb_epoch=args.n_epochs,
                batch_size=model_cfg.batch_size,
                show_accuracy=True,
                validation_data=(x_validation, y_validation_one_hot),
                callbacks=callbacks,
                class_weight=class_weight,
                verbose=2 if args.log else 1)
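# count_parameters and callable_print are assumed helpers; minimal sketches
# (hypothetical, not necessarily the originals):
def count_parameters(net):
    # total number of weights in a Keras model
    return sum(np.prod(w.shape) for w in net.get_weights())

def callable_print(msg):
    print(msg)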
Exemplo n.º 48
0
                        help='sum the integers (default: find the max)')
    parser.add_argument('--batch-size', type=int, help='Batch size')
    parser.add_argument('--max-training-files', type=int,
                        help='Maximum number of training files to use.')
    
    args = parser.parse_args()

    model.MAX_TRAINING_FILES = 3000
    cluster_to_data = model.load_training_data()

    logging.info("Finished loading training data. Finalizing training data.")
    
    for model_num in range(5):
        print "Testing with model from cluster " + str(model_num)
        weights_file = "weights-" + str(model_num) + ".09.hdf5"
        my_model = model.build_model(args.gpu)
        my_model.load_weights(weights_file)
        for cluster, data in cluster_to_data.iteritems():
            X, flops, y = data
            new_flops = np.zeros((flops.shape[0], model.INPUT_LENGTH, flops.shape[1]))
            # Zero out flop before it comes out
            for i, X_hand in enumerate(X):
                for j, v in enumerate(X_hand):
                    # First hand post-flop
                    if v[15] == 1:
                        break
                # After the inner loop finds the first post-flop action at
                # index j, zero out the flop before that point and repeat it
                # from j onward.
                new_flops[i] = np.concatenate((np.zeros((j, flops.shape[1])),
                                               np.tile(np.expand_dims(flops[i], 0),
                                                       (model.INPUT_LENGTH - j, 1))))

            flops = new_flops.astype(int)
Exemplo n.º 49
0
def trainer(X, C, stmodel,
            dimctx=4800, #vector dimensionality
            dim_word=620, # word vector dimensionality
            dim=1600, # the number of GRU units
            encoder='gru',
            decoder='gru',
            doutput=False,
            max_epochs=5,
            dispFreq=1,
            decay_c=0.,
            grad_clip=5.,
            n_words=40000,
            maxlen_w=100,
            optimizer='adam',
            batch_size = 16,
            saveto='/u/rkiros/research/semhash/models/toy.npz',
            dictionary='/ais/gobi3/u/rkiros/bookgen/book_dictionary_large.pkl',
            embeddings=None,
            saveFreq=1000,
            sampleFreq=100,
            reload_=False):

    # Model options
    model_options = {}
    model_options['dimctx'] = dimctx
    model_options['dim_word'] = dim_word
    model_options['dim'] = dim
    model_options['encoder'] = encoder
    model_options['decoder'] = decoder
    model_options['doutput'] = doutput
    model_options['max_epochs'] = max_epochs
    model_options['dispFreq'] = dispFreq
    model_options['decay_c'] = decay_c
    model_options['grad_clip'] = grad_clip
    model_options['n_words'] = n_words
    model_options['maxlen_w'] = maxlen_w
    model_options['optimizer'] = optimizer
    model_options['batch_size'] = batch_size
    model_options['saveto'] = saveto
    model_options['dictionary'] = dictionary
    model_options['embeddings'] = embeddings
    model_options['saveFreq'] = saveFreq
    model_options['sampleFreq'] = sampleFreq
    model_options['reload_'] = reload_

    print model_options

    # reload options
    if reload_ and os.path.exists(saveto):
        print 'reloading...' + saveto
        with open('%s.pkl'%saveto, 'rb') as f:
            model_options = pkl.load(f)

    # load dictionary
    print 'Loading dictionary...'
    worddict = load_dictionary(dictionary)

    # Load pre-trained embeddings, if applicable
    if embeddings is not None:
        print 'Loading embeddings...'
        with open(embeddings, 'rb') as f:
            embed_map = pkl.load(f)
        dim_word = len(embed_map.values()[0])
        model_options['dim_word'] = dim_word
        preemb = norm_weight(n_words, dim_word)
        pz = defaultdict(lambda : 0)
        for w in embed_map.keys():
            pz[w] = 1
        for w in worddict.keys()[:n_words-2]:
            if pz[w] > 0:
                preemb[worddict[w]] = embed_map[w]
    else:
        preemb = None

    # Inverse dictionary
    word_idict = dict()
    for kk, vv in worddict.iteritems():
        word_idict[vv] = kk
    word_idict[0] = '<eos>'
    word_idict[1] = 'UNK'

    print 'Building model'
    params = init_params(model_options, preemb=preemb)
    # reload parameters
    if reload_ and os.path.exists(saveto):
        params = load_params(saveto, params)

    tparams = init_tparams(params)

    trng, inps, cost = build_model(tparams, model_options)

    print 'Building sampler'
    f_init, f_next = build_sampler(tparams, model_options, trng)

    # before any regularizer
    print 'Building f_log_probs...',
    f_log_probs = theano.function(inps, cost, profile=False)
    print 'Done'

    # weight decay, if applicable
    if decay_c > 0.:
        decay_c = theano.shared(numpy.float32(decay_c), name='decay_c')
        weight_decay = 0.
        for kk, vv in tparams.iteritems():
            weight_decay += (vv ** 2).sum()
        weight_decay *= decay_c
        cost += weight_decay

    # after any regularizer
    print 'Building f_cost...',
    f_cost = theano.function(inps, cost, profile=False)
    print 'Done'

    print 'Building f_grad...',
    grads = tensor.grad(cost, wrt=itemlist(tparams))
    f_grad_norm = theano.function(inps, [(g**2).sum() for g in grads], profile=False)
    f_weight_norm = theano.function([], [(t**2).sum() for k,t in tparams.iteritems()], profile=False)

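    # Global-norm gradient clipping: if the combined L2 norm of all gradients
    # exceeds grad_clip, rescale every gradient by grad_clip / ||g||.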
    if grad_clip > 0.:
        g2 = 0.
        for g in grads:
            g2 += (g**2).sum()
        new_grads = []
        for g in grads:
            new_grads.append(tensor.switch(g2 > (grad_clip**2),
                                           g / tensor.sqrt(g2) * grad_clip,
                                           g))
        grads = new_grads

    lr = tensor.scalar(name='lr')
    print 'Building optimizers...',
    # (compute gradients), (updates parameters)
    f_grad_shared, f_update = eval(optimizer)(lr, tparams, grads, inps, cost)

    print 'Optimization'

    # Each sentence in the minibatch have same length (for encoder)
    train_iter = homogeneous_data.HomogeneousData([X,C], batch_size=batch_size, maxlen=maxlen_w)

    uidx = 0
    lrate = 0.01
    for eidx in xrange(max_epochs):
        n_samples = 0

        print 'Epoch ', eidx

        for x, c in train_iter:
            n_samples += len(x)
            uidx += 1

            x, mask, ctx = homogeneous_data.prepare_data(x, c, worddict, stmodel, maxlen=maxlen_w, n_words=n_words)

            if x is None:
                print 'Minibatch with zero sample under length ', maxlen_w
                uidx -= 1
                continue

            ud_start = time.time()
            cost = f_grad_shared(x, mask, ctx)
            f_update(lrate)
            ud = time.time() - ud_start

            if numpy.isnan(cost) or numpy.isinf(cost):
                print 'NaN detected'
                return 1., 1., 1.

            if numpy.mod(uidx, dispFreq) == 0:
                print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost, 'UD ', ud

            if numpy.mod(uidx, saveFreq) == 0:
                print 'Saving...',

                params = unzip(tparams)
                numpy.savez(saveto, history_errs=[], **params)
                pkl.dump(model_options, open('%s.pkl'%saveto, 'wb'))
                print 'Done'

            if numpy.mod(uidx, sampleFreq) == 0:
                x_s = x
                mask_s = mask
                ctx_s = ctx
                for jj in xrange(numpy.minimum(10, len(ctx_s))):
                    sample, score = gen_sample(tparams, f_init, f_next, ctx_s[jj].reshape(1, model_options['dimctx']), model_options,
                                               trng=trng, k=1, maxlen=100, stochastic=False, use_unk=False)
                    print 'Truth ',jj,': ',
                    for vv in x_s[:,jj]:
                        if vv == 0:
                            break
                        if vv in word_idict:
                            print word_idict[vv],
                        else:
                            print 'UNK',
                    print
                    for kk, ss in enumerate([sample[0]]):
                        print 'Sample (', kk,') ', jj, ': ',
                        for vv in ss:
                            if vv == 0:
                                break
                            if vv in word_idict:
                                print word_idict[vv],
                            else:
                                print 'UNK',
                    print

        print 'Seen %d samples'%n_samples
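# load_dictionary is an assumed helper; a minimal sketch that simply unpickles
# a word -> index mapping from disk (hypothetical, not necessarily the
# original implementation):
def load_dictionary(loc):
    with open(loc, 'rb') as f:
        return pkl.load(f)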
Exemplo n.º 50
0
def main():
    MAX_VOCAB = 6000
    WINDOW_SIZE = 4
    LEVEL = 'char'
    EMBED_DIM = 100
    MAX_TOKEN_LEN = 15
    NB_LAYERS = 1
    NB_EPOCHS = 3

    cutoff = 10000000
    words = codecs.open('../data/Austen_Sense.txt', 'r', encoding='utf8') \
                  .read().lower().split()[:cutoff]
    print('Loaded', len(words), 'words')

    cnt = Counter(words)
    most_comm = [k for k, v in cnt.most_common(500)]
    print('Most frequent:', most_comm[:50])

    word_to_int = {'UNK': 0}
    for w, c in cnt.most_common(MAX_VOCAB):
        word_to_int[w] = len(word_to_int)
    int_to_word = [None] * len(word_to_int)
    for k, v in word_to_int.items():
        int_to_word[v] = k

    if LEVEL == 'char':
        char_vector_dict, char_idx = index_characters(int_to_word)
        print(char_vector_dict.keys())
        model = build_model(vocab_size=len(word_to_int),
                            embed_dim=EMBED_DIM,
                            level=LEVEL,
                            token_len=MAX_TOKEN_LEN,
                            token_char_vector_dict=char_vector_dict,
                            nb_recurrent_layers=NB_LAYERS)

        most_comm_X = vectorize_tokens(tokens=most_comm,
                                       char_vector_dict=char_vector_dict,
                                       max_len=MAX_TOKEN_LEN)
        print(most_comm_X.shape, '!!!')

    elif LEVEL == 'word':
        model = build_model(vocab_size=len(word_to_int),
                            embed_dim=50,
                            level=LEVEL,
                            token_len=None,
                            token_char_vector_dict=None,
                            nb_recurrent_layers=None)
    model.summary()

    sampling_table = make_sampling_table(size=len(word_to_int))

    for e in range(NB_EPOCHS):
        idx = 0
        losses = []

        for idx in range(WINDOW_SIZE, len(words)-WINDOW_SIZE):
            seq = []
            for w in words[(idx - WINDOW_SIZE): (idx + WINDOW_SIZE)]:
                try:
                    seq.append(word_to_int[w])
                except KeyError:
                    seq.append(0)

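            # keras' skipgrams() expands the window into (pivot, context)
            # pairs labelled 1, plus an equal number of randomly drawn
            # negative pairs labelled 0 (negative_samples=1.), optionally
            # subsampling frequent words via sampling_table.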
            couples, labels = skipgrams(seq, len(word_to_int),
                                        window_size=4,
                                        negative_samples=1.,
                                        shuffle=True,
                                        categorical=False,
                                        sampling_table=sampling_table)

            if len(couples) > 1:
                couples = np.array(couples, dtype='int32')

                c_inp = couples[:, 1]
                c_inp = c_inp[:, np.newaxis]

                if LEVEL == 'word':
                    p_inp = couples[:, 0]
                    p_inp = p_inp[:, np.newaxis]
                elif LEVEL == 'char':
                    tokens = [int_to_word[i] for i in couples[:, 0]]
                    p_inp = vectorize_tokens(tokens=tokens,
                                             char_vector_dict=char_vector_dict,
                                             max_len=MAX_TOKEN_LEN)
                else:
                    raise ValueError('Wrong level param: word or char')

                labels = np.array(labels, dtype='int32')
                
                loss = model.train_on_batch({'pivot': p_inp, 'context': c_inp},
                                            {'label': labels})
                losses.append(loss)

                if idx % 5000 == 0:
                    print(np.mean(losses))

                if idx % 10000 == 0:
                    print(np.mean(losses))

                    print('Compiling repr func')
                    get_activations = K.function([model.layers[0].input,
                                                  K.learning_phase()],
                                                 [model.layers[6].output, ])
                    activations = get_activations([most_comm_X, 0])[0]
                    activations = np.array(activations, dtype='float32')

                    print(activations.shape, '-----')
                    norm_weights = np_utils.normalize(activations)

                    # dimension reduction:
                    tsne = TSNE(n_components=2)
                    coor = tsne.fit_transform(norm_weights)

                    plt.clf()
                    sns.set_style('dark')
                    plt.rcParams['axes.linewidth'] = 0.4
                    fig, ax1 = plt.subplots()

                    labels = most_comm
                    # first plot slices:
                    x1, x2 = coor[:, 0], coor[:, 1]
                    ax1.scatter(x1, x2, 100,
                                edgecolors='none',
                                facecolors='none')
                    # clustering on top (add some colouring):
                    clustering = AgglomerativeClustering(linkage='ward',
                                                         affinity='euclidean',
                                                         n_clusters=10)
                    clustering.fit(coor)
                    # add names:
                    axes = zip(x1, x2, most_comm, clustering.labels_)
                    for x, y, name, cluster_label in axes:
                        ax1.text(x, y, name, ha='center', va="center",
                                 color=plt.cm.spectral(cluster_label / 10.),
                                 fontdict={'family': 'Arial', 'size': 8})
                    # control aesthetics:
                    ax1.set_xlabel('')
                    ax1.set_ylabel('')
                    ax1.set_xticklabels([])
                    ax1.set_xticks([])
                    ax1.set_yticklabels([])
                    ax1.set_yticks([])
                    plt.savefig('embeddings.pdf', bbox_inches='tight')
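# index_characters and vectorize_tokens are assumed helpers; minimal sketches
# (hypothetical): one-hot character vectors, and tokens encoded as
# (n_tokens, max_len, n_chars) arrays padded/truncated to max_len.
def index_characters(tokens):
    chars = sorted(set(''.join(t for t in tokens if t)))
    char_idx = {c: i for i, c in enumerate(chars)}
    char_vector_dict = {}
    for c, i in char_idx.items():
        v = np.zeros(len(chars), dtype='float32')
        v[i] = 1.0
        char_vector_dict[c] = v
    return char_vector_dict, char_idx

def vectorize_tokens(tokens, char_vector_dict, max_len):
    dim = len(next(iter(char_vector_dict.values())))
    X = np.zeros((len(tokens), max_len, dim), dtype='float32')
    for i, token in enumerate(tokens):
        for j, c in enumerate(token[:max_len]):
            if c in char_vector_dict:
                X[i, j] = char_vector_dict[c]
    return X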
Exemplo n.º 51
0
def trainer(load_from=None,
            save_dir='snapshots',
            name='anon',
            **kwargs):
    """
    :param load_from: location to load parameters + options from
    :param name: name of model, used as location to save parameters + options
    """

    curr_model = dict()

    # load old model, including parameters, but overwrite with new options
    if load_from:
        print 'reloading...' + load_from
        with open('%s.pkl'%load_from, 'rb') as f:
            curr_model = pkl.load(f)
    else:
        curr_model['options'] = {}

    for k, v in kwargs.iteritems():
        curr_model['options'][k] = v

    model_options = curr_model['options']

    # initialize logger
    import datetime
    timestampedName = datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S') + '_' + name

    from logger import Log
    log = Log(name=timestampedName, hyperparams=model_options, saveDir='vis/training',
              xLabel='Examples Seen', saveFrequency=1)


    print curr_model['options']

    # Load training and development sets
    print 'Loading dataset'
    dataset = load_dataset(model_options['data'], cnn=model_options['cnn'], load_train=True)
    train = dataset['train']
    dev = dataset['dev']

    # Create dictionary
    print 'Creating dictionary'
    worddict = build_dictionary(train['caps']+dev['caps'])
    print 'Dictionary size: ' + str(len(worddict))
    curr_model['worddict'] = worddict
    curr_model['options']['n_words'] = len(worddict) + 2

    # save model
    pkl.dump(curr_model, open('%s/%s.pkl' % (save_dir, name), 'wb'))


    print 'Loading data'
    train_iter = datasource.Datasource(train, batch_size=model_options['batch_size'], worddict=worddict)
    dev = datasource.Datasource(dev, worddict=worddict)
    dev_caps, dev_ims = dev.all()

    print 'Building model'
    params = init_params(model_options)
    # reload parameters
    if load_from is not None and os.path.exists(load_from):
        params = load_params(load_from, params)

    tparams = init_tparams(params)

    inps, cost = build_model(tparams, model_options)

    print 'Building sentence encoder'
    inps_se, sentences = build_sentence_encoder(tparams, model_options)
    f_senc = theano.function(inps_se, sentences, profile=False)

    print 'Building image encoder'
    inps_ie, images = build_image_encoder(tparams, model_options)
    f_ienc = theano.function(inps_ie, images, profile=False)

    print 'Building f_grad...',
    grads = tensor.grad(cost, wrt=itemlist(tparams))

    print 'Building errors..'
    inps_err, errs = build_errors(model_options)
    f_err = theano.function(inps_err, errs, profile=False)

    curr_model['f_senc'] = f_senc
    curr_model['f_ienc'] = f_ienc
    curr_model['f_err'] = f_err



    if model_options['grad_clip'] > 0.:
        grads = [maxnorm(g, model_options['grad_clip']) for g in grads]

    lr = tensor.scalar(name='lr')
    print 'Building optimizers...',
    # (compute gradients), (updates parameters)
    f_grad_shared, f_update = eval(model_options['optimizer'])(lr, tparams, grads, inps, cost)

    print 'Optimization'

    uidx = 0
    curr = 0
    n_samples = 0


    
    for eidx in xrange(model_options['max_epochs']):

        print 'Epoch ', eidx

        for x, mask, im in train_iter:
            n_samples += x.shape[1]
            uidx += 1

            # Update
            ud_start = time.time()
            cost = f_grad_shared(x, mask, im)
            f_update(model_options['lrate'])
            ud = time.time() - ud_start

            if numpy.isnan(cost) or numpy.isinf(cost):
                print 'NaN detected'
                return 1., 1., 1.

            if numpy.mod(uidx, model_options['dispFreq']) == 0:
                print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost, 'UD ', ud
                log.update({'Error': float(cost)}, n_samples)


            if numpy.mod(uidx, model_options['validFreq']) == 0:

                print 'Computing results...'

                # encode sentences efficiently
                dev_s = encode_sentences(curr_model, dev_caps, batch_size=model_options['batch_size'])
                dev_i = encode_images(curr_model, dev_ims)


                # compute errors
                dev_errs = compute_errors(curr_model, dev_s, dev_i)

                # compute ranking error
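                # r1/r5/r10 are recall@K (share of queries whose correct match
                # ranks in the top K); medr/meanr are the median and mean rank.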
                (r1, r5, r10, medr, meanr), vis_details = t2i(dev_errs, vis_details=True)
                (r1i, r5i, r10i, medri, meanri) = i2t(dev_errs)
                print "Text to image: %.1f, %.1f, %.1f, %.1f, %.1f" % (r1, r5, r10, medr, meanr)
                log.update({'R@1': r1, 'R@5': r5, 'R@10': r10, 'median_rank': medr, 'mean_rank': meanr}, n_samples)
                print "Image to text: %.1f, %.1f, %.1f, %.1f, %.1f" % (r1i, r5i, r10i, medri, meanri)
                log.update({'Image2Caption_R@1': r1i, 'Image2Caption_R@5': r5i, 'Image2Caption_R@10': r10i, 'Image2Caption_median_rank': medri, 'Image2Caption_mean_rank': meanri}, n_samples)

                tot = r1 + r5 + r10
                if tot > curr:
                    curr = tot
                    # Save parameters
                    print 'Saving...',
                    numpy.savez('%s/%s'%(save_dir, name), **unzip(tparams))
                    print 'Done'
                    vis_details['hyperparams'] = model_options
                    # Save visualization details
                    with open('vis/roc/%s/%s.json' % (model_options['data'], timestampedName), 'w') as f:
                        json.dump(vis_details, f)
                    # Add the new model to the index
                    index = json.load(open('vis/roc/index.json', 'r'))
                    models = index[model_options['data']]
                    if timestampedName not in models:
                        models.append(timestampedName)

                    with open('vis/roc/index.json', 'w') as f:
                        json.dump(index, f)

        print 'Seen %d samples'%n_samples
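# maxnorm is an assumed per-gradient clipping helper; a hypothetical sketch
# using Theano ops: rescale a gradient whenever its L2 norm exceeds the
# threshold (not necessarily the original implementation).
def maxnorm(g, threshold):
    norm = tensor.sqrt((g ** 2).sum())
    return tensor.switch(norm > threshold, g / norm * threshold, g)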
Exemplo n.º 52
0
    def setup_to_train(self, train_data=None, dev_data=None, test_data=None):
        # create a model directory:
        if os.path.isdir(self.model_dir):
            shutil.rmtree(self.model_dir)
        os.mkdir(self.model_dir)

        self.train_tokens = train_data['token']
        if self.include_test:
            self.test_tokens = test_data['token']
        if self.include_dev:
            self.dev_tokens = dev_data['token']

        idx_cnt = 0
        if self.include_lemma:
            self.lemma_out_idx = idx_cnt
            idx_cnt += 1
            self.train_lemmas = train_data['lemma']
            self.known_lemmas = set(self.train_lemmas)
            if self.include_dev:
                self.dev_lemmas = dev_data['lemma']            
            if self.include_test:
                self.test_lemmas = test_data['lemma']
        if self.include_pos:
            self.pos_out_idx = idx_cnt
            idx_cnt += 1
            self.train_pos = train_data['pos']
            if self.include_dev:
                self.dev_pos = dev_data['pos']
            if self.include_test:
                self.test_pos = test_data['pos']
        if self.include_morph:
            self.morph_out_idx = idx_cnt
            self.train_morph = train_data['morph']
            if self.include_dev:
                self.dev_morph = dev_data['morph']
            if self.include_test:
                self.test_morph = test_data['morph']

        self.preprocessor = Preprocessor().fit(tokens=self.train_tokens,
                                               lemmas=self.train_lemmas,
                                               pos=self.train_pos,
                                               morph=self.train_morph,
                                               include_lemma=self.include_lemma,
                                               include_morph=self.include_morph,
                                               max_token_len=self.max_token_len,
                                               focus_repr=self.focus_repr,
                                               min_lem_cnt=self.min_lem_cnt,
                                               )
        self.pretrainer = Pretrainer(nb_left_tokens=self.nb_left_tokens,
                                     nb_right_tokens=self.nb_right_tokens,
                                     size=self.nb_embedding_dims,
                                     minimum_count=self.min_token_freq_emb)
        self.pretrainer.fit(tokens=self.train_tokens)

        train_transformed = self.preprocessor.transform(tokens=self.train_tokens,
                                               lemmas=self.train_lemmas,
                                               pos=self.train_pos,
                                               morph=self.train_morph)
        if self.include_dev:
            dev_transformed = self.preprocessor.transform(tokens=self.dev_tokens,
                                        lemmas=self.dev_lemmas,
                                        pos=self.dev_pos,
                                        morph=self.dev_morph)
        if self.include_test:
            test_transformed = self.preprocessor.transform(tokens=self.test_tokens,
                                        lemmas=self.test_lemmas,
                                        pos=self.test_pos,
                                        morph=self.test_morph)

        self.train_X_focus = train_transformed['X_focus']
        if self.include_dev:
            self.dev_X_focus = dev_transformed['X_focus']
        if self.include_test:
            self.test_X_focus = test_transformed['X_focus']

        if self.include_lemma:
            self.train_X_lemma = train_transformed['X_lemma']
            if self.include_dev:
                self.dev_X_lemma = dev_transformed['X_lemma']
            if self.include_test:
                self.test_X_lemma = test_transformed['X_lemma']

        if self.include_pos:
            self.train_X_pos = train_transformed['X_pos']
            if self.include_dev:
                self.dev_X_pos = dev_transformed['X_pos']
            if self.include_test:
                self.test_X_pos = test_transformed['X_pos']

        if self.include_morph:
            self.train_X_morph = train_transformed['X_morph']
            if self.include_dev:
                self.dev_X_morph = dev_transformed['X_morph']
            if self.include_test:
                self.test_X_morph = test_transformed['X_morph']

        self.train_contexts = self.pretrainer.transform(tokens=self.train_tokens)
        if self.include_dev:
            self.dev_contexts = self.pretrainer.transform(tokens=self.dev_tokens)
        if self.include_test:
            self.test_contexts = self.pretrainer.transform(tokens=self.test_tokens)
        
        print('Building model...')
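        # Collect optional output sizes from the fitted preprocessor; each
        # attribute only exists when the corresponding task (lemma, pos,
        # morph) was enabled, hence the AttributeError guards below.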
        nb_tags = None
        try:
            nb_tags = len(self.preprocessor.pos_encoder.classes_)
        except AttributeError:
            pass
        nb_morph_cats = None
        try:
            nb_morph_cats = self.preprocessor.nb_morph_cats
        except AttributeError:
            pass
        max_token_len, token_char_dict = None, None
        try:
            max_token_len = self.preprocessor.max_token_len
            token_char_dict = self.preprocessor.token_char_dict
        except AttributeError:
            pass
        max_lemma_len, lemma_char_dict = None, None
        try:
            max_lemma_len = self.preprocessor.max_lemma_len
            lemma_char_dict = self.preprocessor.lemma_char_dict
        except AttributeError:
            pass
        nb_lemmas = None
        try:
            nb_lemmas = len(self.preprocessor.lemma_encoder.classes_)
        except AttributeError:
            pass
        self.model = build_model(token_len=max_token_len,
                             token_char_vector_dict=token_char_dict,
                             lemma_len=max_lemma_len,
                             nb_tags=nb_tags,
                             nb_morph_cats=nb_morph_cats,
                             lemma_char_vector_dict=lemma_char_dict,
                             nb_encoding_layers=self.nb_encoding_layers,
                             nb_dense_dims=self.nb_dense_dims,
                             nb_embedding_dims=self.nb_embedding_dims,
                             nb_train_tokens=len(self.pretrainer.train_token_vocab),
                             nb_context_tokens=self.nb_context_tokens,
                             pretrained_embeddings=self.pretrainer.pretrained_embeddings,
                             include_token=self.include_token,
                             include_context=self.include_context,
                             include_lemma=self.include_lemma,
                             include_pos=self.include_pos,
                             include_morph=self.include_morph,
                             nb_filters = self.nb_filters,
                             filter_length = self.filter_length,
                             focus_repr = self.focus_repr,
                             dropout_level = self.dropout_level,
                             nb_lemmas = nb_lemmas,
                            )
        self.save()
        self.setup = True
Exemplo n.º 53
0
def main(args):
    model_id = build_model_id(args)
    model_path = build_model_path(args, model_id)
    setup_model_dir(args, model_path)
    sys.stdout, sys.stderr = setup_logging(args, model_path)

    x_train, y_train = load_model_data(args.train_file,
            args.data_name, args.target_name)
    x_validation, y_validation = load_model_data(
            args.validation_file,
            args.data_name, args.target_name)

    rng = np.random.RandomState(args.seed)

    if args.n_classes > -1:
        n_classes = args.n_classes
    else:
        n_classes = max(y_train)+1

    n_classes, target_names, class_weight = load_target_data(args, n_classes)

    if len(class_weight) == 0:
        n_samples = len(y_train)
        print('n_samples', n_samples)
        print('classes', range(n_classes))
        print('weights', float(n_samples) / (n_classes * np.bincount(y_train)))
        class_weight = dict(zip(range(n_classes),
            float(n_samples) / (n_classes * np.bincount(y_train))))
    print('class_weight', class_weight)

    logging.debug("n_classes {0} min {1} max {2}".format(
        n_classes, min(y_train), max(y_train)))

    y_train_one_hot = np_utils.to_categorical(y_train, n_classes)
    y_validation_one_hot = np_utils.to_categorical(y_validation, n_classes)

    logging.debug("y_train_one_hot " + str(y_train_one_hot.shape))
    logging.debug("x_train " + str(x_train.shape))

    min_vocab_index = np.min(x_train)
    max_vocab_index = np.max(x_train)
    logging.debug("min vocab index {0} max vocab index {1}".format(
        min_vocab_index, max_vocab_index))

    json_cfg = load_model_json(args, x_train, n_classes)

    logging.debug("loading model")

    sys.path.append(args.model_dir)
    import model
    from model import build_model

    #######################################################################      
    # Subsetting
    #######################################################################      
    if args.subsetting_function:
        subsetter = getattr(model, args.subsetting_function)
    else:
        subsetter = None

    def take_subset(subsetter, path, x, y, y_one_hot, n):
        if subsetter is None:
            return x[0:n], y[0:n], y_one_hot[0:n]
        else:
            mask = subsetter(path)
            idx = np.where(mask)[0]
            idx = idx[0:n]
        return x[idx], y[idx], y_one_hot[idx]

    x_train, y_train, y_train_one_hot = take_subset(
            subsetter, args.train_file,
            x_train, y_train, y_train_one_hot,
            n=args.n_train)

    x_validation, y_validation, y_validation_one_hot = take_subset(
            subsetter, args.validation_file,
            x_validation, y_validation, y_validation_one_hot,
            n=args.n_validation)

    #######################################################################      
    # Preprocessing
    #######################################################################      
    if args.preprocessing_class:
        preprocessor = getattr(model, args.preprocessing_class)(seed=args.seed)
    else:
        preprocessor = modeling.preprocess.NullPreprocessor()

    logging.debug("y_train_one_hot " + str(y_train_one_hot.shape))
    logging.debug("x_train " + str(x_train.shape))

    model_cfg = ModelConfig(**json_cfg)
    logging.info("model_cfg " + str(model_cfg))
    model = build_model(model_cfg)
    setattr(model, 'stop_training', False)

    logging.info('model has {n_params} parameters'.format(
        n_params=count_parameters(model)))

    if len(args.extra_train_file) > 1:
        callbacks = keras.callbacks.CallbackList()
    else:
        callbacks = []

    save_model_info(args, model_path, model_cfg)

    if not args.no_save:
        if args.save_all_checkpoints:
            filepath = model_path + '/model-{epoch:04d}.h5'
        else:
            filepath = model_path + '/model.h5'
        callbacks.append(ModelCheckpoint(
            filepath=filepath,
            verbose=1,
            save_best_only=not args.save_every_epoch))

    callback_logger = logging.info if args.log else callable_print

    if args.n_epochs < sys.maxsize:
        # Number of epochs overrides patience.  If the number of epochs
        # is specified on the command line, the model is trained for
        # exactly that number; otherwise, the model is trained with
        # early stopping using the patience specified in the model 
        # configuration.
        callbacks.append(EarlyStopping(
            monitor='val_loss', patience=model_cfg.patience, verbose=1))

    if args.classification_report:
        cr = ClassificationReport(x_validation, y_validation,
                callback_logger,
                target_names=target_names)
        callbacks.append(cr)

    if model_cfg.optimizer == 'SGD':
        callbacks.append(SingleStepLearningRateSchedule(patience=10))

    if len(args.extra_train_file) > 1:
        args.extra_train_file.append(args.train_file)
        logging.info("Using the following files for training: " +
                ','.join(args.extra_train_file))

        train_file_iter = itertools.cycle(args.extra_train_file)
        current_train = args.train_file

        callbacks._set_model(model)
        callbacks.on_train_begin(logs={})

        epoch = batch = 0

        while True:
            x_train, y_train_one_hot = preprocessor.fit_transform(
                    x_train, y_train_one_hot)
            x_validation, y_validation_one_hot = preprocessor.transform(
                    x_validation, y_validation_one_hot)

            iteration = batch % len(args.extra_train_file)

            logging.info("epoch {epoch} iteration {iteration} - training with {train_file}".format(
                    epoch=epoch, iteration=iteration, train_file=current_train))
            callbacks.on_epoch_begin(epoch, logs={})

            n_train = x_train.shape[0]

            callbacks.on_batch_begin(batch, logs={'size': n_train})

            index_array = np.arange(n_train)
            if args.shuffle:
                rng.shuffle(index_array)

            batches = keras.models.make_batches(n_train, model_cfg.batch_size)
            logging.info("epoch {epoch} iteration {iteration} - starting {n_batches} batches".format(
                    epoch=epoch, iteration=iteration, n_batches=len(batches)))

            avg_train_loss = avg_train_accuracy = 0.
            for batch_index, (batch_start, batch_end) in enumerate(batches):
                batch_ids = index_array[batch_start:batch_end]

                if isinstance(model, keras.models.Graph):
                    data = {
                            'input': x_train[batch_ids],
                            'output': y_train_one_hot[batch_ids]
                            }
                    train_loss = model.train_on_batch(data, class_weight=class_weight)
                    train_accuracy = 0.
                else:
                    train_loss, train_accuracy = model.train_on_batch(
                            x_train[batch_ids], y_train_one_hot[batch_ids],
                            accuracy=True, class_weight=class_weight)

                batch_end_logs = {'loss': train_loss, 'accuracy': train_accuracy}

                avg_train_loss = (avg_train_loss * batch_index + train_loss)/(batch_index + 1)
                avg_train_accuracy = (avg_train_accuracy * batch_index + train_accuracy)/(batch_index + 1)

                callbacks.on_batch_end(batch,
                        logs={'loss': train_loss, 'accuracy': train_accuracy})

            logging.info("epoch {epoch} iteration {iteration} - finished {n_batches} batches".format(
                    epoch=epoch, iteration=iteration, n_batches=len(batches)))

            logging.info("epoch {epoch} iteration {iteration} - loss: {loss} - acc: {acc}".format(
                    epoch=epoch, iteration=iteration, loss=avg_train_loss, acc=avg_train_accuracy))

            batch += 1

            # Validation frequency (this if-block) doesn't necessarily
            # occur in the same iteration as beginning of an epoch
            # (next if-block), so model.evaluate appears twice here.
            kwargs = { 'verbose': 0 if args.log else 1 }
            pargs = []
            validation_data = {}
            if isinstance(model, keras.models.Graph):
                validation_data = {
                        'input': x_validation,
                        'output': y_validation_one_hot
                        }
                pargs = [validation_data]
            else:
                pargs = [x_validation, y_validation_one_hot]
                kwargs['show_accuracy'] = True

            if (iteration + 1) % args.validation_freq == 0:
                if isinstance(model, keras.models.Graph):
                    val_loss = model.evaluate(*pargs, **kwargs)
                    y_hat = model.predict(validation_data)
                    val_acc = accuracy_score(y_validation, np.argmax(y_hat['output'], axis=1))
                else:
                    val_loss, val_acc = model.evaluate(
                            *pargs, **kwargs)
                logging.info("epoch {epoch} iteration {iteration} - val_loss: {val_loss} - val_acc: {val_acc}".format(
                        epoch=epoch, iteration=iteration, val_loss=val_loss, val_acc=val_acc))
                epoch_end_logs = {'iteration': iteration, 'val_loss': val_loss, 'val_acc': val_acc}
                callbacks.on_epoch_end(epoch, epoch_end_logs)

            if batch % len(args.extra_train_file) == 0:
                if isinstance(model, keras.models.Graph):
                    val_loss = model.evaluate(*pargs, **kwargs)
                    y_hat = model.predict(validation_data)
                    val_acc = accuracy_score(y_validation, np.argmax(y_hat['output'], axis=1))
                else:
                    val_loss, val_acc = model.evaluate(
                            *pargs, **kwargs)
                logging.info("epoch {epoch} iteration {iteration} - val_loss: {val_loss} - val_acc: {val_acc}".format(
                        epoch=epoch, iteration=iteration, val_loss=val_loss, val_acc=val_acc))
                epoch_end_logs = {'iteration': iteration, 'val_loss': val_loss, 'val_acc': val_acc}
                epoch += 1
                callbacks.on_epoch_end(epoch, epoch_end_logs)

            if model.stop_training:
                logging.info("epoch {epoch} iteration {iteration} - done training".format(
                    epoch=epoch, iteration=iteration))
                break

            current_train = next(train_file_iter)
            x_train, y_train = load_model_data(current_train,
                    args.data_name, args.target_name)
            y_train_one_hot = np_utils.to_categorical(y_train, n_classes)

            if epoch > args.n_epochs:
                break

        callbacks.on_train_end(logs={})
    else:
        x_train, y_train_one_hot = preprocessor.fit_transform(
                x_train, y_train_one_hot)
        x_validation, y_validation_one_hot = preprocessor.transform(
                x_validation, y_validation_one_hot)
        if isinstance(model, keras.models.Graph):
            data = {
                    'input': x_train,
                    'output': y_train_one_hot
                    }
            validation_data = {
                    'input': x_validation,
                    'output': y_validation_one_hot
                    }
            model.fit(data,
                shuffle=args.shuffle,
                nb_epoch=args.n_epochs,
                batch_size=model_cfg.batch_size,
                validation_data=validation_data,
                callbacks=callbacks,
                class_weight=class_weight,
                verbose=2 if args.log else 1)
            y_hat = model.predict(validation_data)
            print('val_acc %.04f' % 
                    accuracy_score(y_validation, np.argmax(y_hat['output'], axis=1)))
        else:
            model.fit(x_train, y_train_one_hot,
                shuffle=args.shuffle,
                nb_epoch=args.n_epochs,
                batch_size=model_cfg.batch_size,
                show_accuracy=True,
                validation_data=(x_validation, y_validation_one_hot),
                callbacks=callbacks,
                class_weight=class_weight,
                verbose=2 if args.log else 1)
Exemplo n.º 54
0
def trainer(X, 
            dim_word=620, # word vector dimensionality
            dim=2400, # the number of GRU units
            encoder='gru',
            decoder='gru',
            max_epochs=5,
            dispFreq=1,
            decay_c=0.,
            grad_clip=5.,
            n_words=20000,
            maxlen_w=30,
            optimizer='adam',
            batch_size = 64,
            saveto='/u/rkiros/research/semhash/models/toy.npz',
            dictionary='/ais/gobi3/u/rkiros/bookgen/book_dictionary_large.pkl',
            saveFreq=1000,
            reload_=False):

    # Model options
    model_options = {}
    model_options['dim_word'] = dim_word
    model_options['dim'] = dim
    model_options['encoder'] = encoder
    model_options['decoder'] = decoder 
    model_options['max_epochs'] = max_epochs
    model_options['dispFreq'] = dispFreq
    model_options['decay_c'] = decay_c
    model_options['grad_clip'] = grad_clip
    model_options['n_words'] = n_words
    model_options['maxlen_w'] = maxlen_w
    model_options['optimizer'] = optimizer
    model_options['batch_size'] = batch_size
    model_options['saveto'] = saveto
    model_options['dictionary'] = dictionary
    model_options['saveFreq'] = saveFreq
    model_options['reload_'] = reload_

    print model_options

    # reload options
    # TODO: if loading old parameters you need to make sure you are using them
    #  in the rest of the code
    # if reload_ and os.path.exists(saveto):
    #     print 'reloading...' + saveto
    #     with open('%s.pkl'%saveto, 'rb') as f:
    #         model_options = pkl.load(f)

    # load dictionary
    print 'Loading dictionary...'
    worddict = load_dictionary(dictionary)

    # Inverse dictionary
    word_idict = dict()
    for kk, vv in worddict.iteritems():
        word_idict[vv] = kk
    word_idict[0] = '<eos>'
    word_idict[1] = 'UNK'

    print 'Building model'
    params = init_params(model_options)
    # reload parameters
    if reload_ and os.path.exists(saveto):
        params = load_params(saveto + '.npz', params)

    tparams = init_tparams(params)

    trng, x, x_mask, y, y_mask, z, z_mask, \
          opt_ret, \
          cost = \
          build_model(tparams, model_options)
    inps = [x, x_mask, y, y_mask, z, z_mask]

    # before any regularizer
    print 'Building f_log_probs...',
    f_log_probs = theano.function(inps, cost, profile=False)
    print 'Done'

    # weight decay, if applicable
    if decay_c > 0.:
        decay_c = theano.shared(numpy.float32(decay_c), name='decay_c')
        weight_decay = 0.
        for kk, vv in tparams.iteritems():
            weight_decay += (vv ** 2).sum()
        weight_decay *= decay_c
        cost += weight_decay

    # after any regularizer
    print 'Building f_cost...',
    f_cost = theano.function(inps, cost, profile=False)
    print 'Done'

    print 'Building f_grad...',
    grads = tensor.grad(cost, wrt=itemlist(tparams))
    f_grad_norm = theano.function(inps, [(g**2).sum() for g in grads], profile=False)
    f_weight_norm = theano.function([], [(t**2).sum() for k,t in tparams.iteritems()], profile=False)

    if grad_clip > 0.:
        g2 = 0.
        for g in grads:
            g2 += (g**2).sum()
        new_grads = []
        for g in grads:
            new_grads.append(tensor.switch(g2 > (grad_clip**2),
                                           g / tensor.sqrt(g2) * grad_clip,
                                           g))
        grads = new_grads

    lr = tensor.scalar(name='lr')
    print 'Building optimizers...',
    # (compute gradients), (updates parameters)
    f_grad_shared, f_update = eval(optimizer)(lr, tparams, grads, inps, cost)
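    # eval(optimizer) resolves the optimizer name (e.g. 'adam') to a function defined in this module's namespace.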

    print 'Optimization'

    # Each sentence in the minibatch has the same length (for the encoder)
    trainX = homogeneous_data.grouper(X)
    train_iter = homogeneous_data.HomogeneousData(trainX, batch_size=batch_size, maxlen=maxlen_w)

    uidx = 0
    lrate = 0.01
    for eidx in xrange(max_epochs):
        n_samples = 0

        print 'Epoch ', eidx

        for x, y, z in train_iter:
            n_samples += len(x)
            uidx += 1

            x, x_mask, y, y_mask, z, z_mask = homogeneous_data.prepare_data(x, y, z, worddict, maxlen=maxlen_w, n_words=n_words)

            if x is None:
                print 'Minibatch with zero sample under length ', maxlen_w
                uidx -= 1
                continue

            ud_start = time.time()
            cost = f_grad_shared(x, x_mask, y, y_mask, z, z_mask)
            f_update(lrate)
            ud = time.time() - ud_start

            if numpy.isnan(cost) or numpy.isinf(cost):
                print 'NaN detected'
                return 1., 1., 1.

            if numpy.mod(uidx, dispFreq) == 0:
                print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost, 'UD ', ud

            if numpy.mod(uidx, saveFreq) == 0:
                print 'Saving...',

                params = unzip(tparams)
                numpy.savez(saveto, history_errs=[], **params)
                pkl.dump(model_options, open('%s.pkl'%saveto, 'wb'))
                print 'Done'

        print 'Seen %d samples'%n_samples
Exemplo n.º 55
0
import sys
import yaml
from data import build_datasets
from model import build_model, build_algorithm
from monitor import build_extensions
from blocks.main_loop import MainLoop
from blocks.model import Model
from blocks.extensions.saveload import Load
import cPickle as pickle
from blocks.graph import ComputationGraph

config_dict = yaml.load(open(sys.argv[1], 'r'))
print config_dict

train, valid, alphabet = build_datasets(config_dict)
generator, cost = build_model(len(alphabet), config_dict)
algorithm = build_algorithm(generator, cost, config_dict)
extensions = build_extensions(cost, algorithm, valid, config_dict)
main_loop = MainLoop(algorithm=algorithm, data_stream=train,
                     model=Model(cost), extensions=extensions)
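# Reload a previously saved main loop (parameters and training log) from the checkpoint path.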
ml = Load(config_dict['checkpoint_path'], load_log=True)
print dir(ml)

ml.load_to(main_loop)
generator = main_loop.model.get_top_bricks()[-1]

sampler = ComputationGraph(generator.generate(
    n_steps=1000, batch_size=10, iterate=True)).get_theano_function()

samples = sampler()
outputs = samples[-2]
Exemplo n.º 56
0
def trainer(load_from=None, save_dir="snapshots", name="anon", **kwargs):
    """
    :param load_from: location to load parameters + options from
    :param save_dir: directory in which parameters + options are saved
    :param name: name of model, used as location to save parameters + options
    """

    curr_model = dict()

    # load old model, including parameters, but overwrite with new options
    if load_from:
        print "reloading..." + load_from
        with open("%s.pkl" % load_from, "rb") as f:
            curr_model = pkl.load(f)
    else:
        curr_model["options"] = {}

    for k, v in kwargs.iteritems():
        curr_model["options"][k] = v

    model_options = curr_model["options"]

    # initialize logger
    import datetime

    timestampedName = datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S") + "_" + name

    from logger import Log

    log = Log(
        name=timestampedName, hyperparams=model_options, saveDir="vis/training", xLabel="Examples Seen", saveFrequency=1
    )

    print curr_model["options"]

    # Load training and development sets
    print "Loading dataset"
    dataset = load_dataset(model_options["data"], cnn=model_options["cnn"], load_train=True)
    train = dataset["train"]
    dev = dataset["dev"]

    # Create dictionary
    print "Creating dictionary"
    worddict = build_dictionary(train["caps"] + dev["caps"])
    print "Dictionary size: " + str(len(worddict))
    curr_model["worddict"] = worddict
    curr_model["options"]["n_words"] = len(worddict) + 2

    # save model
    pkl.dump(curr_model, open("%s/%s.pkl" % (save_dir, name), "wb"))

    print "Loading data"
    train_iter = datasource.Datasource(train, batch_size=model_options["batch_size"], worddict=worddict)
    dev = datasource.Datasource(dev, worddict=worddict)
    dev_caps, dev_ims = dev.all()
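    # Keep the full dev split (captions and image features) around for ranking evaluation during training.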

    print "Building model"
    params = init_params(model_options)
    # reload parameters
    if load_from is not None and os.path.exists(load_from):
        params = load_params(load_from, params)

    tparams = init_tparams(params)

    inps, cost = build_model(tparams, model_options)

    print "Building sentence encoder"
    inps_se, sentences = build_sentence_encoder(tparams, model_options)
    f_senc = theano.function(inps_se, sentences, profile=False)

    print "Building image encoder"
    inps_ie, images = build_image_encoder(tparams, model_options)
    f_ienc = theano.function(inps_ie, images, profile=False)

    print "Building f_grad...",
    grads = tensor.grad(cost, wrt=itemlist(tparams))

    print "Building errors.."
    inps_err, errs = build_errors(model_options)
    f_err = theano.function(inps_err, errs, profile=False)

    curr_model["f_senc"] = f_senc
    curr_model["f_ienc"] = f_ienc
    curr_model["f_err"] = f_err

    if model_options["grad_clip"] > 0.0:
        grads = [maxnorm(g, model_options["grad_clip"]) for g in grads]

    lr = tensor.scalar(name="lr")
    print "Building optimizers...",
    # (compute gradients), (updates parameters)
    f_grad_shared, f_update = eval(model_options["optimizer"])(lr, tparams, grads, inps, cost)

    print "Optimization"

    uidx = 0
    curr = 0
    n_samples = 0

    for eidx in xrange(model_options["max_epochs"]):

        print "Epoch ", eidx

        for x, mask, im in train_iter:
            n_samples += x.shape[1]
            uidx += 1

            # Update
            ud_start = time.time()
            cost = f_grad_shared(x, mask, im)
            f_update(model_options["lrate"])
            ud = time.time() - ud_start

            if numpy.isnan(cost) or numpy.isinf(cost):
                print "NaN detected"
                return 1.0, 1.0, 1.0

            if numpy.mod(uidx, model_options["dispFreq"]) == 0:
                print "Epoch ", eidx, "Update ", uidx, "Cost ", cost, "UD ", ud
                log.update({"Error": float(cost)}, n_samples)

            if numpy.mod(uidx, model_options["validFreq"]) == 0:

                print "Computing results..."

                # encode sentences efficiently
                dev_s = encode_sentences(curr_model, dev_caps, batch_size=model_options["batch_size"])
                dev_i = encode_images(curr_model, dev_ims)

                # compute errors
                dev_errs = compute_errors(curr_model, dev_s, dev_i)

                # compute ranking error
                (r1, r5, r10, medr, meanr), vis_details = t2i(dev_errs, vis_details=True)
                (r1i, r5i, r10i, medri, meanri) = i2t(dev_errs)
                print "Text to image (dev set): %.1f, %.1f, %.1f, %.1f, %.1f" % (r1, r5, r10, medr, meanr)
                log.update({"R@1": r1, "R@5": r5, "R@10": r10, "median_rank": medr, "mean_rank": meanr}, n_samples)
                print "Image to text (dev set): %.1f, %.1f, %.1f, %.1f, %.1f" % (r1i, r5i, r10i, medri, meanri)
                log.update(
                    {
                        "Image2Caption_R@1": r1i,
                        "Image2Caption_R@5": r5i,
                        "Image2Caption_R@10": r10i,
                        "Image2Caption_median_rank": medri,
                        "Image2Caption_mean_rank": meanri,
                    },
                    n_samples,
                )

                tot = r1 + r5 + r10  # sum of dev recall@1/5/10 (text-to-image), used to keep the best checkpoint
                if tot > curr:
                    curr = tot
                    # Save parameters
                    print "Saving...",
                    numpy.savez("%s/%s" % (save_dir, name), **unzip(tparams))
                    print "Done"
                    vis_details["hyperparams"] = model_options
                    # Save visualization details
                    with open("vis/roc/%s/%s.json" % (model_options["data"], timestampedName), "w") as f:
                        json.dump(vis_details, f)
                    # Add the new model to the index
                    try:
                        index = json.load(open("vis/roc/index.json", "r"))
                    except IOError:
                        index = {model_options["data"]: []}

                    models = index[model_options["data"]]
                    if timestampedName not in models:
                        models.append(timestampedName)

                    with open("vis/roc/index.json", "w") as f:
                        json.dump(index, f)

        print "Seen %d samples" % n_samples
Exemplo n.º 57
0
def train(dim_word_desc=400, # word vector dimensionality
          dim_word_q=400,
          dim_word_ans=600,
          dim_proj=300,
          dim=400,# the number of LSTM units
          encoder_desc='lstm',
          encoder_desc_word='lstm',
          encoder_desc_sent='lstm',
          use_dq_sims=False,
          eyem=None,
          learn_h0=False,
          use_desc_skip_c_g=False,
          debug=False,
          encoder_q='lstm',
          patience=10,
          max_epochs=5000,
          dispFreq=100,
          decay_c=0.,
          alpha_c=0.,
          clip_c=-1.,
          lrate=0.01,
          n_words_q=49145,
          n_words_desc=115425,
          n_words_ans=409,
          pkl_train_files=None,
          pkl_valid_files=None,
          maxlen=2000, # maximum length of the description
          optimizer='rmsprop',
          batch_size=2,
          vocab=None,
          valid_batch_size=16,
          use_elu_g=False,
          saveto='model.npz',
          model_dir=None,
          ms_nlayers=3,
          validFreq=1000,
          saveFreq=1000, # save the parameters after every saveFreq updates
          datasets=[None],
          truncate=400,
          momentum=0.9,
          use_bidir=False,
          cost_mask=None,
          valid_datasets=['/u/yyu/stor/caglar/rc-data/cnn/cnn_test_data.h5',
                          '/u/yyu/stor/caglar/rc-data/cnn/cnn_valid_data.h5'],
          dropout_rate=0.5,
          use_dropout=True,
          reload_=True,
          **opt_ds):

    ensure_dir_exists(model_dir)
    mpath = os.path.join(model_dir, saveto)
    mpath_best = os.path.join(model_dir, prfx("best", saveto))
    mpath_last = os.path.join(model_dir, prfx("last", saveto))
    mpath_stats = os.path.join(model_dir, prfx("stats", saveto))

    # Model options
    model_options = locals().copy()
    model_options['use_sent_reps'] = opt_ds['use_sent_reps']
    stats = defaultdict(list)

    del model_options['eyem']
    del model_options['cost_mask']

    if cost_mask is not None:
        cost_mask = sharedX(cost_mask)

    # reload options and parameters
    if reload_:
        print "Reloading the model."
        if os.path.exists(mpath_best):
            print "Reloading the best model from %s." % mpath_best
            with open('%s.pkl' % mpath_best, 'rb') as f:
                models_options = pkl.load(f)
            params = init_params(model_options)
            params = load_params(mpath_best, params)
        elif os.path.exists(mpath):
            print "Reloading the model from %s." % mpath
            with open('%s.pkl' % mpath, 'rb') as f:
                models_options = pkl.load(f)
            params = init_params(model_options)
            params = load_params(mpath, params)
        else:
            raise IOError("Couldn't open the file.")
    else:
        print "Not reloading; initializing the model from scratch."
        params = init_params(model_options)

    if datasets[0]:
        print "Short dataset", datasets[0]

    print 'Loading data'
    print 'Building model'
    if pkl_train_files is None or pkl_valid_files is None:
        train, valid, test = load_data(path=datasets[0],
                                       valid_path=valid_datasets[0],
                                       test_path=valid_datasets[1],
                                       batch_size=batch_size,
                                       **opt_ds)
    else:
        train, valid, test = load_pkl_data(train_file_paths=pkl_train_files,
                                           valid_file_paths=pkl_valid_files,
                                           batch_size=batch_size,
                                           vocab=vocab,
                                           eyem=eyem,
                                           **opt_ds)

    tparams = init_tparams(params)
    trng, use_noise, inps_d, \
                     opt_ret, \
                     cost, errors, ent_errors, ent_derrors, probs = \
                        build_model(tparams,
                                    model_options,
                                    prepare_data if not opt_ds['use_sent_reps'] \
                                            else prepare_data_sents,
                                    valid,
                                    cost_mask=cost_mask)

    alphas = opt_ret['dec_alphas']

    if opt_ds['use_sent_reps']:
        inps = [inps_d["desc"], \
                inps_d["word_mask"], \
                inps_d["q"], \
                inps_d['q_mask'], \
                inps_d['ans'], \
                inps_d['wlen'],
                inps_d['slen'], inps_d['qlen'],\
                inps_d['ent_mask']
                ]
    else:
        inps = [inps_d["desc"], \
                inps_d["word_mask"], \
                inps_d["q"], \
                inps_d['q_mask'], \
                inps_d['ans'], \
                inps_d['wlen'], \
                inps_d['qlen'], \
                inps_d['ent_mask']]

    outs = [cost, errors, probs, alphas]
    if ent_errors:
        outs += [ent_errors]

    if ent_derrors:
        outs += [ent_derrors]

    # before any regularizer
    print 'Building f_log_probs...',
    f_log_probs = theano.function(inps, outs, profile=profile)
    print 'Done'

    # Apply weight decay on the feed-forward connections
    if decay_c > 0.:
        decay_c = theano.shared(numpy.float32(decay_c), name='decay_c')
        weight_decay = 0.

        for kk, vv in tparams.iteritems():
            if "logit" in kk or "ff" in kk:
                weight_decay += (vv ** 2).sum()

        weight_decay *= decay_c
        cost += weight_decay

    # after any regularizer
    print 'Computing gradient...',
    grads = safe_grad(cost, itemlist(tparams))
    print 'Done'

    # Gradient clipping:
    if clip_c > 0.:
        g2 = get_norms(grads)
        for p, g in grads.iteritems():
            grads[p] = tensor.switch(g2 > (clip_c**2),
                                     (g / tensor.sqrt(g2 + 1e-8)) * clip_c,
                                     g)
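    # Drop the last symbolic input (the entity mask) before building the optimizer functions.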
    inps.pop()
    if optimizer.lower() == "adasecant":
        learning_rule = Adasecant(delta_clip=25.0,
                                  use_adagrad=True,
                                  grad_clip=0.25,
                                  gamma_clip=0.)
    elif optimizer.lower() == "rmsprop":
        learning_rule = RMSPropMomentum(init_momentum=momentum)
    elif optimizer.lower() == "adam":
        learning_rule = Adam()
    elif optimizer.lower() == "adadelta":
        learning_rule = AdaDelta()

    lr = tensor.scalar(name='lr')
    print 'Building optimizers...',
    learning_rule = None  # overrides the rule selected above, so the eval(optimizer) branch below is always taken

    if learning_rule:
        f_grad_shared, f_update = learning_rule.get_funcs(learning_rate=lr,
                                                          grads=grads,
                                                          inp=inps,
                                                          cost=cost,
                                                          errors=errors)
    else:
        f_grad_shared, f_update = eval(optimizer)(lr,
                                                  tparams,
                                                  grads,
                                                  inps,
                                                  cost,
                                                  errors)

    print 'Done'
    print 'Optimization'
    history_errs = []
    # reload history
    if reload_ and os.path.exists(mpath):
        history_errs = list(numpy.load(mpath)['history_errs'])

    best_p = None
    bad_count = 0

    if validFreq == -1:
        validFreq = len(train[0]) / batch_size

    if saveFreq == -1:
        saveFreq = len(train[0]) / batch_size

    best_found = False
    uidx = 0
    estop = False

    train_cost_ave, train_err_ave, \
            train_gnorm_ave = reset_train_vals()

    for eidx in xrange(max_epochs):
        n_samples = 0

        if train.done:
            train.reset()

        for d_, q_, a, em in train:
            n_samples += len(a)
            uidx += 1
            use_noise.set_value(1.)

            if opt_ds['use_sent_reps']:
                # To mask the description and the question.
                d, d_mask, q, q_mask, dlen, slen, qlen = prepare_data_sents(d_,
                                                                            q_)

                if d is None:
                    print 'Minibatch with zero sample under length ', maxlen
                    uidx -= 1
                    continue

                ud_start = time.time()
                cost, errors, gnorm, pnorm = f_grad_shared(d,
                                                           d_mask,
                                                           q,
                                                           q_mask,
                                                           a,
                                                           dlen,
                                                           slen,
                                                           qlen)
            else:
                d, d_mask, q, q_mask, dlen, qlen = prepare_data(d_, q_)

                if d is None:
                    print 'Minibatch with zero sample under length ', maxlen
                    uidx -= 1
                    continue

                ud_start = time.time()
                cost, errors, gnorm, pnorm = f_grad_shared(d, d_mask,
                                                           q, q_mask,
                                                           a,
                                                           dlen,
                                                           qlen)

            upnorm = f_update(lrate)
            ud = time.time() - ud_start

            # Collect the running ave train stats.
            train_cost_ave = running_ave(train_cost_ave,
                                         cost)
            train_err_ave = running_ave(train_err_ave,
                                        errors)
            train_gnorm_ave = running_ave(train_gnorm_ave,
                                          gnorm)

            if numpy.isnan(cost) or numpy.isinf(cost):
                print 'NaN detected'
                import ipdb; ipdb.set_trace()

            if numpy.mod(uidx, dispFreq) == 0:
                print 'Epoch ', eidx, ' Update ', uidx, \
                        ' Cost ', cost, ' UD ', ud, \
                        ' UpNorm ', upnorm[0].tolist(), \
                        ' GNorm ', gnorm, \
                        ' Pnorm ', pnorm, 'Terrors ', errors

            if numpy.mod(uidx, saveFreq) == 0:
                print 'Saving...',
                if best_p is not None and best_found:
                    numpy.savez(mpath_best, history_errs=history_errs, **best_p)
                    pkl.dump(model_options, open('%s.pkl' % mpath_best, 'wb'))
                else:
                    params = unzip(tparams)

                numpy.savez(mpath, history_errs=history_errs, **params)
                pkl.dump(model_options, open('%s.pkl' % mpath, 'wb'))
                pkl.dump(stats, open("%s.pkl" % mpath_stats, 'wb'))

                print 'Done'
                print_param_norms(tparams)

            if numpy.mod(uidx, validFreq) == 0:
                use_noise.set_value(0.)
                if valid.done:
                    valid.reset()

                valid_costs, valid_errs, valid_probs, \
                        valid_alphas, error_ent, error_dent = eval_model(f_log_probs,
                                                  prepare_data if not opt_ds['use_sent_reps'] \
                                                    else prepare_data_sents,
                                                  model_options,
                                                  valid,
                                                  use_sent_rep=opt_ds['use_sent_reps'])

                valid_alphas_ = numpy.concatenate([va.argmax(0) for va  in valid_alphas.tolist()], axis=0)
                valid_err = valid_errs.mean()
                valid_cost = valid_costs.mean()
                valid_alpha_ent = -negentropy(valid_alphas)

                mean_valid_alphas = valid_alphas_.mean()
                std_valid_alphas = valid_alphas_.std()

                mean_valid_probs = valid_probs.argmax(1).mean()
                std_valid_probs = valid_probs.argmax(1).std()

                history_errs.append([valid_cost, valid_err])

                stats['train_err_ave'].append(train_err_ave)
                stats['train_cost_ave'].append(train_cost_ave)
                stats['train_gnorm_ave'].append(train_gnorm_ave)

                stats['valid_errs'].append(valid_err)
                stats['valid_costs'].append(valid_cost)
                stats['valid_err_ent'].append(error_ent)
                stats['valid_err_desc_ent'].append(error_dent)

                stats['valid_alphas_mean'].append(mean_valid_alphas)
                stats['valid_alphas_std'].append(std_valid_alphas)
                stats['valid_alphas_ent'].append(valid_alpha_ent)

                stats['valid_probs_mean'].append(mean_valid_probs)
                stats['valid_probs_std'].append(std_valid_probs)

                if uidx == 0 or valid_err <= numpy.array(history_errs)[:, 1].min():
                    best_p = unzip(tparams)
                    bad_count = 0
                    best_found = True
                else:
                    best_found = False

                if numpy.isnan(valid_err):
                    import ipdb; ipdb.set_trace()


                print "============================"
                print '\t>>>Valid error: ', valid_err, \
                        ' Valid cost: ', valid_cost
                print '\t>>>Valid pred mean: ', mean_valid_probs, \
                        ' Valid pred std: ', std_valid_probs
                print '\t>>>Valid alphas mean: ', mean_valid_alphas, \
                        ' Valid alphas std: ', std_valid_alphas, \
                        ' Valid alpha negent: ', valid_alpha_ent, \
                        ' Valid error ent: ', error_ent, \
                        ' Valid error desc ent: ', error_dent

                print "============================"
                print "Running average train stats "
                print '\t>>>Train error: ', train_err_ave, \
                        ' Train cost: ', train_cost_ave, \
                        ' Train grad norm: ', train_gnorm_ave
                print "============================"


                train_cost_ave, train_err_ave, \
                    train_gnorm_ave = reset_train_vals()


        print 'Seen %d samples' % n_samples

        if estop:
            break

    if best_p is not None:
        zipp(best_p, tparams)

    use_noise.set_value(0.)
    valid.reset()
    valid_cost, valid_error, valid_probs, \
            valid_alphas, error_ent, error_dent = eval_model(f_log_probs,
                                      prepare_data if not opt_ds['use_sent_reps'] \
                                           else prepare_data_sents,
                                      model_options, valid,
                                      use_sent_rep=opt_ds['use_sent_reps'])

    print " Final eval results: "
    print 'Valid error: ', valid_error.mean()
    print 'Valid cost: ', valid_cost.mean()
    print '\t>>>Valid pred mean: ', valid_probs.mean(), \
            ' Valid pred std: ', valid_probs.std(), \
            ' Valid error ent: ', error_ent

    params = copy.copy(best_p)

    numpy.savez(mpath_last,
                zipped_params=best_p,
                history_errs=history_errs,
                **params)

    return valid_err, valid_cost
Exemplo n.º 58
0
    def __init__(self,
                 alpha,
                 batch_size,
                 n_epochs,
                 wordVecLen,
                 flag_dropout,
                 datapath,
                 random_seed,
                 dropoutRates,
                 optimizer,
                 dispFreq,
                 beam_size,
                 flag_random_lookup_table,
                 flag_toy_data,
                 size_hidden_layer,
                 dataset,
                 result_path,
                 sentence_modeling,
                 CNN_filter_length,
                 LSTM_go_backwards
                 ):
        model_options = locals().copy()
        model_options['rng'] = np.random.RandomState(random_seed)
        print 'Loading data'
        src_train,src_valid,src_test,dic_w2idx, dic_idx2w, dic_w2embed, dic_idx2embed, embedding = load_data(path=datapath)
        if flag_toy_data == True:
            src_valid = src_valid[:10]
            src_test = src_test[:10] 
            #src_train = copy.copy(src_valid)
            src_train = src_train[:10]
        elif flag_toy_data != False:  # a fractional value keeps that fraction of each split
            valid_l = len(src_valid) * flag_toy_data
            test_l = len(src_test) * flag_toy_data
            train_l = len(src_train) * flag_toy_data
            src_valid = src_valid[:int(valid_l)]
            src_test = src_test[:int(test_l)] 
            src_train = src_train[:int(train_l)]
            
        train,pairdict_train = prepare_data(src_train)
        valid,pairdict_valid = prepare_data(src_valid)
        test,pairdict_test = prepare_data(src_test)
        model_options['embedding'] = embedding
        
        (sentence1,sentence1_mask,sentence2,sentence2_mask,y,cost,f_pred,tparams,f_debug) = build_model(model_options)
        #f_cost = theano.function([sentence1,sentence1_mask,sentence2,sentence2_mask,y], cost, name='f_cost')
    
        #grads = tensor.grad(theano.gradient.grad_clip(cost, -2.0, 2.0), wrt=tparams.values())
        grads = tensor.grad(theano.gradient.grad_clip(cost, -2.0, 2.0), wrt=tparams)
        # grads = tensor.grad(cost, wrt=tparams.values())
        #f_grad = theano.function([sentence1,sentence1_mask,sentence2,sentence2_mask,y], grads, name='f_grad')
    
        lr = tensor.scalar(name='lr')
        if model_options['optimizer'] == 'sgd': optimizer = sgd
        elif model_options['optimizer'] == 'rmsprop': optimizer = rmsprop
        else: optimizer = adadelta
        f_grad_shared, f_update = optimizer(lr, tparams, grads, sentence1,sentence1_mask,sentence2,sentence2_mask,y, cost)
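        # f_grad_shared computes the cost and stores the gradients; f_update then applies them with the given learning rate.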
        
        
        print 'Optimization'

        kf_valid = get_minibatches_idx(len(valid), model_options['batch_size'])
        kf_test = get_minibatches_idx(len(test), model_options['batch_size'])
    
        print "%d train examples" % len(train)
        print "%d valid examples" % len(valid)
        print "%d test examples" % len(test)
        sys.stdout.flush()
        
        
        best_validation_score = -np.inf
        best_iter = 0
        uidx = 0  # the number of update done
        for epoch in xrange(model_options['n_epochs']):
            print ('Training on %d epoch' % epoch)
            sys.stdout.flush()
            kf = get_minibatches_idx(len(train), batch_size, shuffle=True)
            start_time = time.time()
            samples_seen = 0
            for _, train_index in kf:
                uidx += 1
                batch_samples = [train[t] for t in train_index]
                samples_seen += len(batch_samples)
                #print batch_samples
                sentence1,sentence1_mask,sentence2,sentence2_mask,y = data_padding(batch_samples)
                #print sentence1,sentence1_mask,sentence2,sentence2_mask,y
                #print sentence1.shape,sentence1_mask.shape,sentence2.shape,sentence2_mask.shape,y.shape
                #o = f_debug(sentence1,sentence1_mask,sentence2,sentence2_mask,y)
                #print o
                #print o[0].shape,o[1].shape,o[2].shape,o[3].shape
                cost = f_grad_shared(sentence1,sentence1_mask,sentence2,sentence2_mask,y)
                f_update(model_options['alpha'])
                if np.isnan(cost) or np.isinf(cost):
                    print 'NaN detected'
                    return 1., 1., 1.

                if np.mod(uidx, dispFreq) == 0:
                    print 'Epoch ', epoch, 'Update ', uidx, 'Cost ', cost, 'Samples_seen ', samples_seen
                    sys.stdout.flush()
            print 'Epoch ', epoch, 'Update ', uidx, 'Cost ', cost, 'Samples_seen ', samples_seen
            sys.stdout.flush()
            '''
            if epoch % 5 == 0:
                kf_train = get_minibatches_idx(len(train), batch_size)
                print ('Train_score:')
                self.eva(f_pred, src_train, train, pairdict_train, kf_train, model_options)
                sys.stdout.flush()
            '''
            print ('Valid_score:')
            top1_res = self.eva(f_pred, src_valid, valid, pairdict_valid, kf_valid, model_options)
            self.save_result(model_options['result_path'] + 'dev.on.' + str(epoch) +'th_epoch_' + model_options['dataset'],top1_res)
            sys.stdout.flush()
            print ('Test_score:')
            top1_res = self.eva(f_pred, src_test, test, pairdict_test, kf_test, model_options)
            self.save_result(model_options['result_path'] + 'test.on.' + str(epoch) +'th_epoch_' + model_options['dataset'],top1_res)
            sys.stdout.flush()
            
            print ('%d epoch completed.' % epoch)
            sys.stdout.flush()
            '''
            if(best_validation_score < valid_score):
                best_iter = epoch
                best_validation_score = valid_score
            print ('Current best_dev_F is %.2f, at %d epoch'%(best_validation_score,best_iter))
            '''
        
            end_time = time.time()
            minu = int((end_time - start_time)/60)
            sec = (end_time - start_time) - 60 * minu
            print ('Time: %d min %.2f sec' % (minu, sec))
            sys.stdout.flush()
        print('Training completed!')
        sys.stdout.flush()
       
        
Exemplo n.º 59
0
def metdraw(filename,count_mets=None,met_file=None,show=False,
            engine='fdp',output='svg',quiet=False,q='1',Ln='1000',
            json=False,norun=False,status=False,dotcmd='dot',no_gpr=False,
            defaults=defaults):

    sbml_filename = filename
    if filename.endswith('.xml'):
        filename = filename[:-4]
    dot_filename = filename + '.dot'
    mets_filename = filename + '.mets'
    gpr_filename = filename + '.gpr'
    output_filename = filename + '.' + output

    if not quiet:
        print 'Loading model file', sbml_filename
    if filename.endswith('.json'):
        model = Model.build_model(*model_json.parse_json_file(file=sbml_filename))
    else:
        pieces = sbml.parse_sbml_file(file=sbml_filename)
        model = Model.build_model(**pieces)
        if not no_gpr:
            gpr.write_gpr_file(gpr.Gpr(pieces['reactions']),gpr_filename)
            if not quiet:
                print 'GPR written to file', gpr_filename
    model.name = filename
    model.set_param(**defaults)
    
    if count_mets:
        if not quiet:
            print 'Writing metabolite counts to file', filename+'.mets'
        Minors.write_met_file(Minors.count_species(model),
                              filename=mets_filename,
                              json=json)
        return
    
    if met_file:
        minors = Minors.read_met_file(filename=met_file)
        if not quiet:
            print len(minors), "minors loaded from file '{0}'".format(met_file)
    else:
        # find the minors in the model; for now, we create a temporary mets
        # file that is deleted after loading the minors
        temp_filename = mets_filename + '.TEMP'
        Minors.write_met_file(Minors.count_species(model),filename=temp_filename)
        minors = Minors.read_met_file(temp_filename)
        os.remove(temp_filename)
        if not quiet:
            print len(minors), "minors found in model"
    model.set_param(name="minors",value=minors)
        
    if show:
        model.display()
        display_parameters(defaults)
    
    if not quiet:
        print 'Creating reaction layout'
    g = layout.model_to_dot(model)
    
    if not quiet:
        print 'Creating DOT file', dot_filename
    g.to_file(dot_filename)
    
    # run graphviz
    if not quiet:
        print 'Preparing Graphviz call:'
    cmdstr = '{dot} -q{q} -Ln{Ln} -K{engine} -T{fmt} -o {outfile} {file}'
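    # Assemble the Graphviz command line from the chosen layout engine, output format, and file names.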
    cmd = cmdstr.format(dot=dotcmd,
                        q=q,Ln=Ln,
                        engine=engine,
                        fmt=output,
                        outfile=output_filename,
                        file=dot_filename)
    if not quiet:
        print '   ' + cmd
    if not norun:
        print 'Running Graphviz'
        error = os.system(cmd)
        if error:
            print "Error running dot:", error
    else:
        print 'ok'

    # clean up intermediate DOT file
    os.remove(dot_filename)
Exemplo n.º 60
0
            theano.In(X_batch),
        ],
        outputs=generator_loss,
        updates=generator_updates,
        givens={
            X: X_batch,
        },
    )

    return generator_func


if __name__ == '__main__':
    import model
    print('building model')
    layers = model.build_model()

    print('compiling theano functions')
    encoder_decoder_func = create_encoder_decoder_func(layers)
    discriminator_func = create_discriminator_func(layers)
    generator_func = create_generator_func(layers)

    import numpy as np
    X = np.random.random((16, 28 * 28)).astype(np.float32)
    pz = np.random.uniform(-2, 2, size=(16, 2)).astype(np.float32)
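    # X is a random batch of flattened 28x28 inputs; pz is a matching batch of 2-D codes (e.g. samples from the prior).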

    print('X.shape = %r' % (X.shape,))
    print('pz.shape = %r' % (pz.shape,))

    print('running the three forward passes')
    print(encoder_decoder_func(X))