Example #1
def train(epochs, layer, lr, lambd):
    model = FNN(feature.shape[2], 1, layer, 128)
    model.apply(weight_init)
    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=lambd)

    if args.cuda:
        model = model.cuda()
    
    print("Training FNN for %d layers, %f learning rate, %f lambda" %(layer, lr, lambd))

    for epoch in range(epochs):
        train_epoch(epoch, model, optimizer, lambd)

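    # Run the trained model on each time step and stack the per-step predictions along dim 1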
    output = model(feature[:, 0, :])
    output = output.unsqueeze(1)
    for i in range(1, feature.shape[1]) :
        output = torch.cat((output, model(feature[:, i, :]).unsqueeze(1)), 1)

    # t_weight = torch.stack((weight, weight),2)
    t_weight = weight
    output = output.squeeze()

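    # Weight each step's prediction and sum over the sequence dimension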
    output = torch.mul(t_weight, output)
    output = torch.sum(output, 1)

    loss_train = Loss(output[idx_train], out[idx_train])

    loss_val = Loss(output[idx_val], out[idx_val])

    print("Result for %d layers, %f learning rate, %f lambda" %(layer, lr, lambd))
    print('loss_val: {:.4f}'.format(loss_val.item()))

    return model, output, loss_val
Example #2
def main(argv=None):    # pylint: disable=unused-argument
    algo = FLAGS.model
    eprint(algo)
    field_sizes = None
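    # Build the model selected by FLAGS.model; FNN and PNN1 also need per-field input sizes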
    if algo == 'fmuv':
        params = {
            'data_dir': FLAGS.data_dir,
            'num_epochs': FLAGS.num_epochs,
            'batch_size': FLAGS.batch_size,
            'input_dim': FLAGS.input_dim,
            'factor_order': 12,
            'l2_w': 0.001,
        }
        eprint(params)
        model = FMUV(**params)
    elif algo == 'fnn':
        field_sizes = [FLAGS.input_dim] * FLAGS.num_field
        params = {
            'data_dir': FLAGS.data_dir,
            'batch_size': FLAGS.batch_size,
            'num_epochs': FLAGS.num_epochs,
            'input_dim': FLAGS.input_dim,
            'layer_sizes': [field_sizes, 12, 200, 1],
            'layer_acts': ['none', 'tanh', 'none'],
            'layer_l2': [0, 0, 0],
            'l2_w': 0.001,
        }
        eprint(params)
        model = FNN(**params)
    elif algo == 'pnn1':
        field_sizes = [FLAGS.input_dim] * FLAGS.num_field
        params = {
            'data_dir': FLAGS.data_dir,
            'batch_size': FLAGS.batch_size,
            'num_epochs': FLAGS.num_epochs,
            'input_dim': FLAGS.input_dim,
            'layer_sizes': [field_sizes, 12, 1],
            'layer_acts': ['tanh', 'none'],
            'layer_l2': [0, 0],
            'kernel_l2': 0,
            'l2_w': 0.001,
        }
        eprint(params)
        model = PNN1(**params)

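    # Load the worker's evaluation inputs and run a single evaluation pass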
    X, y, B = worker_input(field_sizes=field_sizes)
    eval_once(model, X, y, B)
Example #3
def train(epochs, layer, lr, lambd, idx_train, idx_val):
    model = FNN(feature.shape[1], out.shape[1], layer, 128)
    model.apply(weight_init)
    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=lambd)

    if args.cuda:
        model = model.cuda()

    print("Training FNN for %d layers, %f learning rate, %f lambda" %(layer, lr, lambd))

    for epoch in range(epochs):
        train_epoch(epoch, model, optimizer, lambd, idx_train, idx_val)

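    # Full-batch forward pass; report validation MSE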
    output = model(feature)
    loss_val = F.mse_loss(output[idx_val], out[idx_val])

    print("Result for %d layers, %f learning rate, %f lambda" %(layer, lr, lambd))
    print('loss_val: {:.4f}'.format(loss_val.item()))
    
    return output, loss_val
Example #4
        'l2_v': 0,
    }

    model = FM(**fm_params)
elif algo == 'fnn':
    fnn_params = {
        'layer_sizes': [field_sizes, 10, 1],
        'layer_acts': ['tanh', 'none'],
        'drop_out': [0, 0],
        'opt_algo': 'gd',
        'learning_rate': 0.1,
        'layer_l2': [0, 0],
        'random_seed': 0
    }

    model = FNN(**fnn_params)
elif algo == 'ccpm':
    ccpm_params = {
        'layer_sizes': [field_sizes, 10, 5, 3],
        'layer_acts': ['tanh', 'tanh', 'none'],
        'drop_out': [0, 0, 0],
        'opt_algo': 'gd',
        'learning_rate': 0.1,
        'random_seed': 0
    }

    model = CCPM(**ccpm_params)
elif algo == 'pnn1':
    pnn1_params = {
        'layer_sizes': [field_sizes, 10, 1],
        'layer_acts': ['tanh', 'none'],
Example #5
}
params_op = {
    'lr': float(args.learning_rate),
    'momentum': float(args.momentum),
    'weight_decay': float(args.weight_decay)
}
path = args.path

training_set = SignalDataset(path, train=True)
train_loader = torch.utils.data.DataLoader(training_set, **params_dataloader)
num_classes = training_set.num_classes

test_set = SignalDataset(path, train=False)
test_loader = torch.utils.data.DataLoader(test_set, **params_dataloader)

model = FNN(**params_model, output_size=num_classes).to(device=device)
nll_loss = nn.NLLLoss()
op = torch.optim.SGD(model.parameters(), **params_op)

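# Optionally resume model and optimizer state from a saved checkpoint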
if args.resume:
    if os.path.isfile(args.resume):
        print("=> loading checkpoint '{}'".format(args.resume))
        checkpoint = torch.load(args.resume)
        args.start_epoch = checkpoint['epoch']
        best_acc1 = checkpoint['best_acc1']
        model.load_state_dict(checkpoint['state_dict'])
        op.load_state_dict(checkpoint['optimizer'])
        print("=> loaded checkpoint '{}' (epoch {})".format(
            args.resume, checkpoint['epoch']))
    else:
        print("=> no checkpoint found at '{}'".format(args.resume))
Example #6
def main_func(activation, data_path, save_path, batch_size, epochs, layer_sizes, mi_methods, test_size, num_bins=[30], num_runs=1, try_gpu=False):
    
    check_for_data(save_path)

    if try_gpu:
        cuda = torch.cuda.is_available() 
        device = torch.device("cuda" if cuda else "cpu")
    else:
        device = torch.device("cpu")
    print("Using "+ str(device))

    loss_function = nn.CrossEntropyLoss() # Only one supported as of now
    max_values = []


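    # One independent training run per random seed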
    for i in tqdm.tqdm(range(args.start_from, num_runs)):
        torch.manual_seed(i)
        torch.cuda.manual_seed(i)
        np.random.seed(i)
        
        train_loader, test_loader, act_full_loader = prepare_data(data_path, test_size, i, batch_size)

        model = FNN(layer_sizes, activation=activation, seed=i).to(device)
        optimizer = optim.Adam(model.parameters(), lr=0.0004)
        tr = Trainer(loss_function, epochs, model, optimizer, device)
        print("Start Training...")
        tr.train(train_loader, test_loader, act_full_loader)

        if args.save_train_error:
            print("Saving train and test error...")
            with open(save_path + '/training_history_run_{}_{}.pickle'.format(i, batch_size), 'wb') as f:
                pickle.dump([tr.error_train, tr.error_test], f, protocol=pickle.HIGHEST_PROTOCOL)
            with open(save_path + '/loss_run_{}_{}.pickle'.format(i, batch_size), 'wb') as f:
                pickle.dump([tr.train_loss, tr.val_loss], f, protocol=pickle.HIGHEST_PROTOCOL)

        if args.save_max_vals:
            print("Saving max activation values...")
            with open(save_path + '/max_values{}_{}.pickle'.format(i, batch_size), 'wb') as f:
                print(np.array(tr.max_value_layers_mi).max())
                pickle.dump(tr.max_value_layers_mi, f, protocol=pickle.HIGHEST_PROTOCOL)

        if args.save_mutual_information:
            for j in num_bins:
                print("Saving mutual information with {} bins...".format(j))
                if "variable" in mi_methods:
                    max_value = info_utils.get_max_value(tr.hidden_activations)
                    num_bins = int(max_value*15)
                    mutual_inf = MI(tr.hidden_activations, act_full_loader,act=activation, num_of_bins=j)
                    MI_XH, MI_YH = mutual_inf.get_mi(method="fixed")
                    with open(save_path + '/MI_XH_MI_YH_run_{}_{}_{}variable.pickle'.format(i, batch_size, j), 'wb') as f:
                        pickle.dump([MI_XH, MI_YH], f, protocol=pickle.HIGHEST_PROTOCOL)


                if "fixed" in mi_methods:
                    mutual_inf = MI(tr.hidden_activations, act_full_loader, act=activation, num_of_bins=j)
                    MI_XH, MI_YH = mutual_inf.get_mi(method="fixed")

                    with open(save_path + '/MI_XH_MI_YH_run_{}_{}_{}bins.pickle'.format(i, batch_size, j), 'wb') as f:
                        pickle.dump([MI_XH, MI_YH], f, protocol=pickle.HIGHEST_PROTOCOL)
                
                if "adaptive" in mi_methods:
                    mutual_inf = MI(tr.hidden_activations, act_full_loader, act=activation, num_of_bins=j)
                    MI_XH, MI_YH = mutual_inf.get_mi(method="adaptive")

                    with open(save_path + '/MI_XH_MI_YH_run_{}_{}_{}adaptive.pickle'.format(i, batch_size, j), 'wb') as f:
                        pickle.dump([MI_XH, MI_YH], f, protocol=pickle.HIGHEST_PROTOCOL)

        minv, maxv = info_utils.get_min_max_vals(activation, tr.hidden_activations)
        max_values.append(maxv)
        print(max_values)

        # Delete everything explicitly: Python keeps these objects alive until the
        # names are rebound on the next iteration, which otherwise fills up RAM.
        del model
        del tr
        if args.save_mutual_information:
            del mutual_inf
            del MI_XH
            del MI_YH
        del train_loader
        del test_loader
        del act_full_loader
    print("Done runnning...")
Example #7
import tensorflow as tf

from models import FactorizationMachines, FNN

dao = learning_dao()
dao.build()
X_train, X_test, y_train, y_test = dao.fetch_dataset()
print(X_train)
print(y_train)
features_info = dao.features_info

auc = tf.keras.metrics.AUC(num_thresholds=1000)
optimizer = tf.keras.optimizers.Adam(lr=0.01, decay=0.1)
fm_model = FactorizationMachines(features_info)
fm_model.compile(optimizer=optimizer,
                 loss='binary_crossentropy',
                 metrics=[auc])
fm_model.fit(x=X_train,
             y=y_train,
             epochs=100,
             batch_size=100000,
             validation_split=0.2)
fm_model.evaluate(x=X_test, y=y_test)

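# Build the FNN on top of the trained FM (assumed to be warm-started from the FM's factors)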
model = FNN(fm_model)
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=[auc])
model.fit(x=X_train,
          y=y_train,
          epochs=100,
          batch_size=100000,
          validation_split=0.2)
model.evaluate(x=X_test, y=y_test)
Example #8
def worker_process(cluster, server):
    # assign ops to local worker by default
    with tf.device(
            tf.train.replica_device_setter(
                worker_device="/job:worker/task:%d" % FLAGS.task_index,
                cluster=cluster)):
        ps_num = cluster.num_tasks('ps')
        worker_num = cluster.num_tasks('worker')

        algo = FLAGS.model
        eprint(algo)
        field_sizes = None
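        # Build the model selected by FLAGS.model; FNN and PNN1 also need per-field input sizes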
        if algo == 'fmuv':
            params = {
                'data_dir': FLAGS.data_dir,
                'summary_dir': FLAGS.train_dir,
                'eval_dir': FLAGS.eval_dir,
                'random_seed': FLAGS.task_index,
                'batch_size': FLAGS.batch_size,
                'num_epochs': FLAGS.num_epochs,
                'input_dim': FLAGS.input_dim,
                'learning_rate': FLAGS.learning_rate,
                'opt_algo': FLAGS.optimizer,  #'adagrad',
                'sync': FLAGS.sync_replicas,
                'workers': FLAGS.all_workers,
                'factor_order': 12,
                'l2_w': 0.001,
            }
            eprint(params)
            model = FMUV(**params)
        elif algo == 'fnn':
            field_sizes = [FLAGS.input_dim] * FLAGS.num_field
            params = {
                'data_dir': FLAGS.data_dir,
                'summary_dir': FLAGS.train_dir,
                'eval_dir': FLAGS.eval_dir,
                'random_seed': FLAGS.task_index,
                'batch_size': FLAGS.batch_size,
                'num_epochs': FLAGS.num_epochs,
                'input_dim': FLAGS.input_dim,
                'learning_rate': FLAGS.learning_rate,
                'opt_algo': FLAGS.optimizer,  #'adagrad',
                'sync': FLAGS.sync_replicas,
                'workers': FLAGS.all_workers,
                'layer_sizes': [field_sizes, 12, 200, 1],
                'layer_acts': ['none', 'tanh', 'none'],
                'drop_out': [0, 0, 0],
                'layer_l2': [0, 0, 0],
                'l2_w': 0.001,
            }
            eprint(params)
            model = FNN(**params)
        elif algo == 'pnn1':
            field_sizes = [FLAGS.input_dim] * FLAGS.num_field
            params = {
                'data_dir': FLAGS.data_dir,
                'summary_dir': FLAGS.train_dir,
                'eval_dir': FLAGS.eval_dir,
                'random_seed': FLAGS.task_index,
                'batch_size': FLAGS.batch_size,
                'num_epochs': FLAGS.num_epochs,
                'input_dim': FLAGS.input_dim,
                'learning_rate': FLAGS.learning_rate,
                'opt_algo': FLAGS.optimizer,  #'adagrad',
                'sync': FLAGS.sync_replicas,
                'workers': FLAGS.all_workers,
                'layer_sizes': [field_sizes, 12, 1],
                'layer_acts': ['tanh', 'none'],
                'layer_l2': [0, 0],
                'kernel_l2': 0,
                'l2_w': 0.001,
            }
            eprint(params)
            model = PNN1(**params)

    worker_device = "/job:worker/task:%d" % FLAGS.task_index
    with tf.device(worker_device):
        X, y, B = worker_input(field_sizes=field_sizes)

    # The supervisor takes care of session initialization, restoring from
    # a checkpoint, and closing when done or an error occurs.
    #summary_writer = tf.summary.FileWriter(FLAGS.log_dir, model.graph)
    saver = tf.train.Saver(var_list=model.vars,
                           max_to_keep=FLAGS.max_models_to_keep)
    save_interval = 100 if FLAGS.model == "fmuv" else 600

    def load_pretrained_model(sess):
        restore_file = tf.train.latest_checkpoint(FLAGS.resume_dir)
        eprint('restore:', restore_file)
        saver.restore(sess, restore_file)

    load_model_function = load_pretrained_model if FLAGS.resume_dir != '' else None

    is_chief = (FLAGS.task_index == 0)
    # Create a "supervisor", which oversees the training process.
    sv = tf.train.Supervisor(is_chief=is_chief,
                             logdir=FLAGS.train_dir,
                             saver=saver,
                             init_fn=load_model_function,
                             global_step=model.global_step,
                             save_model_secs=save_interval)

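    # Retry the managed session up to N_failed times; a run lasting longer than
    # 300 seconds is treated as a successful pass and not retried.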
    retry_times = 0
    N_failed = 10
    while retry_times < N_failed:
        try:
            eprint('retry_times = %d' % (retry_times))
            startt = time.time()
            with sv.managed_session(master=server.target) as sess:
                eprint('------ start ------', datetime.now())
                if is_chief:
                    time.sleep(10)
                run_while_batch(sv, sess, model, X, y, B)
            sv.stop()
            eprint("------ end sv stop:", datetime.now())
            endt = time.time()
            if endt - startt > 300:
                retry_times = N_failed
            else:
                time.sleep(10)
                retry_times += 1
        except:
            traceback.print_exc()
            retry_times += 1
            time.sleep(10)