Example #1
def create_models(samples, dictionary):

    neumf = NeuMF(len(samples), len(dictionary), 0.5, 8, [64, 32, 16, 8])
    attr_nets = dict()
    for key, labels in dictionary.items():
        if labels is None:
            # regression
            attr_nets[key] = Regression(16)
        else:
            # classification
            # NOTE: the class count excludes the blank label
            attr_nets[key] = Classification(16, len(labels) - 1)
    return neumf, attr_nets
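
Usage sketch (not from the original source): the branch above keys off whether an attribute's label list is None. Assuming create_models from the snippet above is in scope, the hypothetical stubs below stand in for the real NeuMF, Regression, and Classification classes:

class NeuMF:
    # stub; the real constructor is assumed to take
    # (n_samples, n_attrs, dropout, mf_dim, mlp_layers)
    def __init__(self, n_samples, n_attrs, dropout, mf_dim, mlp_layers):
        pass

class Regression:
    def __init__(self, hidden_dim):          # stub: shared hidden size only
        pass

class Classification:
    def __init__(self, hidden_dim, n_classes):
        self.n_classes = n_classes           # stub keeps the class count

samples = list(range(1000))                  # placeholder sample ids
dictionary = {'age': None,                   # None -> regression head
              'gender': ['', 'M', 'F']}      # label list -> classification head
neumf, attr_nets = create_models(samples, dictionary)
print(type(attr_nets['age']).__name__)       # Regression
print(attr_nets['gender'].n_classes)         # 2: the blank label is excluded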
Example #2
    config = args.parse_args()

    if config.mode == 'train':
        dropout_prob = config.dropout
        is_training = True
    else:
        dropout_prob = 0.0
        is_training = False

    if not HAS_DATASET and not IS_ON_NSML:  # not running on NSML
        if config.dataset_path:
            DATASET_PATH = config.dataset_path
        else:
            DATASET_PATH = '../sample_data/movie_review/'

    model = Regression(config.embedding, config.strmaxlen, dropout_prob,
                       config.rnn_layers, use_gpu=USE_GPU,
                       model_type=config.model_type)
    if USE_GPU:
        #if USE_GPU > 1:
        #    model = nn.DataParallel(model)
        model = model.cuda()

    # DONOTCHANGE: Reserved for nsml use
    bind_model(model, config)

    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=0.0001)

    # DONOTCHANGE: They are reserved for nsml
    if config.pause:
        nsml.paused(scope=locals())
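
For reference, a hypothetical argparse setup that would produce the config fields this snippet reads (mode, dropout, dataset_path, embedding, strmaxlen, rnn_layers, model_type, pause); the defaults are illustrative, not the project's actual values:

import argparse

args = argparse.ArgumentParser()
args.add_argument('--mode', type=str, default='train')
args.add_argument('--dropout', type=float, default=0.5)
args.add_argument('--dataset_path', type=str, default='')
args.add_argument('--embedding', type=int, default=8)
args.add_argument('--strmaxlen', type=int, default=200)
args.add_argument('--rnn_layers', type=int, default=1)
args.add_argument('--model_type', type=str, default='rnn')
args.add_argument('--pause', type=int, default=0)
config = args.parse_args([])        # pass [] to take the defaults
print(config.mode, config.dropout)  # train 0.5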
Example #3
    # Exact relation between Y and X
    def f(X):
        shifted = np.abs(X - 0.03) + 0.03
        return np.log(10.0 * shifted) * np.sin(np.pi * shifted)

    # Generate test data
    N_star = 2000
    X_star = np.linspace(-2., 2., N_star)[:, None]
    Y_star = f(X_star)

    # Model creation
    # layer widths (input -> hidden layers -> output) for the three networks
    layers_P = np.array([X_dim + Z_dim, 100, 100, 100, Y_dim])
    layers_Q = np.array([X_dim + Y_dim, 100, 100, 100, Z_dim])
    layers_T = np.array([X_dim + Y_dim, 100, 100, 1])
    model = Regression(X, Y, layers_P, layers_Q, layers_T, lam=1.5)

    model.train(nIter=20000, batch_size=N)

    # Prediction
    plt.figure(1)
    N_samples = 500
    samples_mean = np.zeros((X_star.shape[0], N_samples))
    for i in range(0, N_samples):
        samples_mean[:, i:i + 1] = model.generate_sample(X_star)
        plt.plot(X_star, samples_mean[:, i:i + 1], 'k.', alpha=0.005)
    plt.plot(X, Y, 'r*', alpha=0.2, label='%d training data' % N)

    mu_pred = np.mean(samples_mean, axis=1)    # predictive mean
    Sigma_pred = np.var(samples_mean, axis=1)  # predictive variance
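
The mean and variance above summarize the Monte Carlo draws; a two-standard-deviation band is the usual next step. A self-contained numpy sketch, with a toy sampler standing in for model.generate_sample:

import numpy as np

rng = np.random.default_rng(0)
X_star = np.linspace(-2., 2., 2000)[:, None]

def generate_sample(X):
    # toy stand-in: one noisy draw around sin(pi * x)
    return np.sin(np.pi * X) + 0.1 * rng.standard_normal(X.shape)

N_samples = 500
samples_mean = np.hstack([generate_sample(X_star) for _ in range(N_samples)])

mu_pred = np.mean(samples_mean, axis=1)      # predictive mean, as above
Sigma_pred = np.var(samples_mean, axis=1)    # predictive variance, as above
band = 2.0 * np.sqrt(Sigma_pred)             # ~95% band if roughly Gaussian
print(mu_pred[:3], band[:3])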
Example #4
File: main.py  Project: mdhyeok/hackathon
    args.add_argument('--output', type=int, default=1)
    args.add_argument('--max_epoch', type=int, default=10)
    args.add_argument('--batch', type=int, default=2000)
    args.add_argument('--strmaxlen', type=int, default=200)
    args.add_argument('--embedding', type=int, default=8)

    # Select model
    args.add_argument('--model', type=str, default='classification',
                      choices=['regression', 'classification'])
    config = args.parse_args()

    print('HAS_DATASET :', HAS_DATASET)
    print('IS_ON_NSML :', IS_ON_NSML)
    print('DATASET_PATH :', DATASET_PATH)

    # NOTE: both candidate models are constructed here; only the chosen one is kept
    model_type = {
        'regression' : Regression(config.embedding, config.strmaxlen),
        'classification' : Classification(config.embedding, config.strmaxlen),
    }

    model = model_type[config.model]
    if GPU_NUM:
        model = model.cuda()

    # DONOTCHANGE: Reserved for nsml use
    bind_model(model, config)

    criterion_type = {
        'regression' : nn.MSELoss(),
        'classification' : nn.CrossEntropyLoss(),
    }
    criterion = criterion_type[config.model]
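
Design note: the model_type dict constructs both networks eagerly and discards the unused one, which also allocates its parameters. A lazy-dispatch variant (hypothetical stubs included so the sketch runs standalone) builds only the selected model:

class Regression:                            # stub for the real model
    def __init__(self, embedding, strmaxlen):
        pass

class Classification:                        # stub for the real model
    def __init__(self, embedding, strmaxlen):
        pass

model_factory = {
    'regression':     lambda: Regression(8, 200),
    'classification': lambda: Classification(8, 200),
}
model = model_factory['classification']()    # only this one is instantiated
print(type(model).__name__)                  # Classification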
Example #5
def main():

    INPUT_DIM = 21
    OUTPUT_DIM = 6
    BATCH_SIZE = 64
    EPOCH = 500
    data_path = "A"

    criterion = nn.MSELoss().to(DEVICE)
    model = Regression(INPUT_DIM, OUTPUT_DIM).to(DEVICE)
    optimizer = Adam(model.parameters(), lr=1e-3, betas=(0.5, 0.99))

    dataloader = get_dataloader(-1,
                                dataset_path=data_path,
                                train=True,
                                batch_size=BATCH_SIZE,
                                dataset_name="AirQualityV1")
    valid_loader = get_dataloader(-1,
                                  dataset_path=data_path,
                                  train=False,
                                  batch_size=5000,
                                  shuffle=False,
                                  dataset_name="AirQualityV1")

    loss_arr = []
    loss_arr_valid = []
    for i in range(EPOCH):
        # training
        total_loss = 0
        for index, (predict_feature, label, _, _) in enumerate(dataloader):

            model.train()
            predict_feature = predict_feature.to(DEVICE)
            label = label.to(DEVICE)
            logit = model(predict_feature)

            loss = criterion(logit, label)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
        loss_arr.append(total_loss / len(dataloader))

        # valid
        # model.eval()
        # with torch.no_grad():
        #     valid_predict, valid_label, predict_feature, target_feature = next(iter(valid_loader))
        #     valid_predict, valid_label = valid_predict.to(DEVICE), valid_label.to(DEVICE)
        #     t_logit = model(valid_predict)
        #     t_loss = criterion(t_logit, valid_label)
        #     t_aqi, t_index = calculate_aqi(t_logit.data.cpu().numpy() + predict_feature.data.numpy())
        #     r_aqi, r_index = calculate_aqi(target_feature.data.numpy())
        #     acc = accuracy_score(r_index, t_index)
        #     f1 = f1_score(r_index, t_index, average="macro")
        #     print("epoch: {}, training_loss: {}; validate_loss: {}, acc: {:.4f}, f1: {:.4f}"
        #           .format(i+1, total_loss / len(dataloader), t_loss, acc, f1))
        #     loss_arr_valid.append(t_loss)
        #
        #     if (i + 1) % 50 == 0:
        #         plt.figure(figsize=(10, 10))
        #         y_labels = ["SO2(μg/m³)", "NO2(μg/m³)", "PM10(μg/m³)", "PM2.5(μg/m³)", "O3(μg/m³)", "CO(mg/m³)"]
        #         for item in range(1, 5):
        #             plt.rcParams["font.family"] = 'Arial Unicode MS'
        #             plt.subplot(2, 2, item)
        #             plt.plot(np.array(r_aqi)[:, item], 'orange', label='real AQI', linewidth=3)
        #             plt.plot(np.array(t_aqi)[:, item], 'royalblue', label='predict AQI', linewidth=3)
        #             plt.ylabel("AQI ({})".format(y_labels[item]))
        #             plt.xlabel("Time")
        #             plt.legend()
        #             plt.xticks(rotation=45)
        #         plt.savefig('./results/AQI{}_v1.png'.format(data_path), dpi=200)
        #
        # plt.figure(figsize=(6.4, 5.2))
        # plt.rcParams["font.family"] = 'Arial Unicode MS'
        # plt.plot(np.array(loss_arr), 'orange', label='training loss', linewidth=3)
        # plt.plot(np.array(loss_arr_valid), 'royalblue', label='validate loss', linewidth=3)
        # plt.ylabel('MSE')
        # plt.xlabel('Epoch')
        # plt.xticks(rotation=45)
        # plt.legend()
        # plt.savefig('./results/loss{}_v1.png'.format(data_path), dpi=200)
        # torch.save(model, './results/Regression{}.pk'.format(data_path))

        if (i + 1) % 50 == 0:
            test(model, data_path)
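
The commented-out block above sketches a validation pass with torch.no_grad(). A minimal runnable reduction of the same train/validate pattern, with nn.Linear standing in for Regression and random tensors standing in for the AirQualityV1 loaders:

import torch
import torch.nn as nn
from torch.optim import Adam

DEVICE = 'cpu'
model = nn.Linear(21, 6).to(DEVICE)          # INPUT_DIM=21, OUTPUT_DIM=6
criterion = nn.MSELoss().to(DEVICE)
optimizer = Adam(model.parameters(), lr=1e-3, betas=(0.5, 0.99))

x_train, y_train = torch.randn(64, 21), torch.randn(64, 6)
x_valid, y_valid = torch.randn(64, 21), torch.randn(64, 6)

for epoch in range(5):
    model.train()
    optimizer.zero_grad()
    loss = criterion(model(x_train), y_train)
    loss.backward()
    optimizer.step()

    model.eval()
    with torch.no_grad():                    # no gradients during validation
        valid_loss = criterion(model(x_valid), y_valid)
    print(epoch, loss.item(), valid_loss.item())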