def main_1():
    sparseness = 5
    index = 1
    extend_near_num = 1
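    # load rating triples for this split and the pairwise user distances
    # used by the neighbour-based extension below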
    userId, itemId, rating = load_data(csv_file_path(sparseness, index))
    result = calculate_distance(csv_file_path(sparseness, index))
    distance = convert_distance_result(result)

    file = extend_csv_file_path(sparseness, index, extend_near_num)
    print(file)
    u, i, r = extend_array(extend_near_num, distance, userId, itemId, rating)
    save_data(file, u, i, r)


def main():
    t1 = time()
    r = calculate_distance(csv_file_path(5, 1))
    c_r = convert_distance_result(r)
    print(time() - t1)
    for k, v in c_r.items():
        print(k, v[:3])
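
A minimal entry-point sketch, assuming this module is run directly:

if __name__ == '__main__':
    main()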
Example #3
def uppc_experiment(**kwargs):
    exp_data = dict(kwargs)

    sparseness = kwargs['sparseness']
    index = kwargs['index']
    extend = kwargs['extend']
    matrix_type = kwargs['matrix_type']

    matrix = create_sparse_matrix(
        load_training_data(sparseness, index, extend, matrix_type=matrix_type))
    test_userId, test_itemId, test_rating = \
        load_data(csv_file_path(sparseness, index, training_set=False, matrix_type=matrix_type))

    # user-user Pearson correlation over the rating matrix (rows are users);
    # NaN correlations (e.g. from zero-variance rows) are replaced with -1
    R = np.corrcoef(matrix)
    R = np.nan_to_num(R, nan=-1)

    def predict(user, item):
        # similarity-weighted prediction; copy the column first, since
        # `matrix[:, item]` is a view and mutating it in place would
        # overwrite the ratings the numerator needs (`>= 0` assumes
        # unrated entries are stored as negative values)
        i = matrix[:, item].copy()
        i[i >= 0] = 1
        return np.matmul(R[user], matrix[:, item]) / np.sum(
            np.abs(np.matmul(R[user], i)))

    test_predict = np.array(
        list(map(predict, test_userId, test_itemId)))

    mae = np.mean(np.abs(test_predict - test_rating))
    rmse = np.sqrt(np.mean(np.square(test_predict - test_rating)))

    exp_data["mae"] = float(mae)
    exp_data["rmse"] = float(rmse)
    exp_data['datetime'] = datetime.now()
    print(exp_data)
    auto_insert_database(database_remote_config, exp_data,
                         f'uppc_{matrix_type}')
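
A hypothetical invocation (keys match the kwargs read above; the values, including matrix_type='rt', are illustrative only):

uppc_experiment(sparseness=5, index=1, extend=5, matrix_type='rt')
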
def save_extend_array(sparseness, index, extend_near_nums, matrix_type):
    # accept either a single neighbour count or an iterable of counts
    if isinstance(extend_near_nums, int):
        extend_near_nums = (extend_near_nums, )
    userId, itemId, rating = load_data(
        csv_file_path(sparseness, index, matrix_type=matrix_type))
    result = calculate_distance(
        csv_file_path(sparseness, index, matrix_type=matrix_type))
    distance = convert_distance_result(result)
    for e in extend_near_nums:
        extend_array_and_save(sparseness,
                              index,
                              e,
                              distance,
                              userId,
                              itemId,
                              rating,
                              matrix_type=matrix_type)
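
For example (illustrative values; extend_near_nums may be a single int or an iterable):

save_extend_array(5, 1, (1, 3, 5), matrix_type='rt')
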
Example #5
def experiment(experiment_data: ExperimentData, last_activation, matrix_type):
    sparseness = experiment_data.sparseness  # 5
    index = experiment_data.data_index  # 3
    mf_dim = experiment_data.mf_dim  # 32
    epochs = experiment_data.epochs  # 30
    batch_size = experiment_data.batch_size  # 128
    layers = experiment_data.layers  # [32, 16]
    reg_layers = experiment_data.reg_layers  # [0, 0]
    learning_rate = experiment_data.learning_rate  # 0.007
    extend_near_num = experiment_data.extend_near_num  # 5
    learner = experiment_data.learner  # adagrad
    optimizer = {
        'adagrad': Adagrad(lr=learning_rate),
        'rmsprop': RMSprop(lr=learning_rate),
        'adam': Adam(lr=learning_rate),
        'sgd': SGD(lr=learning_rate)
    }[learner]

    dataset_name = 'sparseness%s_%s' % (sparseness, index)
    model_out_file = '%s_NeuMF_%d_%s_%s.h5' % (dataset_name, mf_dim, layers,
                                               datetime.now())

    userId, itemId, rating = load_training_data(sparseness,
                                                index,
                                                extend_near_num,
                                                matrix_type=matrix_type)
    test_userId, test_itemId, test_rating = \
        load_data(csv_file_path(sparseness, index, training_set=False, matrix_type=matrix_type))

    early_stop = keras.callbacks.EarlyStopping(monitor='mean_absolute_error',
                                               min_delta=0.0002,
                                               patience=10)

    model = custom_model.NeuMF.get_model(num_users=user_num,
                                         num_items=ws_num,
                                         layers=layers,
                                         reg_layers=reg_layers,
                                         mf_dim=mf_dim,
                                         last_activation=last_activation)

    model.compile(optimizer=optimizer,
                  loss='mae',
                  metrics=[metrics.mae, metrics.mse])

    model.fit([userId, itemId],
              rating,
              batch_size=batch_size,
              epochs=epochs,
              callbacks=[early_stop],
              verbose=0,
              shuffle=True)

    mkdir('./Trained')
    model.save('./Trained/{}'.format(model_out_file))
    loss, mae, mse = model.evaluate([test_userId, test_itemId],
                                    test_rating,
                                    steps=1)
    # print('loss: ', loss)
    # print('mae: ', mae)
    # print('rmse', np.sqrt(mse))
    experiment_data.model = model_out_file
    experiment_data.loss = loss
    experiment_data.mae = mae
    experiment_data.rmse = np.sqrt(mse)
    exp_data = experiment_data.to_dict()
    exp_data['datetime'] = datetime.now()
    exp_data['last_activation'] = last_activation
    print(exp_data)
    auto_insert_database(database_config, exp_data, f'ncf_{matrix_type}')
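
A sketch of driving this experiment, assuming ExperimentData accepts these fields as keyword arguments (values taken from the inline comments above; last_activation and matrix_type are illustrative):

data = ExperimentData(sparseness=5, data_index=3, mf_dim=32, epochs=30,
                      batch_size=128, layers=[32, 16], reg_layers=[0, 0],
                      learning_rate=0.007, extend_near_num=5, learner='adagrad')
experiment(data, last_activation='sigmoid', matrix_type='rt')
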
def experiment(**kwargs):
    sparseness = kwargs['sparseness']
    index = kwargs['index']
    epochs = kwargs['epochs']
    batch_size = kwargs['batch_size']
    mf_dim = kwargs['mf_dim']
    regs = kwargs['regs']
    last_activation = kwargs['last_activation']
    learning_rate = kwargs['learning_rate']
    extend_near_num = kwargs['extend_near_num']
    learner = kwargs['learner']
    matrix_type = kwargs['matrix_type']
    exp_data = {
        'sparseness': sparseness,
        'index': index,
        'epochs': epochs,
        'batch_size': batch_size,
        'mf_dim': mf_dim,
        'regs': regs,
        'last_activation': last_activation,
        'learning_rate': learning_rate,
        'extend_near_num': extend_near_num,
        'learner': learner
    }

    optimizer = {
        'adagrad': Adagrad(lr=learning_rate),
        'rmsprop': RMSprop(lr=learning_rate),
        'adam': Adam(lr=learning_rate),
        'sgd': SGD(lr=learning_rate)
    }[learner]
    dataset_name = 'sparseness%s_%s' % (sparseness, index)

    model_out_file = '%s_GMF_%s_extend_%s_%s.h5' % (
        dataset_name, regs, extend_near_num, datetime.now())

    # load file
    userId, itemId, rating = load_training_data(sparseness,
                                                index,
                                                extend_near_num,
                                                matrix_type=matrix_type)
    test_userId, test_itemId, test_rating = \
        load_data(csv_file_path(sparseness, index, training_set=False, matrix_type=matrix_type))
    # load end

    early_stop = keras.callbacks.EarlyStopping(monitor='mean_absolute_error',
                                               min_delta=0.0002,
                                               patience=10)

    model = custom_model.GMF.get_model(num_users=user_num,
                                       num_items=ws_num,
                                       latent_dim=mf_dim,
                                       regs=regs,
                                       last_activation=last_activation)

    model.compile(optimizer=optimizer,
                  loss='mae',
                  metrics=[metrics.mae, metrics.mse])

    model.fit([userId, itemId],
              rating,
              batch_size=batch_size,
              epochs=epochs,
              callbacks=[early_stop],
              verbose=0,
              shuffle=True)

    mkdir('./Trained')
    model.save('./Trained/{}'.format(model_out_file))
    loss, mae, mse = model.evaluate([test_userId, test_itemId],
                                    test_rating,
                                    steps=1)
    # print('loss: ', loss)
    # print('mae: ', mae)
    # print('rmse', np.sqrt(mse))
    exp_data['model'] = model_out_file
    exp_data['mae'] = float(mae)
    exp_data['rmse'] = float(np.sqrt(mse))
    exp_data['datetime'] = datetime.now()
    print(exp_data)
    auto_insert_database(database_config, exp_data, f'gmf_{matrix_type}')
    return exp_data
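
A hypothetical call (keys mirror the kwargs unpacked above; all values are illustrative):

exp_data = experiment(sparseness=5, index=1, epochs=30, batch_size=128,
                      mf_dim=32, regs=[0, 0], last_activation='sigmoid',
                      learning_rate=0.007, extend_near_num=5,
                      learner='adam', matrix_type='rt')
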
Example #7
def experiment(**kwargs):
    sparseness = kwargs['sparseness']
    index = kwargs['index']
    epochs = kwargs['epochs']
    batch_size = kwargs['batch_size']
    layers = kwargs['layers']
    reg_layers = kwargs['reg_layers']
    fake_layers = kwargs['fake_layers']
    fake_reg_layers = kwargs['fake_reg_layers']
    last_activation = kwargs['last_activation']
    fake_last_activation = kwargs['fake_last_activation']
    learning_rate = kwargs['learning_rate']
    extend_near_num = kwargs['extend_near_num']
    learner = kwargs['learner']
    exp_data = {
        'sparseness': sparseness,
        'index': index,
        'epochs': epochs,
        'batch_size': batch_size,
        'layers': layers,
        'reg_layers': reg_layers,
        'fake_layers': fake_layers,
        'fake_reg_layers': fake_reg_layers,
        'last_activation': last_activation,
        'fake_last_activation': fake_last_activation,
        'learning_rate': learning_rate,
        'extend_near_num': extend_near_num,
        'learner': learner
    }

    optimizer = {
        'adagrad': Adagrad(lr=learning_rate),
        'rmsprop': RMSprop(lr=learning_rate),
        'adam': Adam(lr=learning_rate),
        'sgd': SGD(lr=learning_rate)
    }[learner]
    dataset_name = 'sparseness%s_%s' % (sparseness, index)
    model_out_file = '%s_exMLP_%s_%s_%s.h5' % (dataset_name, layers, fake_layers, datetime.now())

    userId, itemId, rating = load_data(csv_file_path(sparseness, index))

    result = calculate_distance(csv_file_path(sparseness, index))
    distance = convert_distance_result(result)

    fake_user_id, userId, itemId, rating = extend_array(extend_near_num, distance, userId, itemId, rating)

    test_userId, test_itemId, test_rating = load_data(csv_file_path(sparseness, index, training_set=False))

    # early_stop = keras.callbacks.EarlyStopping(monitor='mean_absolute_error', min_delta=0.0002, patience=10)

    model = custom_model.extend_mlp_model.get_model(num_users=user_num, num_items=ws_num,
                                                    layers=layers, reg_layers=reg_layers,
                                                    fake_layers=fake_layers, fake_reg_layers=fake_reg_layers,
                                                    last_activation=last_activation,
                                                    fake_last_activation=fake_last_activation)

    # no explicit loss is passed here: the extended-MLP model is assumed to
    # register its own training loss internally, which is why the ratings
    # are fed to fit() as an input rather than as the target
    model.compile(optimizer=optimizer)

    model.fit(x=[fake_user_id, userId, itemId, rating],
              y=None,
              batch_size=batch_size, epochs=epochs,
              verbose=1,
              shuffle=False)
    mkdir('./Trained')
    model.save('./Trained/{}'.format(model_out_file))
    # prediction, fake_prediction = model.predict([np.zeros(len(test_userId)), test_userId, test_itemId])
    # mae = np.mean(np.abs(prediction - test_rating))
    # rmse = np.sqrt(np.mean(np.square(prediction - test_rating)))

    _, _, mae, rmse = model.evaluate([np.zeros(len(test_rating)), test_userId, test_itemId, test_rating], steps=1)
    # print('loss: ', loss)
    # print('mae: ', mae)
    # print('rmse', np.sqrt(mse))
    exp_data['model'] = model_out_file
    exp_data['mae'] = float(mae)
    exp_data['rmse'] = float(rmse)
    exp_data['datetime'] = datetime.now()
    exp_data['last_activation'] = last_activation
    print(exp_data)
    auto_insert_database(database_config, exp_data, 'exmlp_rt')
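
As with the other variants, a hypothetical call (all values illustrative):

experiment(sparseness=5, index=1, epochs=30, batch_size=128,
           layers=[32, 16], reg_layers=[0, 0],
           fake_layers=[32, 16], fake_reg_layers=[0, 0],
           last_activation='sigmoid', fake_last_activation='sigmoid',
           learning_rate=0.007, extend_near_num=5, learner='adam')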