def karate_test_scenario(deepwalk_path):

    y_path = '../../local_resources/karate/y.p'
    x_path = '../../local_resources/karate/X.p'

    target = utils.read_target(y_path)

    x, y = utils.read_data(x_path, y_path, threshold=0)

    names = [['deepwalk'], ['logistic']]

    x_deepwalk = pd.read_csv(deepwalk_path, index_col=0)
    # all_features = np.concatenate((x.toarray(), x_deepwalk), axis=1)
    X = [x_deepwalk.values, normalize(x, axis=0)]
    n_folds = 10
    results = run_detectors.run_all_datasets(X, y, names, classifiers, n_folds)
    all_results = utils.merge_results(results, n_folds)
    results, tests = utils.stats_test(all_results)
    tests[0].to_csv('../../results/karate/deepwalk_macro_pvalues' +
                    utils.get_timestamp() + '.csv')
    tests[1].to_csv('../../results/karate/deepwalk_micro_pvalues' +
                    utils.get_timestamp() + '.csv')
    print('macro', results[0])
    print('micro', results[1])
    macro_path = '../../results/karate/deepwalk_macro' + utils.get_timestamp(
    ) + '.csv'
    micro_path = '../../results/karate/deepwalk_micro' + utils.get_timestamp(
    ) + '.csv'
    results[0].to_csv(macro_path, index=True)
    results[1].to_csv(micro_path, index=True)
def karate_scenario():
    deepwalk_path = 'local_resources/zachary_karate/size8_walks1_len10.emd'

    y_path = 'local_resources/zachary_karate/y.p'
    x_path = 'local_resources/zachary_karate/X.p'

    target = utils.read_target(y_path)

    x, y = utils.read_data(x_path, y_path, threshold=0)

    names = [['logistic'], ['deepwalk']]

    x_deepwalk = utils.read_embedding(deepwalk_path, target)
    # all_features = np.concatenate((x.toarray(), x_deepwalk), axis=1)
    X = [x_deepwalk, normalize(x, axis=0)]
    n_folds = 2
    results = run_all_datasets(X, y, names, classifiers, n_folds)
    all_results = utils.merge_results(results)
    results, tests = utils.stats_test(all_results)
    tests[0].to_csv('results/karate/deepwalk_macro_pvalues' +
                    utils.get_timestamp() + '.csv')
    tests[1].to_csv('results/karate/deepwalk_micro_pvalues' +
                    utils.get_timestamp() + '.csv')
    print('macro', results[0])
    print('micro', results[1])
    macro_path = 'results/karate/deepwalk_macro' + utils.get_timestamp(
    ) + '.csv'
    micro_path = 'results/karate/deepwalk_micro' + utils.get_timestamp(
    ) + '.csv'
    results[0].to_csv(macro_path, index=True)
    results[1].to_csv(micro_path, index=True)
Example #3
def compare_embeddings():
    emd_reps = 10  # number of times to generate the embeddings
    det_reps = 10  # number of times to repeat the classification
    train_size = 4  # number of training examples
    size = 2  # the number of dimensions to embed
    walks = pd.read_csv('local_resources/zachary_karate/walks1_len10_p1_q1.csv', header=None).values
    p1 = Params(batch_size=4, embedding_size=size, neg_samples=5, skip_window=3, num_pairs=1500,
                logging_interval=100,
                initial_learning_rate=0.2)
    p2 = Params(batch_size=4, embedding_size=size, neg_samples=8, skip_window=3, num_pairs=1500,
                logging_interval=100,
                initial_learning_rate=0.2)
    param_arr = [p1, p2]
    elems, unigrams = np.unique(walks, return_counts=True)
    names = ['neg5', 'neg8']
    results = []
    for name, params in zip(names, param_arr):
        result = generate_embeddings(name, emd_reps, det_reps, params, walks, unigrams, train_size)
        results.append(result)

    means, tests = utils.array_stats_test(results)
    tests[0].to_csv('results/karate/tf_macro_pvalues' + utils.get_timestamp() + '.csv')
    tests[1].to_csv('results/karate/tf_micro_pvalues' + utils.get_timestamp() + '.csv')
    print('results', means)
    means_path = 'results/karate/tf_means' + utils.get_timestamp() + '.csv'
    means.to_csv(means_path, index=True)
    all_results = utils.merge_results(results)
    macro_path = 'results/karate/tf_macro' + utils.get_timestamp() + '.csv'
    micro_path = 'results/karate/tf_micro' + utils.get_timestamp() + '.csv'
    all_results[0].to_csv(macro_path, index=True)
    all_results[1].to_csv(micro_path, index=True)
def tf_train100000_emd_scenario():
    scaler = StandardScaler()
    feature_path = '../../local_resources/features_train100000.tsv'
    # feature_path = '../../local_resources/features_train100000.tsv'
    rf_features = pd.read_csv(feature_path, sep='\t', index_col=0)
    rf_features.index.name = None
    emd = pd.read_csv('../../local_results/tf_train_100000.emd',
                      header=None,
                      index_col=0,
                      skiprows=1,
                      sep=" ")
    # emd = pd.read_csv('../../local_results/tf_train_100000.emd', header=None, index_col=0, skiprows=1, sep=" ")
    features, y = utils.get_classification_xy(feature_path, emd)
    all_feat = features.join(emd)
    X1 = features.values.astype(float)
    X1 = scaler.fit_transform(X1)
    X2 = all_feat.values.astype(float)
    X2 = scaler.fit_transform(X2)
    names = np.array([['L2 without emd', 'L1 without emd', 'RF without emd'],
                      ['L2 with emd', 'L1 with emd', 'RF with emd'],
                      ['L2 just emd', 'L1 just emd', 'RF just emd']])
    n_folds = 10
    results = run_all_datasets([X1, X2, emd.values], y, names, classifiers,
                               n_folds)
    all_results = utils.merge_results(results, n_folds)
    results, tests = utils.stats_test(all_results)
    print('macro', results[0])
    print('micro', results[1])
    macro_path = '../../results/neural/tf_macro_train100000' + utils.get_timestamp(
    ) + '.csv'
    micro_path = '../../results/neural/tf_micro_train100000' + utils.get_timestamp(
    ) + '.csv'
    results[0].to_csv(macro_path, index=True)
    results[1].to_csv(micro_path, index=True)
Example #5
def batch_size_scenario():
    """
    Generate embeddings using different batch sizes for the ~1000 vertex polblogs network
    :return:
    """
    import visualisation
    s = datetime.datetime.now()
    y_path = '../../local_resources/political_blogs/y.p'
    x_path = '../../local_resources/political_blogs/X.p'
    y = utils.read_pickle(y_path)
    log_path = '../../local_resources/tf_logs/polblogs/'
    walk_path = '../../local_resources/political_blogs/walks_n1_l10.csv'
    size = 2  # dimensionality of the embedding
    batch_sizes = [1, 2, 4, 8, 16, 32, 64, 128]
    embeddings = []
    for batch_size in batch_sizes:
        params = Params(walk_path, batch_size=batch_size, embedding_size=size, neg_samples=5, skip_window=5,
                        num_pairs=1500,
                        statistics_interval=10.0,
                        initial_learning_rate=0.1, save_path=log_path, epochs=5, concurrent_steps=4)

        path = '../../local_resources/political_blogs/embeddings/Win_batch_{}_{}.csv'.format(
            batch_size, utils.get_timestamp())

        embedding_in, embedding_out = HCE.main(params)

        visualisation.plot_poincare_embedding(embedding_in, y,
                                              '../../results/political_blogs/figs/poincare_polar_Win_batch_{}_{}.pdf'.format(
                                                  batch_size, utils.get_timestamp()))
        visualisation.plot_poincare_embedding(embedding_out, y,
                                              '../../results/political_blogs/figs/poincare_polar_Wout_batch_{}_{}.pdf'.format(
                                                  batch_size, utils.get_timestamp()))
        df_in = pd.DataFrame(data=embedding_in, index=np.arange(embedding_in.shape[0]))
        df_in.to_csv(path, sep=',')
        df_out = pd.DataFrame(data=embedding_out, index=np.arange(embedding_out.shape[0]))
        df_out.to_csv(
            '../../local_resources/political_blogs/embeddings/Wout_batch_{}_{}.csv'.format(
                batch_size, utils.get_timestamp()),
            sep=',')
        print('political blogs embedding generated in: ', datetime.datetime.now() - s)
        embeddings.append(embedding_in)

    x, y = utils.read_data(x_path, y_path, threshold=0)

    names = [[str(batch_size)] for batch_size in batch_sizes]
    n_folds = 10
    results = run_detectors.run_all_datasets(embeddings, y, names, classifiers, n_folds)
    all_results = utils.merge_results(results, n_folds)
    results, tests = utils.stats_test(all_results)
    tests[0].to_csv('../../results/political_blogs/batch_size_macro_pvalues' + utils.get_timestamp() + '.csv')
    tests[1].to_csv('../../results/political_blogs/batch_size_micro_pvalues' + utils.get_timestamp() + '.csv')
    print('macro', results[0])
    print('micro', results[1])
    macro_path = '../../results/political_blogs/batch_size_macro' + utils.get_timestamp() + '.csv'
    micro_path = '../../results/political_blogs/batch_size_micro' + utils.get_timestamp() + '.csv'
    results[0].to_csv(macro_path, index=True)
    results[1].to_csv(micro_path, index=True)

    return path
def gensim_1in10000_emd_scenario():
    scaler = StandardScaler()
    feature_path = '../../local_resources/features_1in10000.tsv'
    rf_features = pd.read_csv(feature_path, sep='\t', index_col=0)
    emd = pd.read_csv('../../local_results/customer.emd',
                      header=None,
                      index_col=0,
                      skiprows=1,
                      sep=" ")
    features, y = utils.get_classification_xy(rf_features)
    # select only the data points that we have embeddings for
    features = features.loc[emd.index, :]
    y = y.loc[emd.index].values

    all_feat = features.join(emd, how='inner')
    print('input features shape', all_feat.shape)

    X1 = features.values.astype(float)
    X1 = scaler.fit_transform(X1)
    X2 = all_feat.values.astype(float)
    X2 = scaler.fit_transform(X2)
    # names = np.array(
    #     [['L2 without emd'], ['L2 with emd']])
    names = np.array([['L2 without emd'], ['L2 with emd'], ['L2 just emd']])
    # names = np.array(
    #     [['L2 without emd', 'L1 without emd', 'RF without emd'], ['L2 with emd', 'L1 with emd', 'RF with emd'],
    #      ['L2 just emd', 'L1 just emd', 'RF just emd']])
    # names = np.array([['without MF'], ['with MF']])
    n_folds = 5
    # np.random.seed(42)
    clf = LogisticRegression(multi_class='ovr',
                             penalty='l2',
                             solver='liblinear',
                             n_jobs=1,
                             max_iter=1000,
                             C=0.005)
    df = run_repetitions([X1, X2, emd.values], y, clf, names, reps=10)
    print(df)
    # results = run_all_datasets([X1, X2], y, names, [clf], n_folds)
    results = run_all_datasets([X1, X2, emd.values], y, names, classifiers,
                               n_folds)
    all_results = utils.merge_results(results, n_folds)
    results, tests = utils.stats_test(all_results)
    print('macro', results[0])
    print('micro', results[1])
    macro_path = '../../results/neural/gensim_1in10000_macro' + utils.get_timestamp(
    ) + '.csv'
    micro_path = '../../results/neural/gensim_1in10000_micro' + utils.get_timestamp(
    ) + '.csv'
    results[0].to_csv(macro_path, index=True)
    results[1].to_csv(micro_path, index=True)
Example #7
def karate_deepwalk_grid_scenario():
    """
    evaluates a grid of embeddings at different sizes, walk lengths and walks per vertex for the karate network.
    Trying to understand why the DeepWalk performance was so poor.
    :return:
    """
    import os
    y_path = '../../local_resources/karate/y.p'
    x_path = '../../local_resources/karate/X.p'

    target = utils.read_target(y_path)

    x, y = utils.read_data(x_path, y_path, threshold=0)

    folder = '../../local_resources/karate/gridsearch/'
    names = [[elem] for elem in os.listdir(folder)]

    embeddings = []
    for name in names:
        emb = pd.read_csv(folder + name[0],
                          header=None,
                          index_col=0,
                          skiprows=1,
                          sep=" ")
        emb.sort_index(inplace=True)
        embeddings.append(emb.values)

    names.append(['hyperbolic'])
    hyp_path = '../../local_resources/karate/embeddings/Win_20170808-185202.csv'
    hyp_emb = pd.read_csv(hyp_path, index_col=0)
    embeddings.append(hyp_emb.values)

    n_folds = 10
    results = run_detectors.run_all_datasets(embeddings, y, names, classifiers,
                                             n_folds)
    all_results = utils.merge_results(results, n_folds)
    results, tests = utils.stats_test(all_results)
    tests[0].to_csv('../../results/karate/macro_pvalues' + utils.get_timestamp() +
                    '.csv')
    tests[1].to_csv('../../results/karate/micro_pvalues' + utils.get_timestamp() +
                    '.csv')
    print('macro', results[0])
    print('micro', results[1])
    macro_path = '../../results/karate/macro' + utils.get_timestamp() + '.csv'
    micro_path = '../../results/karate/micro' + utils.get_timestamp() + '.csv'
    results[0].to_csv(macro_path, index=True)
    results[1].to_csv(micro_path, index=True)
def MF_scenario():
    scaler = StandardScaler()
    feature_path = '../../local_resources/features_1in10000.tsv'
    rf_features = pd.read_csv(feature_path, sep='\t', index_col=0)
    rf_features.index.name = None
    emd = pd.read_csv('../../local_resources/roberto_emd.csv',
                      header=None,
                      index_col=0)
    emd.index.name = None
    # emd = reduce_embedding(emd)
    # filter the features by customer ID
    temp = rf_features.join(emd[1], how='inner')
    features = temp.drop(1, axis=1)
    # extract the churn target labels
    print('class distribution', features['target_churned'].value_counts())
    y = features['target_churned'].values.astype(int)
    # remove the labels
    features = features.iloc[:, :-4]
    # encode the categoricals
    features['shippingCountry'] = utils.convert_to_other(
        features['shippingCountry'], pct=0.05, label='Other')
    features = pd.get_dummies(features, columns=['shippingCountry', 'gender'])
    all_feat = features.join(emd)
    X1 = features.values.astype(float)
    X1 = scaler.fit_transform(X1)
    X2 = all_feat.values.astype(float)
    X2 = scaler.fit_transform(X2)
    names = np.array([['L2 without MF', 'L1 without MF', 'RF without MF'],
                      ['L2 with MF', 'L1 with MF', 'RF with MF'],
                      ['L2 just MF', 'L1 just MF', 'RF just MF']])
    # names = np.array([['without MF'], ['with MF']])
    n_folds = 20
    results = run_all_datasets([X1, X2, emd.values], y, names, classifiers,
                               n_folds)
    all_results = utils.merge_results(results, n_folds)
    results, tests = utils.stats_test(all_results)
    print('macro', results[0])
    print('micro', results[1])
    macro_path = '../../results/MF/macro_1of100000no_cat' + utils.get_timestamp(
    ) + '.csv'
    micro_path = '../../results/MF/micro_1of100000no_cat' + utils.get_timestamp(
    ) + '.csv'
    results[0].to_csv(macro_path, index=True)
    results[1].to_csv(micro_path, index=True)
Example #9
def karate_results(embeddings, names, n_reps, train_size):
    deepwalk_path = '../../local_resources/zachary_karate/size8_walks1_len10.emd'

    y_path = '../../local_resources/zachary_karate/y.p'
    x_path = '../../local_resources/zachary_karate/X.p'

    target = utils.read_target(y_path)

    x, y = utils.read_data(x_path, y_path, threshold=0)

    # names = [['embedding'], ['logistic']]

    names.append(['logistic'])

    # x_deepwalk = utils.read_embedding(deepwalk_path, target)
    # all_features = np.concatenate((x.toarray(), x_deepwalk), axis=1)
    # X = [normalize(embedding, axis=0), normalize(x, axis=0)]
    X = embeddings + [normalize(x, axis=0)]
    # names = ['embedding']
    # X = embedding

    results = []
    for exp in zip(X, names):
        tmp = run_detectors.run_experiments(exp[0], y, exp[1], classifiers,
                                            n_reps, train_size)
        results.append(tmp)
    all_results = utils.merge_results(results, n_reps)
    results, tests = utils.stats_test(all_results)
    tests[0].to_csv('../../results/karate/tf_macro_pvalues' +
                    utils.get_timestamp() + '.csv')
    tests[1].to_csv('../../results/karate/tf_micro_pvalues' +
                    utils.get_timestamp() + '.csv')
    print('macro', results[0])
    print('micro', results[1])
    macro_path = '../../results/karate/tf_macro' + utils.get_timestamp(
    ) + '.csv'
    micro_path = '../../results/karate/tf_micro' + utils.get_timestamp(
    ) + '.csv'
    results[0].to_csv(macro_path, index=True)
    results[1].to_csv(micro_path, index=True)
    return results
Example #10
def political_blogs_scenario(embedding_path):
    # deepwalk_path = '../../local_resources/hyperbolic_embeddings/tf_test1.csv'

    y_path = '../../local_resources/political_blogs/y.p'
    x_path = '../../local_resources/political_blogs/X.p'
    sizes = [2, 4, 8, 16, 32, 64, 128]
    deepwalk_embeddings = []
    deepwalk_names = []
    dwpath = '../../local_resources/political_blogs/political_blogs'
    for size in sizes:
        path = dwpath + str(size) + '.emd'
        de = pd.read_csv(path, header=None, index_col=0, skiprows=1, sep=" ")
        de.sort_index(inplace=True)
        deepwalk_embeddings.append(de.values)
        deepwalk_names.append(['deepwalk' + str(size)])

    x, y = utils.read_data(x_path, y_path, threshold=0)

    names = [['hyperbolic'], ['logistic']]
    names = deepwalk_names + names

    embedding = pd.read_csv(embedding_path, index_col=0)
    # all_features = np.concatenate((x.toarray(), x_deepwalk), axis=1)
    X = deepwalk_embeddings + [embedding.values, normalize(x, axis=0)]
    n_folds = 10
    results = run_detectors.run_all_datasets(X, y, names, classifiers, n_folds)
    all_results = utils.merge_results(results, n_folds)
    results, tests = utils.stats_test(all_results)
    tests[0].to_csv('../../results/political_blogs/macro_pvalues' +
                    utils.get_timestamp() + '.csv')
    tests[1].to_csv('../../results/political_blogs/micro_pvalues' +
                    utils.get_timestamp() + '.csv')
    print('macro', results[0])
    print('micro', results[1])
    macro_path = '../../results/political_blogs/macro' + utils.get_timestamp(
    ) + '.csv'
    micro_path = '../../results/political_blogs/micro' + utils.get_timestamp(
    ) + '.csv'
    results[0].to_csv(macro_path, index=True)
    results[1].to_csv(micro_path, index=True)
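
A hypothetical way of chaining two of the scenarios above (not part of the original source): batch_size_scenario() returns the path of the last Win embedding CSV it writes, and political_blogs_scenario() takes such a CSV path, so a minimal driver sketch might look like this.

if __name__ == '__main__':
    # Illustrative sketch only: generate polblogs embeddings at several batch
    # sizes, then evaluate the last one alongside the DeepWalk baselines.
    embedding_path = batch_size_scenario()
    political_blogs_scenario(embedding_path)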
Example #11
def run_embedding_array(embeddings, names, n_reps, train_size):
    """
    As embeddings show significant variation we must compare many embeddings with the same params to ascertain quality
    :param embeddings:
    :param names:
    :param n_reps:
    :param train_size:
    :return: A tuple of pandas DataFrames (macro, micro)
    """
    y_path = 'local_resources/zachary_karate/y.p'
    x_path = 'local_resources/zachary_karate/X.p'

    x, y = utils.read_data(x_path, y_path, threshold=0)

    results = []
    for exp in zip(embeddings, names):
        tmp = run_detectors.run_experiments(exp[0], y, exp[1], classifiers, n_reps, train_size)
        results.append(tmp)

    all_results = utils.merge_results(results)

    return all_results
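
Because embeddings vary between training runs (the motivation stated in the docstring above), a caller would typically retrain the same configuration several times and pass every resulting matrix to run_embedding_array. A minimal hypothetical sketch, with np.random.rand standing in for a real embedding step and the 34-vertex karate network assumed:

import numpy as np

def compare_repeated_embeddings(n_embeddings=5, n_reps=10, train_size=4):
    # Illustrative only: each iteration would normally retrain an embedding with
    # identical hyperparameters; random matrices are used here as placeholders.
    embeddings = []
    names = []
    for i in range(n_embeddings):
        embeddings.append(np.random.rand(34, 2))  # 34 karate vertices, 2 dimensions
        names.append(['embedding_rep{}'.format(i)])
    # per its docstring, run_embedding_array returns (macro, micro) DataFrames
    macro, micro = run_embedding_array(embeddings, names, n_reps, train_size)
    return macro, micro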
def get_seeds():
    if request.method == 'GET':
        user_id = assign_cookie_value(request, COOKIE_KEY)
        print(request.cookies)
        raw_data = request.args
        data = json.loads(json.dumps(raw_data))
        seed_num = int(data['seeds'])
        indicator = data['indicator']
        net_dict = user_records.find_one({'user': user_id}, {
            'diffusionUl.network': 1,
            '_id': 0
        })
        if net_dict is None:
            results = {
                'error':
                'A valid network has to be generated for this operation!'
            }
            flag = update_modification_time(user_id)
            response = make_response(json.dumps(results), 404)
        else:
            net = transform.dict2graph(net_dict['diffusionUl']['network'])
            seeds = n.seeds(net, seed_num, indicator)
            new_net = u.merge_results(net_dict['diffusionUl']['network'],
                                      seeds=seeds)
            flag = user_records.update_one({'user': user_id}, {
                '$set': {
                    'diffusionUl.network': new_net,
                    'diffusionUl.seeds': seeds
                },
                '$currentDate': {
                    'lastUpdate': {
                        '$type': 'date'
                    }
                }
            })
            print(flag)
            response = make_response(json.dumps(new_net), 200)
        return bind_cookie(response, COOKIE_KEY, user_id, COOKIE_DURATION)
Example #13
def gensim_1in10000_debug_scenario():
    scaler = StandardScaler()
    feature_path = '../../local_resources/features_1in10000.tsv'
    rf_features = pd.read_csv(feature_path, sep='\t', index_col=0)
    rf_features.index.name = None
    print('input features shape', rf_features.shape)
    emd = pd.read_csv('../../local_results/customer.emd',
                      header=None,
                      index_col=0,
                      skiprows=1,
                      sep=" ")
    print('input embedding shape', emd.shape)
    # emd = pd.read_csv('../../local_results/customer.emd', header=None, index_col=0, skiprows=1, sep=" ")
    features, y = utils.get_classification_xy(feature_path, emd)
    assert len(features) == features.index.nunique()
    # all_feat = features.join(emd, how='inner')
    all_feat = features.join(emd)
    X1 = features.values.astype(float)
    X1 = scaler.fit_transform(X1)
    X2 = all_feat.values.astype(float)
    X2 = scaler.fit_transform(X2)
    names = np.array([['L2 without emd', 'L1 without emd', 'RF without emd'],
                      ['L2 with emd', 'L1 with emd', 'RF with emd'],
                      ['L2 just emd', 'L1 just emd', 'RF just emd']])
    # names = np.array([['without MF'], ['with MF']])
    n_folds = 10
    results = run_all_datasets([X1, X2, emd.values], y, names, classifiers,
                               n_folds)
    all_results = utils.merge_results(results, n_folds)
    results, tests = utils.stats_test(all_results)
    print('macro', results[0])
    print('micro', results[1])
    macro_path = '../../results/neural/gensim_1in10000_macro' + utils.get_timestamp(
    ) + '.csv'
    micro_path = '../../results/neural/gensim_1in10000_micro' + utils.get_timestamp(
    ) + '.csv'
    results[0].to_csv(macro_path, index=True)
    results[1].to_csv(micro_path, index=True)
Example #14
def run_scenario(folder, embedding_path):
    y_path = '../../local_resources/{}/y.p'.format(folder)
    x_path = '../../local_resources/{}/X.p'.format(folder)
    sizes = [2, 4, 8, 16, 32, 64, 128]
    deepwalk_embeddings = []
    deepwalk_names = []
    dwpath = '../../local_resources/{0}/{1}'.format(folder, folder)
    for size in sizes:
        path = dwpath + str(size) + '.emd'
        de = pd.read_csv(path, header=None, index_col=0, skiprows=1, sep=" ")
        de.sort_index(inplace=True)
        deepwalk_embeddings.append(de.values)
        deepwalk_names.append(['deepwalk' + str(size)])

    x, y = utils.read_data(x_path, y_path, threshold=0)

    names = [['hyperbolic'], ['logistic']]
    names = deepwalk_names + names

    embedding = pd.read_csv(embedding_path, index_col=0)
    X = deepwalk_embeddings + [embedding.values, normalize(x, axis=0)]
    n_folds = 10
    results = run_detectors.run_all_datasets(X, y, names, classifiers, n_folds)
    all_results = utils.merge_results(results, n_folds)
    results, tests = utils.stats_test(all_results)
    tests[0].to_csv('../../results/{0}/macro_pvalues{1}.csv'.format(
        folder, utils.get_timestamp()))
    tests[1].to_csv('../../results/{0}/micro_pvalues{1}.csv'.format(
        folder, utils.get_timestamp()))
    print('macro', results[0])
    print('micro', results[1])
    macro_path = '../../results/{0}/macro{1}.csv'.format(
        folder, utils.get_timestamp())
    micro_path = '../../results/{0}/micro{1}.csv'.format(
        folder, utils.get_timestamp())
    results[0].to_csv(macro_path, index=True)
    results[1].to_csv(micro_path, index=True)
Example #15
parser.add_argument('--max_l', default=100)
parser.add_argument('--n_ul', default=100)
args = parser.parse_args()

save_dir = args.save_dir

if not os.path.exists(save_dir):
    os.makedirs(save_dir)

save_dir = os.path.join(save_dir, 'fig4_tleft')
if not os.path.exists(save_dir):
    os.makedirs(save_dir)

# If merge_dica is set, load the saved results, merge them with the DICA results and plot again.
if int(args.merge_dica) == 1:
    utils.merge_results('results.pkl', 'dica.pkl', 'dica', save_dir)
    plotting.plot_tl(os.path.join(save_dir, 'merged.pkl'), ylim=4)
    exit()

n_task = int(args.n_task)
n = int(args.n)

p = int(args.p)
p_s = int(args.p_s)
p_conf = int(args.p_conf)
eps = float(args.eps)
g = float(args.g)

lambd = float(args.lambd)
lambd_test = float(args.lambd_test)
    def diffuse_ul():
        if request.method == 'GET':
            user_id = assign_cookie_value(request, COOKIE_KEY)
            print(request.cookies)
            raw_data = request.args
            data = json.loads(json.dumps(raw_data))
            model_type = data['model']
            seed_num = int(data['seeds'])
            indicator = data['indicator']
            net_dict = user_records.find_one({'user': user_id}, {
                'diffusionUl.network': 1,
                '_id': 0
            })
            seeds = user_records.find_one({'user': user_id}, {
                'diffusionUl.seeds': 1,
                '_id': 0
            })
            if net_dict is None:
                results = {
                    'error':
                    'A valid network has to be generated for this operation!'
                }
                flag = update_modification_time(user_id)
                response = make_response(json.dumps(results), 404)
            else:
                net = transform.dict2graph(net_dict['diffusionUl']['network'])
                if seeds is None:
                    seeds = n.seeds(net, seed_num, indicator)
                    flag = user_records.update_one({'user': user_id}, {
                        '$set': {
                            'diffusionUl.seeds': seeds
                        },
                        '$currentDate': {
                            'lastUpdate': {
                                '$type': 'date'
                            }
                        }
                    })
                    print(flag)
                else:
                    seeds = seeds['diffusionUl']['seeds']

                if model_type == 'LTM':
                    threshold = float(data['threshold'])
                    diffused = d.LTM(net, seeds, threshold)
                    new_net = u.merge_results(
                        net_dict['diffusionUl']['network'],
                        seeds=seeds,
                        opinion_leaders=None,
                        communities=None,
                        diffused=diffused)
                else:
                    pb_leaders = float(data['pb-leaders'])
                    pb_normal = float(data['pb-normal'])
                    scale = data['scale']
                    ratio = float(data['ratio'])

                    communities = user_records.find_one(
                        {'user': user_id}, {
                            'diffusionUl.communities': 1,
                            '_id': 0
                        })
                    opinion_leaders = user_records.find_one(
                        {'user': user_id}, {
                            'diffusionUl.opinionLeaders': 1,
                            '_id': 0
                        })

                    if communities is None:
                        communities = n.community_cnm(net)
                        flag = user_records.update_one({'user': user_id}, {
                            '$set': {
                                'diffusionUl.communities': communities
                            },
                            '$currentDate': {
                                'lastUpdate': {
                                    '$type': 'date'
                                }
                            }
                        })
                        print(flag)
                    else:
                        communities = communities['diffusionUl']['communities']

                    if opinion_leaders is None:
                        if scale == 'R':
                            opinion_leaders = n.find_transfer_sources(
                                communities, ratio)
                        else:
                            opinion_leaders = n.rnd_seeds(
                                net.GetNodes(), int(net.GetNodes() * ratio))
                        flag = user_records.update_one({'user': user_id}, {
                            '$set': {
                                'diffusionUl.opinionLeaders': opinion_leaders
                            },
                            '$currentDate': {
                                'lastUpdate': {
                                    '$type': 'date'
                                }
                            }
                        })
                        print(flag)
                    else:
                        opinion_leaders = opinion_leaders['diffusionUl'][
                            'opinionLeaders']

                    diffused = d.ICM(net, seeds, opinion_leaders, pb_leaders,
                                     pb_normal)
                    new_net = u.merge_results(
                        net_dict['diffusionUl']['network'],
                        seeds=seeds,
                        opinion_leaders=opinion_leaders,
                        communities=communities,
                        diffused=diffused)
                flag = update_modification_time(user_id)
                response = make_response(json.dumps(new_net), 200)

            return bind_cookie(response, COOKIE_KEY, user_id, COOKIE_DURATION)
def get_transfer_sources():
    if request.method == 'GET':
        user_id = assign_cookie_value(request, COOKIE_KEY)
        print(request.cookies)
        raw_data = request.args
        data = json.loads(json.dumps(raw_data))
        ratio = float(data['ratio'])
        scale = data['scale']
        net_dict = user_records.find_one({'user': user_id}, {
            'diffusionUl.network': 1,
            '_id': 0
        })
        if net_dict is None:
            results = {
                'error':
                'A valid network has to be generated for this operation!'
            }
            flag = update_modification_time(user_id)
            response = make_response(json.dumps(results), 404)
        else:
            net = transform.dict2graph(net_dict['diffusionUl']['network'])
            if scale == 'R':
                communities = user_records.find_one(
                    {'user': user_id}, {
                        'diffusionUl.communities': 1,
                        '_id': 0
                    })
                if communities is None:
                    communities = n.community_cnm(net)
                    flag = user_records.update_one({'user': user_id}, {
                        '$set': {
                            'diffusionUl.communities': communities
                        },
                        '$currentDate': {
                            'lastUpdate': {
                                '$type': 'date'
                            }
                        }
                    })
                    print(flag)
                else:
                    communities = communities['diffusionUl']['communities']
                opinion_leaders = n.find_transfer_sources(
                    communities, ratio)
            else:
                opinion_leaders = n.rnd_seeds(net.GetNodes(),
                                              int(net.GetNodes() * ratio))
            new_net = u.merge_results(net_dict['diffusionUl']['network'],
                                      opinion_leaders=opinion_leaders)
            flag = user_records.update_one({'user': user_id}, {
                '$set': {
                    'diffusionUl.network': new_net,
                    'diffusionUl.opinionLeaders': opinion_leaders
                },
                '$currentDate': {
                    'lastUpdate': {
                        '$type': 'date'
                    }
                }
            })
            print(flag)
            response = make_response(json.dumps(new_net), 200)
        return bind_cookie(response, COOKIE_KEY, user_id, COOKIE_DURATION)
Example #18
parser.add_argument('--min_l', default=50)
parser.add_argument('--n_ul', default=100)
args = parser.parse_args()

save_dir = args.save_dir

if not os.path.exists(save_dir):
    os.makedirs(save_dir)

save_dir = os.path.join(save_dir, 'fig5_bottom')
if not os.path.exists(save_dir):
    os.makedirs(save_dir)

# If merge_mtl is set, load the saved results, merge them with the MTL results and plot again.
if int(args.merge_mtl) == 1:
    utils.merge_results('results.pkl', 'mtl.pkl', 'mtl', save_dir)
    plotting.plot_mtl(os.path.join(save_dir, 'merged.pkl'))
    plotting.plot_mtl_mse(os.path.join(save_dir, 'merged.pkl'))
    exit()

n_task = int(args.n_task)
n = int(args.n)

p = int(args.p)
p_s = int(args.p_s)
p_conf = int(args.p_conf)
eps = float(args.eps)
g = float(args.g)

lambd = float(args.lambd)
lambd_test = float(args.lambd_test)