def karate_test_scenario(deepwalk_path):
    """
    Evaluate a csv embedding of the karate data next to the raw features passed through
    normalize(..., axis=0) and write the result tables to ../../results/karate/
    :param deepwalk_path: path to a csv embedding; the first column is read as the index
    :return: None. Four timestamped csv files are written and the two summaries are printed
    """
    label_file = '../../local_resources/karate/y.p'
    feature_file = '../../local_resources/karate/X.p'
    # the target is loaded here but nothing below reads it
    target = utils.read_target(label_file)
    features, labels = utils.read_data(feature_file, label_file, threshold=0)
    names = [['deepwalk'], ['logistic']]
    embedding = pd.read_csv(deepwalk_path, index_col=0)
    # datasets are listed in the same order as `names`
    datasets = [embedding.values, normalize(features, axis=0)]
    n_folds = 10
    fold_results = run_detectors.run_all_datasets(datasets, labels, names, classifiers, n_folds)
    merged = utils.merge_results(fold_results, n_folds)
    summaries, tests = utils.stats_test(merged)

    # p-value tables: index 0 goes to the macro file, index 1 to the micro file
    macro_pvalues_path = '../../results/karate/deepwalk_macro_pvalues' + utils.get_timestamp() + '.csv'
    tests[0].to_csv(macro_pvalues_path)
    micro_pvalues_path = '../../results/karate/deepwalk_micro_pvalues' + utils.get_timestamp() + '.csv'
    tests[1].to_csv(micro_pvalues_path)

    print('macro', summaries[0])
    print('micro', summaries[1])

    macro_path = '../../results/karate/deepwalk_macro' + utils.get_timestamp() + '.csv'
    micro_path = '../../results/karate/deepwalk_micro' + utils.get_timestamp() + '.csv'
    summaries[0].to_csv(macro_path, index=True)
    summaries[1].to_csv(micro_path, index=True)
def karate_scenario():
    """
    Evaluate the size8_walks1_len10 DeepWalk embedding of the Zachary karate data next to
    the raw features passed through normalize(..., axis=0).

    The p-value tables and the macro / micro summaries are written as timestamped csv files
    under results/karate/ and the summaries are printed.
    :return: None
    """
    deepwalk_path = 'local_resources/zachary_karate/size8_walks1_len10.emd'
    y_path = 'local_resources/zachary_karate/y.p'
    x_path = 'local_resources/zachary_karate/X.p'
    target = utils.read_target(y_path)
    x, y = utils.read_data(x_path, y_path, threshold=0)
    x_deepwalk = utils.read_embedding(deepwalk_path, target)
    # The names have to follow the order of X. They used to be listed as
    # [['logistic'], ['deepwalk']], which labelled the embedding results as 'logistic'
    # and the raw-feature results as 'deepwalk'.
    names = [['deepwalk'], ['logistic']]
    X = [x_deepwalk, normalize(x, axis=0)]
    n_folds = 2
    results = run_all_datasets(X, y, names, classifiers, n_folds)
    all_results = utils.merge_results(results)
    results, tests = utils.stats_test(all_results)
    tests[0].to_csv('results/karate/deepwalk_macro_pvalues' + utils.get_timestamp() + '.csv')
    tests[1].to_csv('results/karate/deepwalk_micro_pvalues' + utils.get_timestamp() + '.csv')
    # single-argument print produces the same text under Python 2 and Python 3
    print('macro %s' % (results[0],))
    print('micro %s' % (results[1],))
    macro_path = 'results/karate/deepwalk_macro' + utils.get_timestamp() + '.csv'
    micro_path = 'results/karate/deepwalk_micro' + utils.get_timestamp() + '.csv'
    results[0].to_csv(macro_path, index=True)
    results[1].to_csv(micro_path, index=True)
def compare_embeddings():
    """
    Generate embeddings for two parameter sets that differ only in neg_samples (5 and 8),
    run the statistical comparison on the results and write the tables to results/karate/
    :return: None
    """
    emd_reps = 10  # number of times to generate the embeddings
    det_reps = 10  # number of times to repeat the classification
    train_size = 4  # number of training examples
    size = 2  # the number of dimensions to embed
    walks = pd.read_csv('local_resources/zachary_karate/walks1_len10_p1_q1.csv', header=None).values

    # one Params object per experiment; everything except neg_samples is shared
    names = ['neg5', 'neg8']
    param_arr = [
        Params(batch_size=4, embedding_size=size, neg_samples=neg, skip_window=3, num_pairs=1500,
               logging_interval=100, initial_learning_rate=0.2)
        for neg in (5, 8)
    ]

    # only the counts are needed; the unique elements themselves are discarded
    _, unigrams = np.unique(walks, return_counts=True)

    results = [
        generate_embeddings(name, emd_reps, det_reps, params, walks, unigrams, train_size)
        for name, params in zip(names, param_arr)
    ]

    means, tests = utils.array_stats_test(results)
    macro_pvalues_path = 'results/karate/tf_macro_pvalues' + utils.get_timestamp() + '.csv'
    tests[0].to_csv(macro_pvalues_path)
    micro_pvalues_path = 'results/karate/tf_micro_pvalues' + utils.get_timestamp() + '.csv'
    tests[1].to_csv(micro_pvalues_path)
    print('results', means)
    means_path = 'results/karate/tf_means' + utils.get_timestamp() + '.csv'
    means.to_csv(means_path, index=True)

    merged = utils.merge_results(results)
    macro_path = 'results/karate/tf_macro' + utils.get_timestamp() + '.csv'
    micro_path = 'results/karate/tf_micro' + utils.get_timestamp() + '.csv'
    merged[0].to_csv(macro_path, index=True)
    merged[1].to_csv(micro_path, index=True)
def tf_train100000_emd_scenario():
    """
    Compare classification on the train100000 features without the embedding, with the
    embedding joined on, and on the embedding alone.

    The macro / micro summaries are printed and written as timestamped csv files under
    ../../results/neural/
    :return: None
    """
    scaler = StandardScaler()
    feature_path = '../../local_resources/features_train100000.tsv'
    # NOTE(review): rf_features is read but not used below; get_classification_xy is given
    # the path instead. Kept so the sequence of file reads is unchanged.
    rf_features = pd.read_csv(feature_path, sep='\t', index_col=0)
    # `del index.name` fails where Index.name is a property; assigning None clears it
    rf_features.index.name = None
    emd = pd.read_csv('../../local_results/tf_train_100000.emd', header=None, index_col=0,
                      skiprows=1, sep=" ")
    features, y = utils.get_classification_xy(feature_path, emd)
    all_feat = features.join(emd)
    # np.float was only an alias of the builtin float and has been removed from NumPy
    X1 = scaler.fit_transform(features.values.astype(float))
    X2 = scaler.fit_transform(all_feat.values.astype(float))
    # one row of names per dataset, in the order the datasets are passed below
    names = np.array([['L2 without emd', 'L1 without emd', 'RF without emd'],
                      ['L2 with emd', 'L1 with emd', 'RF with emd'],
                      ['L2 just emd', 'L1 just emd', 'RF just emd']])
    n_folds = 10
    results = run_all_datasets([X1, X2, emd.values], y, names, classifiers, n_folds)
    all_results = utils.merge_results(results, n_folds)
    results, tests = utils.stats_test(all_results)
    # single-argument print produces the same text under Python 2 and Python 3
    print('macro %s' % (results[0],))
    print('micro %s' % (results[1],))
    macro_path = '../../results/neural/tf_macro_train100000' + utils.get_timestamp() + '.csv'
    micro_path = '../../results/neural/tf_micro_train100000' + utils.get_timestamp() + '.csv'
    results[0].to_csv(macro_path, index=True)
    results[1].to_csv(micro_path, index=True)
def batch_size_scenario():
    """
    Generate embeddings using different batch sizes for the ~1000 vertex polblogs network

    For every batch size the input and output embeddings returned by HCE.main are plotted,
    saved as csv, and the input embedding is kept for the classification comparison that
    runs after the loop.
    :return: the csv path of the input (Win) embedding written for the last batch size
    """
    import visualisation
    s = datetime.datetime.now()
    y_path = '../../local_resources/political_blogs/y.p'
    x_path = '../../local_resources/political_blogs/X.p'
    y = utils.read_pickle(y_path)
    log_path = '../../local_resources/tf_logs/polblogs/'
    walk_path = '../../local_resources/political_blogs/walks_n1_l10.csv'
    size = 2  # dimensionality of the embedding
    batch_sizes = [1, 2, 4, 8, 16, 32, 64, 128]
    embeddings = []
    for batch_size in batch_sizes:
        params = Params(walk_path, batch_size=batch_size, embedding_size=size, neg_samples=5,
                        skip_window=5, num_pairs=1500, statistics_interval=10.0,
                        initial_learning_rate=0.1, save_path=log_path, epochs=5,
                        concurrent_steps=4)
        path = '../../local_resources/political_blogs/embeddings/Win_batch_{}_{}.csv'.format(
            batch_size, utils.get_timestamp())
        embedding_in, embedding_out = HCE.main(params)
        visualisation.plot_poincare_embedding(
            embedding_in, y,
            '../../results/political_blogs/figs/poincare_polar_Win_batch_{}_{}.pdf'.format(
                batch_size, utils.get_timestamp()))
        visualisation.plot_poincare_embedding(
            embedding_out, y,
            '../../results/political_blogs/figs/poincare_polar_Wout_batch_{}_{}.pdf'.format(
                batch_size, utils.get_timestamp()))
        df_in = pd.DataFrame(data=embedding_in, index=np.arange(embedding_in.shape[0]))
        df_in.to_csv(path, sep=',')
        df_out = pd.DataFrame(data=embedding_out, index=np.arange(embedding_out.shape[0]))
        df_out.to_csv(
            '../../local_resources/political_blogs/embeddings/Wout_batch_{}_{}.csv'.format(
                batch_size, utils.get_timestamp()), sep=',')
        # elapsed time is measured from the start of the scenario, so it is cumulative
        print('political blogs embedding generated in: ', datetime.datetime.now() - s)
        embeddings.append(embedding_in)

    # only the labels are used from here on; x is not needed for the comparison
    x, y = utils.read_data(x_path, y_path, threshold=0)
    names = [[str(batch_size)] for batch_size in batch_sizes]
    n_folds = 10
    results = run_detectors.run_all_datasets(embeddings, y, names, classifiers, n_folds)
    all_results = utils.merge_results(results, n_folds)
    results, tests = utils.stats_test(all_results)
    # The macro and micro p-values used to share the 'batch_size_pvalues' prefix, so two
    # writes with the same timestamp overwrote each other. They now get distinct names.
    tests[0].to_csv('../../results/political_blogs/batch_size_macro_pvalues'
                    + utils.get_timestamp() + '.csv')
    tests[1].to_csv('../../results/political_blogs/batch_size_micro_pvalues'
                    + utils.get_timestamp() + '.csv')
    print('macro', results[0])
    print('micro', results[1])
    macro_path = '../../results/political_blogs/batch_size_macro' + utils.get_timestamp() + '.csv'
    micro_path = '../../results/political_blogs/batch_size_micro' + utils.get_timestamp() + '.csv'
    results[0].to_csv(macro_path, index=True)
    results[1].to_csv(micro_path, index=True)
    return path
def gensim_1in10000_emd_scenario():
    """
    Compare classification on the 1in10000 features without the customer embedding, with
    the embedding joined on, and on the embedding alone.

    A single L2 logistic regression is first passed to run_repetitions and its result is
    printed. The `classifiers` defined outside this function are then passed to
    run_all_datasets and the macro / micro summaries are written under ../../results/neural/
    :return: None
    """
    scaler = StandardScaler()
    feature_path = '../../local_resources/features_1in10000.tsv'
    rf_features = pd.read_csv(feature_path, sep='\t', index_col=0)
    emd = pd.read_csv('../../local_results/customer.emd', header=None, index_col=0, skiprows=1,
                      sep=" ")
    features, y = utils.get_classification_xy(rf_features)
    # select only the data points that we have embeddings for
    features = features.loc[emd.index, :]
    y = y.loc[emd.index].values
    all_feat = features.join(emd, how='inner')
    # single-argument print produces the same text under Python 2 and Python 3
    print('input features shape %s' % (all_feat.shape,))
    # np.float was only an alias of the builtin float and has been removed from NumPy
    X1 = scaler.fit_transform(features.values.astype(float))
    X2 = scaler.fit_transform(all_feat.values.astype(float))
    names = np.array([['L2 without emd'], ['L2 with emd'], ['L2 just emd']])
    n_folds = 5
    clf = LogisticRegression(multi_class='ovr', penalty='l2', solver='liblinear', n_jobs=1,
                             max_iter=1000, C=0.005)
    df = run_repetitions([X1, X2, emd.values], y, clf, names, reps=10)
    print(df)
    results = run_all_datasets([X1, X2, emd.values], y, names, classifiers, n_folds)
    all_results = utils.merge_results(results, n_folds)
    results, tests = utils.stats_test(all_results)
    print('macro %s' % (results[0],))
    print('micro %s' % (results[1],))
    # Both summaries used to be written to 'gensim_1in10000<timestamp>.csv', so the micro
    # table overwrote the macro table whenever the two timestamps matched.
    macro_path = '../../results/neural/gensim_macro_1in10000' + utils.get_timestamp() + '.csv'
    micro_path = '../../results/neural/gensim_micro_1in10000' + utils.get_timestamp() + '.csv'
    results[0].to_csv(macro_path, index=True)
    results[1].to_csv(micro_path, index=True)
def karate_deepwalk_grid_scenario():
    """
    evaluates a grid of embeddings at different sizes, walk lengths and walks per vertex for
    the karate network. Trying to understand why the DeepWalk performance was so poor.

    Every file in the gridsearch folder is read as one embedding and a fixed hyperbolic
    embedding is appended as the last dataset. Results are written to ../../results/karate/
    :return: None
    """
    import os
    y_path = '../../local_resources/karate/y.p'
    x_path = '../../local_resources/karate/X.p'
    # target is loaded but not read below
    target = utils.read_target(y_path)
    x, y = utils.read_data(x_path, y_path, threshold=0)
    folder = '../../local_resources/karate/gridsearch/'
    # each dataset is named after the file it was read from
    names = [[elem] for elem in os.listdir(folder)]
    embeddings = []
    for name in names:
        emb = pd.read_csv(folder + name[0], header=None, index_col=0, skiprows=1, sep=" ")
        # sort by the index column before taking the values
        emb.sort_index(inplace=True)
        embeddings.append(emb.values)
    names.append(['hyperbolic'])
    hyp_path = '../../local_resources/karate/embeddings/Win_20170808-185202.csv'
    hyp_emb = pd.read_csv(hyp_path, index_col=0)
    embeddings.append(hyp_emb.values)
    n_folds = 10
    results = run_detectors.run_all_datasets(embeddings, y, names, classifiers, n_folds)
    all_results = utils.merge_results(results, n_folds)
    results, tests = utils.stats_test(all_results)
    # The macro and micro p-values used to share the 'pvalues' prefix, so two writes with
    # the same timestamp overwrote each other. They now get distinct names.
    tests[0].to_csv('../../results/karate/macro_pvalues' + utils.get_timestamp() + '.csv')
    tests[1].to_csv('../../results/karate/micro_pvalues' + utils.get_timestamp() + '.csv')
    print('macro', results[0])
    print('micro', results[1])
    macro_path = '../../results/karate/macro' + utils.get_timestamp() + '.csv'
    micro_path = '../../results/karate/micro' + utils.get_timestamp() + '.csv'
    results[0].to_csv(macro_path, index=True)
    results[1].to_csv(micro_path, index=True)
def MF_scenario():
    """
    Compare churn classification on the 1in10000 features without the embedding read from
    roberto_emd.csv, with it joined on, and on the embedding alone.

    The macro / micro summaries are printed and written as timestamped csv files under
    ../../results/MF/
    :return: None
    """
    scaler = StandardScaler()
    feature_path = '../../local_resources/features_1in10000.tsv'
    rf_features = pd.read_csv(feature_path, sep='\t', index_col=0)
    # `del index.name` fails where Index.name is a property; assigning None clears it
    rf_features.index.name = None
    emd = pd.read_csv('../../local_resources/roberto_emd.csv', header=None, index_col=0)
    emd.index.name = None
    # filter the features by customer ID: inner-join on one embedding column, then drop it
    temp = rf_features.join(emd[1], how='inner')
    features = temp.drop(1, axis=1)
    # extract the churn target labels
    # single-argument print produces the same text under Python 2 and Python 3
    print('class distribution %s' % (features['target_churned'].value_counts(),))
    y = features['target_churned'].values.astype(int)
    # remove the labels: drop the last four columns by position. `.ix` has been removed
    # from pandas; an integer slice on these string-labelled columns was positional, which
    # is what `.iloc` does.
    features = features.iloc[:, :-4]
    # encode the categoricals
    features['shippingCountry'] = utils.convert_to_other(
        features['shippingCountry'], pct=0.05, label='Other')
    features = pd.get_dummies(features, columns=['shippingCountry', 'gender'])
    all_feat = features.join(emd)
    # np.float was only an alias of the builtin float and has been removed from NumPy
    X1 = scaler.fit_transform(features.values.astype(float))
    X2 = scaler.fit_transform(all_feat.values.astype(float))
    names = np.array([['L2 without MF', 'L1 without MF', 'RF without MF'],
                      ['L2 with MF', 'L1 with MF', 'RF with MF'],
                      ['L2 just MF', 'L1 just MF', 'RF just MF']])
    n_folds = 20
    # NOTE(review): emd.values is not restricted to the inner-joined rows, so its row count
    # may differ from y - confirm that every embedding row has a feature row.
    results = run_all_datasets([X1, X2, emd.values], y, names, classifiers, n_folds)
    all_results = utils.merge_results(results, n_folds)
    results, tests = utils.stats_test(all_results)
    print('macro %s' % (results[0],))
    print('micro %s' % (results[1],))
    macro_path = '../../results/MF/macro_1of100000no_cat' + utils.get_timestamp() + '.csv'
    micro_path = '../../results/MF/micro_1of100000no_cat' + utils.get_timestamp() + '.csv'
    results[0].to_csv(macro_path, index=True)
    results[1].to_csv(micro_path, index=True)
def karate_results(embeddings, names, n_reps, train_size):
    """
    Run the classification experiments on each supplied embedding plus the raw karate
    features passed through normalize(..., axis=0), and write the result tables to
    ../../results/karate/
    :param embeddings: list of embeddings, one per entry of names
    :param names: list of name lists, one per embedding. It is no longer modified
    :param n_reps: forwarded to run_detectors.run_experiments and utils.merge_results
    :param train_size: forwarded to run_detectors.run_experiments
    :return: the first value returned by utils.stats_test, indexed as [0] macro and [1] micro
    """
    y_path = '../../local_resources/zachary_karate/y.p'
    x_path = '../../local_resources/zachary_karate/X.p'
    # target is loaded but not read below
    target = utils.read_target(y_path)
    x, y = utils.read_data(x_path, y_path, threshold=0)
    # Build a new list instead of appending to the argument: the old names.append(...)
    # added another ['logistics'] entry to the caller's list on every call.
    all_names = list(names) + [['logistics']]
    X = embeddings + [normalize(x, axis=0)]
    results = []
    for data, data_names in zip(X, all_names):
        tmp = run_detectors.run_experiments(data, y, data_names, classifiers, n_reps, train_size)
        results.append(tmp)
    all_results = utils.merge_results(results, n_reps)
    results, tests = utils.stats_test(all_results)
    tests[0].to_csv('../../results/karate/tf_macro_pvalues' + utils.get_timestamp() + '.csv')
    tests[1].to_csv('../../results/karate/tf_micro_pvalues' + utils.get_timestamp() + '.csv')
    print('macro', results[0])
    print('micro', results[1])
    macro_path = '../../results/karate/tf_macro' + utils.get_timestamp() + '.csv'
    micro_path = '../../results/karate/tf_micro' + utils.get_timestamp() + '.csv'
    results[0].to_csv(macro_path, index=True)
    results[1].to_csv(micro_path, index=True)
    return results
def political_blogs_scenario(embedding_path):
    """
    Compare DeepWalk embeddings of seven sizes, the embedding read from embedding_path and
    the raw features passed through normalize(..., axis=0) on the political blogs data.

    Results are written as timestamped csv files under ../../results/political_blogs/
    :param embedding_path: path to a csv embedding; the first column is read as the index
    :return: None
    """
    y_path = '../../local_resources/political_blogs/y.p'
    x_path = '../../local_resources/political_blogs/X.p'
    sizes = [2, 4, 8, 16, 32, 64, 128]
    deepwalk_embeddings = []
    deepwalk_names = []
    dwpath = '../../local_resources/political_blogs/political_blogs'
    for size in sizes:
        path = dwpath + str(size) + '.emd'
        de = pd.read_csv(path, header=None, index_col=0, skiprows=1, sep=" ")
        # sort by the index column before taking the values
        de.sort_index(inplace=True)
        deepwalk_embeddings.append(de.values)
        deepwalk_names.append(['deepwalk' + str(size)])
    x, y = utils.read_data(x_path, y_path, threshold=0)
    # names are listed in the same order as the datasets in X
    names = deepwalk_names + [['hyperbolic'], ['logistic']]
    embedding = pd.read_csv(embedding_path, index_col=0)
    X = deepwalk_embeddings + [embedding.values, normalize(x, axis=0)]
    n_folds = 10
    results = run_detectors.run_all_datasets(X, y, names, classifiers, n_folds)
    all_results = utils.merge_results(results, n_folds)
    results, tests = utils.stats_test(all_results)
    # The macro and micro p-values used to share the 'pvalues' prefix, so two writes with
    # the same timestamp overwrote each other. They now get distinct names.
    tests[0].to_csv('../../results/political_blogs/macro_pvalues' + utils.get_timestamp() + '.csv')
    tests[1].to_csv('../../results/political_blogs/micro_pvalues' + utils.get_timestamp() + '.csv')
    print('macro', results[0])
    print('micro', results[1])
    macro_path = '../../results/political_blogs/macro' + utils.get_timestamp() + '.csv'
    micro_path = '../../results/political_blogs/micro' + utils.get_timestamp() + '.csv'
    results[0].to_csv(macro_path, index=True)
    results[1].to_csv(micro_path, index=True)
def run_embedding_array(embeddings, names, n_reps, train_size):
    """
    Embeddings generated with identical parameters vary considerably, so several of them
    are evaluated and their results merged before any comparison is made
    :param embeddings: the embeddings to evaluate, paired element-wise with names
    :param names: one entry per embedding, forwarded to run_detectors.run_experiments
    :param n_reps: forwarded to run_detectors.run_experiments
    :param train_size: forwarded to run_detectors.run_experiments
    :return: A tuple of pandas DataFrames (macro, micro)
    """
    label_file = 'local_resources/zachary_karate/y.p'
    feature_file = 'local_resources/zachary_karate/X.p'
    # only the labels are used; the feature matrix is discarded
    _, labels = utils.read_data(feature_file, label_file, threshold=0)
    per_embedding = [
        run_detectors.run_experiments(embedding, labels, name, classifiers, n_reps, train_size)
        for embedding, name in zip(embeddings, names)
    ]
    return utils.merge_results(per_embedding)
def get_seeds(): if request.method == 'GET': user_id = assign_cookie_value(request, COOKIE_KEY) print request.cookies raw_data = request.args data = json.loads(json.dumps(raw_data)) seed_num = int(data['seeds']) indicator = data['indicator'] net_dict = user_records.find_one({'user': user_id}, { 'diffusionUl.network': 1, '_id': 0 }) if net_dict is None: results = { 'error': 'A valid network has to be generated for this operation!' } flag = update_modification_time(user_id) response = make_response(json.dumps(results), 404) else: net = transform.dict2graph(net_dict['diffusionUl']['network']) seeds = n.seeds(net, seed_num, indicator) new_net = u.merge_results(net_dict['diffusionUl']['network'], seeds=seeds) flag = user_records.update_one({'user': user_id}, { '$set': { 'diffusionUl.network': new_net, 'diffusionUl.seeds': seeds }, '$currentDate': { 'lastUpdate': { '$type': 'date' } } }) print flag response = make_response(json.dumps(new_net), 200) return bind_cookie(response, COOKIE_KEY, user_id, COOKIE_DURATION)
def gensim_1in10000_debug_scenario():
    """
    Debug variant of the 1in10000 embedding comparison: prints the input shapes and checks
    that the feature index has no duplicates before joining the embedding on.

    The macro / micro summaries are printed and written as timestamped csv files under
    ../../results/neural/
    :return: None
    """
    scaler = StandardScaler()
    feature_path = '../../local_resources/features_1in10000.tsv'
    rf_features = pd.read_csv(feature_path, sep='\t', index_col=0)
    # `del index.name` fails where Index.name is a property; assigning None clears it
    rf_features.index.name = None
    # single-argument print produces the same text under Python 2 and Python 3
    print('input features shape %s' % (rf_features.shape,))
    emd = pd.read_csv('../../local_results/customer.emd', header=None, index_col=0, skiprows=1,
                      sep=" ")
    # this used to print rf_features.shape a second time instead of the embedding shape
    print('input embedding shape %s' % (emd.shape,))
    features, y = utils.get_classification_xy(feature_path, emd)
    # The old check called .unique() on a numpy array (which has no such method) and
    # compared a length with an array. The intent is that no index value repeats, since
    # duplicates would multiply rows in the join below.
    assert features.index.is_unique, 'feature index contains duplicate entries'
    all_feat = features.join(emd)
    # np.float was only an alias of the builtin float and has been removed from NumPy
    X1 = scaler.fit_transform(features.values.astype(float))
    X2 = scaler.fit_transform(all_feat.values.astype(float))
    names = np.array([['L2 without emd', 'L1 without emd', 'RF without emd'],
                      ['L2 with emd', 'L1 with emd', 'RF with emd'],
                      ['L2 just emd', 'L1 just emd', 'RF just emd']])
    n_folds = 10
    results = run_all_datasets([X1, X2, emd.values], y, names, classifiers, n_folds)
    all_results = utils.merge_results(results, n_folds)
    results, tests = utils.stats_test(all_results)
    print('macro %s' % (results[0],))
    print('micro %s' % (results[1],))
    # Both summaries used to be written to 'gensim_1in10000<timestamp>.csv', so the micro
    # table overwrote the macro table whenever the two timestamps matched.
    macro_path = '../../results/neural/gensim_macro_1in10000' + utils.get_timestamp() + '.csv'
    micro_path = '../../results/neural/gensim_micro_1in10000' + utils.get_timestamp() + '.csv'
    results[0].to_csv(macro_path, index=True)
    results[1].to_csv(micro_path, index=True)
def run_scenario(folder, embedding_path):
    """
    Compare DeepWalk embeddings of seven sizes, the embedding read from embedding_path and
    the raw features passed through normalize(..., axis=0) for the dataset in `folder`.

    Inputs are read from ../../local_resources/<folder>/ and results are written as
    timestamped csv files under ../../results/<folder>/
    :param folder: dataset folder name; also the file prefix of the <folder><size>.emd files
    :param embedding_path: path to a csv embedding; the first column is read as the index
    :return: None
    """
    y_path = '../../local_resources/{}/y.p'.format(folder)
    x_path = '../../local_resources/{}/X.p'.format(folder)
    sizes = [2, 4, 8, 16, 32, 64, 128]
    deepwalk_embeddings = []
    deepwalk_names = []
    dwpath = '../../local_resources/{0}/{1}'.format(folder, folder)
    for size in sizes:
        path = dwpath + str(size) + '.emd'
        de = pd.read_csv(path, header=None, index_col=0, skiprows=1, sep=" ")
        # sort by the index column before taking the values
        de.sort_index(inplace=True)
        deepwalk_embeddings.append(de.values)
        deepwalk_names.append(['deepwalk' + str(size)])
    x, y = utils.read_data(x_path, y_path, threshold=0)
    # names are listed in the same order as the datasets in X
    names = deepwalk_names + [['hyperbolic'], ['logistic']]
    embedding = pd.read_csv(embedding_path, index_col=0)
    X = deepwalk_embeddings + [embedding.values, normalize(x, axis=0)]
    n_folds = 10
    results = run_detectors.run_all_datasets(X, y, names, classifiers, n_folds)
    all_results = utils.merge_results(results, n_folds)
    results, tests = utils.stats_test(all_results)
    # The macro and micro p-values used to share the 'pvalues' prefix, so two writes with
    # the same timestamp overwrote each other. They now get distinct names.
    tests[0].to_csv('../../results/{0}/macro_pvalues{1}.csv'.format(
        folder, utils.get_timestamp()))
    tests[1].to_csv('../../results/{0}/micro_pvalues{1}.csv'.format(
        folder, utils.get_timestamp()))
    print('macro', results[0])
    print('micro', results[1])
    macro_path = '../../results/{0}/macro{1}.csv'.format(folder, utils.get_timestamp())
    micro_path = '../../results/{0}/micro{1}.csv'.format(folder, utils.get_timestamp())
    results[0].to_csv(macro_path, index=True)
    results[1].to_csv(micro_path, index=True)
# No type= is given for these options, so values supplied on the command line arrive as strings.
parser.add_argument('--max_l', default=100)
parser.add_argument('--n_ul', default=100)
args = parser.parse_args()

# Create the base output directory, then switch save_dir to its 'fig4_tleft' subdirectory.
save_dir = args.save_dir
if not os.path.exists(save_dir):
    os.makedirs(save_dir)
save_dir = os.path.join(save_dir, 'fig4_tleft')
if not os.path.exists(save_dir):
    os.makedirs(save_dir)

# When --merge_dica is 1, skip the experiment: merge the existing results with the DICA
# results, re-plot from the merged pickle and stop.
if int(args.merge_dica) == 1:
    utils.merge_results('results.pkl', 'dica.pkl', 'dica', save_dir)
    plotting.plot_tl(os.path.join(save_dir, 'merged.pkl'), ylim=4)
    exit()

# Convert the remaining command line options to numeric types.
n_task = int(args.n_task)
n = int(args.n)
p = int(args.p)
p_s = int(args.p_s)
p_conf = int(args.p_conf)
eps = float(args.eps)
g = float(args.g)
lambd = float(args.lambd)
lambd_test = float(args.lambd_test)
def diffuse_ul():
    """
    Handle a GET request that runs a diffusion model on the user's stored network.

    Reads 'model', 'seeds' and 'indicator' from the query arguments. With model 'LTM' it
    also reads 'threshold' and calls d.LTM; any other model reads 'pb-leaders',
    'pb-normal', 'scale' and 'ratio' and calls d.ICM. Seeds, communities and opinion
    leaders are taken from the user record, or computed and stored when the lookup
    returns None. When no network lookup result exists a 404 JSON error is returned.
    :return: the response with the user cookie bound to it
    """
    if request.method == 'GET':
        user_id = assign_cookie_value(request, COOKIE_KEY)
        print request.cookies
        raw_data = request.args
        # round-trip through json to turn the query arguments into a plain dict
        data = json.loads(json.dumps(raw_data))
        model_type = data['model']
        seed_num = int(data['seeds'])
        indicator = data['indicator']
        net_dict = user_records.find_one({'user': user_id}, {
            'diffusionUl.network': 1,
            '_id': 0
        })
        # NOTE(review): if user_records is a MongoDB collection, find_one with a projection
        # returns a document (without the field) rather than None when the user exists but
        # the field is unset, so the `is None` checks on seeds / communities /
        # opinion_leaders below may never trigger - confirm.
        seeds = user_records.find_one({'user': user_id}, {
            'diffusionUl.seeds': 1,
            '_id': 0
        })
        if net_dict is None:
            results = {
                'error': 'A valid network has to be generated for this operation!'
            }
            flag = update_modification_time(user_id)
            response = make_response(json.dumps(results), 404)
        else:
            net = transform.dict2graph(net_dict['diffusionUl']['network'])
            if seeds is None:
                # no stored seeds: compute them and store them on the user record
                seeds = n.seeds(net, seed_num, indicator)
                flag = user_records.update_one({'user': user_id}, {
                    '$set': {
                        'diffusionUl.seeds': seeds
                    },
                    '$currentDate': {
                        'lastUpdate': {
                            '$type': 'date'
                        }
                    }
                })
                print flag
            else:
                seeds = seeds['diffusionUl']['seeds']
            if model_type == 'LTM':
                threshold = float(data['threshold'])
                diffused = d.LTM(net, seeds, threshold)
                new_net = u.merge_results(
                    net_dict['diffusionUl']['network'],
                    seeds=seeds,
                    opinion_leaders=None,
                    communities=None,
                    diffused=diffused)
            else:
                pb_leaders = float(data['pb-leaders'])
                pb_normal = float(data['pb-normal'])
                scale = data['scale']
                ratio = float(data['ratio'])
                communities = user_records.find_one(
                    {'user': user_id}, {
                        'diffusionUl.communities': 1,
                        '_id': 0
                    })
                opinion_leaders = user_records.find_one(
                    {'user': user_id}, {
                        'diffusionUl.opinionLeaders': 1,
                        '_id': 0
                    })
                if communities is None:
                    # no stored communities: compute them and store them on the user record
                    communities = n.community_cnm(net)
                    flag = user_records.update_one({'user': user_id}, {
                        '$set': {
                            'diffusionUl.communities': communities
                        },
                        '$currentDate': {
                            'lastUpdate': {
                                '$type': 'date'
                            }
                        }
                    })
                    print flag
                else:
                    communities = communities['diffusionUl']['communities']
                if opinion_leaders is None:
                    # scale 'R' picks leaders from the communities; any other scale passes
                    # the node count and int(node count * ratio) to n.rnd_seeds
                    if scale == 'R':
                        opinion_leaders = n.find_transfer_sources(
                            communities, ratio)
                    else:
                        opinion_leaders = n.rnd_seeds(
                            net.GetNodes(), int(net.GetNodes() * ratio))
                    flag = user_records.update_one({'user': user_id}, {
                        '$set': {
                            'diffusionUl.opinionLeaders': opinion_leaders
                        },
                        '$currentDate': {
                            'lastUpdate': {
                                '$type': 'date'
                            }
                        }
                    })
                    print flag
                else:
                    opinion_leaders = opinion_leaders['diffusionUl'][
                        'opinionLeaders']
                diffused = d.ICM(net, seeds, opinion_leaders, pb_leaders,
                                 pb_normal)
                new_net = u.merge_results(
                    net_dict['diffusionUl']['network'],
                    seeds=seeds,
                    opinion_leaders=opinion_leaders,
                    communities=communities,
                    diffused=diffused)
            # the merged network is returned to the client but not written back here
            flag = update_modification_time(user_id)
            response = make_response(json.dumps(new_net), 200)
        return bind_cookie(response, COOKIE_KEY, user_id, COOKIE_DURATION)
def get_transfer_sources(): if request.method == 'GET': user_id = assign_cookie_value(request, COOKIE_KEY) print request.cookies raw_data = request.args data = json.loads(json.dumps(raw_data)) ratio = float(data['ratio']) scale = data['scale'] net_dict = user_records.find_one({'user': user_id}, { 'diffusionUl.network': 1, '_id': 0 }) if net_dict is None: results = { 'error': 'A valid network has to be generated for this operation!' } flag = update_modification_time(user_id) response = make_response(json.dumps(results), 404) else: net = transform.dict2graph(net_dict['diffusionUl']['network']) if scale == 'R': communities = user_records.find_one( {'user': user_id}, { 'diffusionUl.communities': 1, '_id': 0 }) if communities is None: communities = n.community_cnm(net) flag = user_records.update_one({'user': user_id}, { '$set': { 'diffusionUl.communities': communities }, '$currentDate': { 'lastUpdate': { '$type': 'date' } } }) print flag else: communities = communities['diffusionUl']['communities'] opinion_leaders = n.find_transfer_sources( communities, ratio) else: opinion_leaders = n.rnd_seeds(net.GetNodes(), int(net.GetNodes() * ratio)) new_net = u.merge_results(net_dict['diffusionUl']['network'], opinion_leaders=opinion_leaders) flag = user_records.update_one({'user': user_id}, { '$set': { 'diffusionUl.network': new_net, 'diffusionUl.opinionLeaders': opinion_leaders }, '$currentDate': { 'lastUpdate': { '$type': 'date' } } }) print flag response = make_response(json.dumps(new_net), 200) return bind_cookie(response, COOKIE_KEY, user_id, COOKIE_DURATION)
# No type= is given for these options, so values supplied on the command line arrive as strings.
parser.add_argument('--min_l', default=50)
parser.add_argument('--n_ul', default=100)
args = parser.parse_args()

# Create the base output directory, then switch save_dir to its 'fig5_bottom' subdirectory.
save_dir = args.save_dir
if not os.path.exists(save_dir):
    os.makedirs(save_dir)
save_dir = os.path.join(save_dir, 'fig5_bottom')
if not os.path.exists(save_dir):
    os.makedirs(save_dir)

# When --merge_mtl is 1, skip the experiment: merge the existing results with the MTL
# results, re-plot both figures from the merged pickle and stop.
if int(args.merge_mtl) == 1:
    utils.merge_results('results.pkl', 'mtl.pkl', 'mtl', save_dir)
    plotting.plot_mtl(os.path.join(save_dir, 'merged.pkl'))
    plotting.plot_mtl_mse(os.path.join(save_dir, 'merged.pkl'))
    exit()

# Convert the remaining command line options to numeric types.
n_task = int(args.n_task)
n = int(args.n)
p = int(args.p)
p_s = int(args.p_s)
p_conf = int(args.p_conf)
eps = float(args.eps)
g = float(args.g)
lambd = float(args.lambd)
lambd_test = float(args.lambd_test)