示例#1
0
def para_replace(city, defense_name, checkin, ratio, step):
    ''' Run replace_core for every user in parallel, then reload the result.

    Args:
        city: city; selects the 'dataset/<city>/defense/' output directory
        defense_name: tag used in the output file name, e.g. 20_replace_20
        checkin: checkin data; must expose `columns` and a `uid` column
        ratio: obfuscate proportion, 0.1, 0.2, 0.3 ...
        step: step length, (odd)
    Returns:
        checkin: checkin data read back from the output file, with rows
            containing missing values dropped and uid/locid cast to int
    '''
    # Single definition of the output location; it is written, re-read and
    # rewritten below.
    out_path = ('dataset/' + city + '/defense/' +
                city + '_' + defense_name + '.checkin')

    # initialize: start the output file with only the header row
    pd.DataFrame([checkin.columns]).to_csv(out_path, index=False, header=None)

    ul_graph, lu_graph = ul_graph_build(checkin, 'locid')

    core_num = mp.cpu_count()
    print(core_num)

    # One task per user, each receiving only that user's rows.
    # NOTE(review): presumably replace_core appends its rows to out_path,
    # since its return value is discarded here -- confirm.
    Parallel(n_jobs=core_num)(
        delayed(replace_core)(city, defense_name,
                              checkin.loc[checkin.uid == u],
                              ul_graph, lu_graph, ratio, step)
        for u in checkin.uid.unique())

    # Malformed rows are skipped (with a warning) instead of aborting the
    # load. `error_bad_lines` was removed in pandas 2.0, so prefer the newer
    # `on_bad_lines` keyword and fall back for old pandas that rejects it.
    try:
        checkin = pd.read_csv(out_path, on_bad_lines='warn')
    except TypeError:
        checkin = pd.read_csv(out_path, error_bad_lines=False)

    checkin = checkin.dropna().reset_index(drop=True)
    # Go through float first so values such as '12.0' still become ints.
    checkin.uid = checkin.uid.apply(float).apply(int)
    checkin.locid = checkin.locid.apply(float).apply(int)

    # Persist the cleaned table over the raw output.
    checkin.to_csv(out_path, index=False)

    return checkin
示例#2
0
def single_run(city, cicnt=20, wl=100, wt=20, n_feature=128, new_run=False):
    ''' Run the walk / embedding / friend-prediction pipeline for one city.

    Args:
        city: dataset name, e.g. ny, la, london, Gowalla, Brightkite
        cicnt: passed to data_process and used as the model name prefix
        wl: forwarded to emb_train, feature_construct and
            unsuper_friends_predict
        wt: forwarded to the same three stages as wl
        n_feature: forwarded to the same three stages as wl
        new_run: when true, the random walks are regenerated first;
            otherwise the walking step is skipped
    '''
    folder_setup(city)
    checkin, friends = data_process(city, cicnt)
    ul_graph, lu_graph = ul_graph_build(checkin, 'locid')

    model_name = '_'.join((str(cicnt), 'locid'))
    print(model_name)

    # maximal 100 walk_len, 20 walk_times; wl / wt are only handed to the
    # later stages.
    max_walk_len = 100
    max_walk_times = 20

    print('walking')
    if new_run:
        users = checkin.uid.unique()
        para_ul_random_batch(city, model_name, users, ul_graph, lu_graph,
                             max_walk_len, max_walk_times)
    print('walk done')

    print('emb training')
    emb_train(city, model_name, wl, wt, n_feature)
    print('emb training done')

    feature_construct(city, model_name, friends, wl, wt, n_feature)
    unsuper_friends_predict(city, model_name, wl, wt, n_feature)
示例#3
0
def single_run(city, cicnt, ratio):
    ''' Apply the hiding defense to one city and run the full pipeline.

    Args:
        city: dataset name
        cicnt: passed to data_process and used as the name prefix
        ratio: hiding proportion given as a percentage, e.g. 10 20 30 40
    '''
    # Keep the percentage as passed in for the name labels. Rebuilding it
    # with int(ratio * 100) after the division truncates float error
    # (29 -> 0.29 -> 28.999... -> 28) and mislabels the outputs.
    requested_pct = ratio
    ratio = ratio * 1.0 / 100

    folder_setup(city)
    checkin, friends = data_process(city, cicnt)

    pct_label = str(int(requested_pct))

    defense_name = str(cicnt) + '_hiding_' + pct_label
    print(defense_name)

    checkin = para_hiding(city, defense_name, checkin, ratio)

    ul_graph, lu_graph = ul_graph_build(checkin, 'locid')

    model_name = str(cicnt) + '_locid_hiding_' + pct_label
    print(model_name)

    walk_len, walk_times = 100, 20  # maximal 100 walk_len, 20 walk_times

    print('walking')
    para_ul_random_walk(city, model_name, checkin.uid.unique(), ul_graph,
                        lu_graph, walk_len, walk_times)
    print('walk done')

    print('emb training')
    emb_train(city, model_name)
    print('emb training done')

    feature_construct(city, model_name, friends)
    unsuper_friends_predict(city, model_name)
示例#4
0
def single_ex_run(city, cicnt):
    ''' Run the full pipeline on check-ins processed by extreme_balance.

    Args:
        city: dataset name
        cicnt: passed to data_process and used as the name prefix
    '''
    folder_setup(city)
    checkin, friends = data_process(city, cicnt)

    defense_name = '_'.join((str(cicnt), 'ex'))
    print(defense_name)

    # From here on the pipeline works on the table extreme_balance returns.
    checkin = extreme_balance(city, defense_name, checkin)
    ul_graph, lu_graph = ul_graph_build(checkin, 'locid')

    model_name = '_'.join((str(cicnt), 'locid', 'ex'))
    print(model_name)

    # maximal 100 walk_len, 20 walk_times
    max_walk_len = 100
    max_walk_times = 20

    print('walking')
    users = checkin.uid.unique()
    para_ul_random_batch(city, model_name, users, ul_graph, lu_graph,
                         max_walk_len, max_walk_times)
    print('walk done')

    print('emb training')
    emb_train(city, model_name)
    print('emb training done')

    feature_construct(city, model_name, friends)
    unsuper_friends_predict(city, model_name)
示例#5
0
def batch_random_walk(num_user, num_location, cicnt=20, walk_len=50,
                      walk_times=10):
    ''' Generate random walks for the 'workload_<num_user>_<num_location>' dataset.

    Args:
        num_user: first component of the workload dataset name
        num_location: second component of the workload dataset name
        cicnt: passed to data_process and used as the model name prefix
            (default 20, the previously hard-coded value)
        walk_len: walk length handed to para_ul_random_batch (default 50;
            the original note gives 100 as the maximal value)
        walk_times: walk count handed to para_ul_random_batch (default 10;
            the original note gives 20 as the maximal value)
    '''
    city = 'workload_' + str(num_user) + '_' + str(num_location)
    folder_setup(city)

    # Only the check-ins are needed here; the friends table is not used.
    checkin, _friends = data_process(city, cicnt)

    ul_graph, lu_graph = ul_graph_build(checkin, 'locid')

    model_name = str(cicnt) + '_locid'
    print(model_name)

    print('walking')
    para_ul_random_batch(city, model_name, checkin.uid.unique(),
                         ul_graph, lu_graph, walk_len, walk_times)
    print('walk done')
示例#6
0
def single_replace(city, cicnt, ratio, step, fail_to_continue=False):
    ''' Apply the replace defense to one city and run the full pipeline.

    Args:
        city: dataset name
        cicnt: passed to data_process and used as the name prefix
        ratio: replace proportion given as a percentage, e.g. 10 20 30 40
        step: step length passed to para_replace; also part of the names
        fail_to_continue: when true, resume an earlier run: the defended
            check-ins are read from disk instead of being regenerated, and
            the random-walk step is skipped
    '''
    # Keep the percentage as passed in for the name labels. Rebuilding it
    # with int(ratio * 100) after the division truncates float error
    # (29 -> 0.29 -> 28.999... -> 28) and mislabels the outputs.
    requested_pct = ratio
    ratio = ratio * 1.0 / 100

    folder_setup(city)
    checkin, friends = data_process(city, cicnt)

    suffix = 'replace_' + str(int(requested_pct)) + '_' + str(int(step))
    defense_name = str(cicnt) + '_' + suffix
    model_name = str(cicnt) + '_locid_' + suffix

    if not fail_to_continue:
        checkin = para_replace(city, defense_name, checkin, ratio, step)
    else:
        # Read back the file para_replace writes for this defense_name. The
        # path used to hard-code the '20' prefix, which pointed at a
        # different file whenever cicnt != 20.
        checkin = pd.read_csv('dataset/' + city + '/defense/' + city + '_' +
                              defense_name + '.checkin')

    ul_graph, lu_graph = ul_graph_build(checkin, 'locid')

    walk_len, walk_times = 100, 20  # maximal 100 walk_len, 20 walk_times

    print('walking')
    if not fail_to_continue:
        para_ul_random_batch(city, model_name, checkin.uid.unique(), ul_graph,
                             lu_graph, walk_len, walk_times)
    print('walk done')

    print('emb training')
    emb_train(city, model_name)
    print('emb training done')

    feature_construct(city, model_name, friends)
    unsuper_friends_predict(city, model_name)
示例#7
0
# Command-line driver for the replace defense.
# Arguments: <city> <cicnt> <ratio percent> <step>
city = sys.argv[1]
cicnt = int(sys.argv[2])
ratio_pct = int(sys.argv[3])  # 10 20 30 40
ratio = ratio_pct * 1.0 / 100
step = int(sys.argv[4])

folder_setup(city)
checkin, friends = data_process(city, cicnt)

# Names are labelled with the integer percentage from the command line.
# Rebuilding it with int(ratio * 100) truncates float error (29 -> 28).
defense_name = str(cicnt) + '_replace_' + str(ratio_pct) + '_' + str(step)

checkin = para_replace(city, defense_name, checkin, ratio, step)

ul_graph, lu_graph = ul_graph_build(checkin, 'locid')

model_name = str(cicnt) + '_locid_replace_' + str(ratio_pct) + '_' + str(step)

walk_len, walk_times = 100, 20  # maximal 100 walk_len, 20 walk_times

# Parenthesised single-argument print works on both Python 2 and Python 3.
print('walking')
para_ul_random_walk(city, model_name, checkin.uid.unique(), ul_graph, lu_graph,
                    walk_len, walk_times)
print('walk done')

print('emb training')
emb_train(city, model_name)
print('emb training done')