def pdf(data):
    """Plot the PDF of a single data series labelled 'Edge Weight'.

    The series is handed to ``pll.plot_pdf_mul_data`` as a one-element
    list, drawn in red with circle markers, with ``linear_bins`` and
    ``central`` switched off and a fit requested over the range
    ``(1, 10000)``.

    :param data: the values to plot (passed through unchanged).
    """
    series = [data]
    axis_name = ['Edge Weight']
    colors = ['r']
    markers = ['o']
    pll.plot_pdf_mul_data(
        series,
        axis_name,
        colors,
        markers,
        labels=['Edge Weight'],
        linear_bins=False,
        central=False,
        fit=True,
        fitranges=[(1, 10000)],
    )
def compore_distribution(field, feds, randoms, youngs):
    """Print one LaTeX table row comparing three samples and plot their PDFs.

    For each sample, elements ``[2]`` and ``[3]`` of
    ``statis_util.comm_stat`` are printed as ``value($\\sigma$=value)``.
    For each pair of samples, element ``[2]`` of ``statis_util.ks_test`` is
    printed followed by ``pvalue(...)`` applied to element ``[3]``.
    The three samples are then plotted to ``<field>.pdf``.

    :param field: row label, plot name and stem of the output file name.
    :param feds: values of the 'ED' sample.
    :param randoms: values of the 'Random' sample.
    :param youngs: values of the 'Younger' sample.
    """
    # Summary statistics, one tuple per sample.
    ed_stats = statis_util.comm_stat(feds)
    rd_stats = statis_util.comm_stat(randoms)
    yg_stats = statis_util.comm_stat(youngs)

    # Pairwise Kolmogorov-Smirnov tests (same argument order as before).
    ks_rd_ed = statis_util.ks_test(randoms, feds)
    ks_yg_ed = statis_util.ks_test(youngs, feds)
    ks_yg_rd = statis_util.ks_test(youngs, randoms)

    row_format = '%s & %.2f($\sigma$=%.2f) & %.2f($\sigma$=%.2f) & %.2f($\sigma$=%.2f) & %.2f%s & %.2f%s & %.2f%s \\\\'
    row_values = (
        field,
        ed_stats[2], ed_stats[3],
        rd_stats[2], rd_stats[3],
        yg_stats[2], yg_stats[3],
        ks_rd_ed[2], pvalue(ks_rd_ed[3]),
        ks_yg_ed[2], pvalue(ks_yg_ed[3]),
        ks_yg_rd[2], pvalue(ks_yg_rd[3]),
    )
    # Single-argument print behaves the same as the former print statement.
    print(row_format % row_values)

    samples = [feds, randoms, youngs]
    line_styles = ['--g', '--b', '--r']
    markers = ['s', 'o', '^']
    legend = ['ED', 'Random', 'Younger']
    plot.plot_pdf_mul_data(
        samples,
        field,
        line_styles,
        markers,
        legend,
        linear_bins=True,
        central=True,
        fit=False,
        fitranges=None,
        savefile=field + '.pdf',
    )
def network_change(dbname, comname, netname):
    """Summarise two pickled network snapshots and plot their in-degree PDFs.

    The snapshots are read from ``data/g1.pick`` and ``data/g2.pick``. Both
    are passed to ``gt.summary`` and ``gt.net_stat``, and their in-degree
    sequences (each shifted by +1) are plotted together to ``indegree.pdf``.

    :param dbname: currently unused; kept for interface compatibility.
    :param comname: currently unused; kept for interface compatibility.
    :param netname: currently unused; kept for interface compatibility.
    """
    # NOTE(review): the snapshots appear to have been produced earlier with
    # gt.load_network_subset(..., {'scraped_times': 2}) and
    # (..., {'scraped_times': 131}) and dumped with open(..., 'w'); the file
    # mode is therefore left as text ('r') to stay consistent with the dump.
    # NOTE(security): pickle.load executes arbitrary code from the file;
    # only load pickles this project wrote itself.
    with open('data/g1.pick', 'r') as g1_file:
        g1 = pickle.load(g1_file)
    with open('data/g2.pick', 'r') as g2_file:
        g2 = pickle.load(g2_file)

    # Summarise BOTH snapshots (g1 used to be summarised twice, g2 never).
    gt.summary(g1)
    gt.summary(g2)
    gt.net_stat(g1)
    gt.net_stat(g2)

    # +1 shift kept from the original; presumably so zero in-degrees survive
    # the non-linear binning -- TODO confirm.
    g1_indegree = np.array(g1.indegree()) + 1
    g2_indegree = np.array(g2.indegree()) + 1
    pt.plot_pdf_mul_data(
        [g1_indegree, g2_indegree],
        'indegree',
        ['b', 'r'],
        ['o', '^'],
        ['G1', 'G2'],
        linear_bins=False,
        central=False,
        fit=True,
        savefile='indegree.pdf',
    )
def profile_feature_stat():
    """Print LaTeX statistics rows and plot PDFs for three profile counters.

    For each of ``friends_count``, ``followers_count`` and
    ``statuses_count`` the values are fetched with
    ``io.get_values_one_field`` for ``'fed'``, ``'random'`` and ``'young'``
    (second argument ``'scom'``), shifted by +1, and then:

    * one row per sample is printed from ``statis_util.comm_stat``;
    * one row per sample pair is printed from ``statis_util.ks_test``
      (labelled n_1, n_2, ks-value, p-value);
    * the three samples are plotted to ``<field>.pdf`` with the per-field
      fit ranges.
    """
    # 'favourites_count' is intentionally not in the list.
    fields = ['friends_count', 'followers_count', 'statuses_count']
    names = ['following', 'follower', 'tweet']
    # Empty query = no restriction. Renamed from ``filter`` so the builtin
    # is no longer shadowed.
    query = {}
    # One list of (start, end) fit ranges per field, in ED/Random/Younger order.
    fitranges = [[(200, 100000), (1000, 100000000), (800, 10000000)],
                 [(700, 10000), (800, 10000000), (800, 1000000)],
                 [(800, 100000), (20000, 10000000), (10000, 10000000)]]

    for field, name, ranges in zip(fields, names, fitranges):
        print('=====================' + ' ' + field)
        # +1 shift kept from the original; presumably so zero counts survive
        # the non-linear binning -- TODO confirm.
        feds = np.array(io.get_values_one_field('fed', 'scom', field, query)) + 1
        randoms = np.array(io.get_values_one_field('random', 'scom', field, query)) + 1
        youngs = np.array(io.get_values_one_field('young', 'scom', field, query)) + 1

        # Summary rows; the prefixes are reproduced exactly (note the
        # differing spacing after '&').
        summary_rows = (('ED & ', feds),
                        ('Random &', randoms),
                        ('Younger &', youngs))
        for prefix, sample in summary_rows:
            comm = statis_util.comm_stat(sample)
            print(prefix + str(comm[0]) + ' & ' + str(comm[1])
                  + ' & ' + str(comm[2]) + ' & ' + str(comm[3]) + '\\\\')
        print('\\hline')

        # Pairwise KS tests, in the same order and argument order as before.
        ks_rows = (('ks-test(Random, ED): & $n_1$: ', randoms, feds),
                   ('ks-test(Younger, ED): & $n_1$: ', youngs, feds),
                   ('ks-test(Younger, Random): & $n_1$: ', youngs, randoms))
        for prefix, first, second in ks_rows:
            z = statis_util.ks_test(first, second)
            print(prefix + str(z[0]) + ' & $n_2$: ' + str(z[1])
                  + ' & ks-value: ' + str(z[2]) + ' & p-value: ' + str(z[3]) + '\\\\')

        plot.plot_pdf_mul_data([feds, randoms, youngs], name,
                               ['g', 'b', 'r'], ['s', 'o', '^'],
                               ['ED', 'Random', 'Younger'],
                               linear_bins=False, central=False, fit=True,
                               fitranges=ranges, savefile=field + '.pdf')
def plot_pdf(ed, rd, yg, mode='degree'):
    """Plot the degree PDFs of the Random, Young and ED graphs together.

    :param ed: graph plotted under the 'ED' label.
    :param rd: graph plotted under the 'Random' label.
    :param yg: graph plotted under the 'Young' label.
    :param mode: ``'indegree'`` or ``'outdegree'`` selects that degree
        sequence; any other value converts each graph with
        ``as_undirected()`` first and uses ``outdegree()`` of the result.

    The graphs must provide ``indegree()``, ``outdegree()`` and
    ``as_undirected()`` (presumably igraph graphs -- confirm).
    """
    if mode == 'indegree':
        sequences = [rd.indegree(), yg.indegree(), ed.indegree()]
    elif mode == 'outdegree':
        sequences = [rd.outdegree(), yg.outdegree(), ed.outdegree()]
    else:
        # Convert in the original ED, Random, Young order, then read the
        # degrees in plotting order.
        ed_undirected = ed.as_undirected()
        rd_undirected = rd.as_undirected()
        yg_undirected = yg.as_undirected()
        sequences = [rd_undirected.outdegree(),
                     yg_undirected.outdegree(),
                     ed_undirected.outdegree()]

    # NOTE(review): positional order here (data, styles, name, labels, flag)
    # differs from other plot_pdf_mul_data call sites; kept verbatim.
    line_styles = ['--bo', '--r^', '--ks']
    legend = ['Random', 'Young', 'ED']
    plot.plot_pdf_mul_data(sequences, line_styles, mode, legend, False)
def plot_pdf(ed, rd, yg, mode='degree'):
    """Draw the degree distributions of three graphs in one PDF plot.

    :param ed: graph shown as 'ED'.
    :param rd: graph shown as 'Random'.
    :param yg: graph shown as 'Young'.
    :param mode: ``'indegree'`` / ``'outdegree'`` use that method on each
        graph directly; every other value uses ``outdegree()`` of the
        ``as_undirected()`` version of each graph.
    """
    if mode in ('indegree', 'outdegree'):
        # The mode string is exactly the name of the method to call.
        rddseq = getattr(rd, mode)()
        ygdseq = getattr(yg, mode)()
        eddseq = getattr(ed, mode)()
    else:
        edu = ed.as_undirected()
        rdu = rd.as_undirected()
        ygu = yg.as_undirected()
        rddseq = rdu.outdegree()
        ygdseq = ygu.outdegree()
        eddseq = edu.outdegree()

    # NOTE(review): argument order differs from the keyword-style calls to
    # plot_pdf_mul_data elsewhere; left exactly as it was.
    plot.plot_pdf_mul_data(
        [rddseq, ygdseq, eddseq],
        ['--bo', '--r^', '--ks'],
        mode,
        ['Random', 'Young', 'ED'],
        False,
    )
def profile_feature_stat():
    """Print LaTeX statistics rows and plot PDFs for three profile counters.

    For each of ``friends_count``, ``followers_count`` and
    ``statuses_count`` the values are fetched with
    ``io.get_values_one_field`` for ``'fed'``, ``'random'`` and ``'young'``
    (second argument ``'scom'``), shifted by +1, and then:

    * one row per sample is printed from ``statis_util.comm_stat``;
    * one row per sample pair is printed from ``statis_util.ks_test``
      (labelled n_1, n_2, ks-value, p-value);
    * the three samples are plotted to ``<field>.pdf`` with the per-field
      fit ranges.
    """
    # 'favourites_count' is intentionally not in the list.
    fields = ['friends_count', 'followers_count', 'statuses_count']
    names = ['following', 'follower', 'tweet']
    # Empty query = no restriction. Renamed from ``filter`` so the builtin
    # is no longer shadowed.
    query = {}
    # One list of (start, end) fit ranges per field, in ED/Random/Younger order.
    fitranges = [[(200, 100000), (1000, 100000000), (800, 10000000)],
                 [(700, 10000), (800, 10000000), (800, 1000000)],
                 [(800, 100000), (20000, 10000000), (10000, 10000000)]]

    for field, name, ranges in zip(fields, names, fitranges):
        print('=====================' + ' ' + field)
        # +1 shift kept from the original; presumably so zero counts survive
        # the non-linear binning -- TODO confirm.
        feds = np.array(io.get_values_one_field('fed', 'scom', field, query)) + 1
        randoms = np.array(io.get_values_one_field('random', 'scom', field, query)) + 1
        youngs = np.array(io.get_values_one_field('young', 'scom', field, query)) + 1

        # Summary rows; the prefixes are reproduced exactly (note the
        # differing spacing after '&').
        summary_rows = (('ED & ', feds),
                        ('Random &', randoms),
                        ('Younger &', youngs))
        for prefix, sample in summary_rows:
            comm = statis_util.comm_stat(sample)
            print(prefix + str(comm[0]) + ' & ' + str(comm[1])
                  + ' & ' + str(comm[2]) + ' & ' + str(comm[3]) + '\\\\')
        print('\\hline')

        # Pairwise KS tests, in the same order and argument order as before.
        ks_rows = (('ks-test(Random, ED): & $n_1$: ', randoms, feds),
                   ('ks-test(Younger, ED): & $n_1$: ', youngs, feds),
                   ('ks-test(Younger, Random): & $n_1$: ', youngs, randoms))
        for prefix, first, second in ks_rows:
            z = statis_util.ks_test(first, second)
            print(prefix + str(z[0]) + ' & $n_2$: ' + str(z[1])
                  + ' & ks-value: ' + str(z[2]) + ' & p-value: ' + str(z[3]) + '\\\\')

        plot.plot_pdf_mul_data([feds, randoms, youngs], name,
                               ['g', 'b', 'r'], ['s', 'o', '^'],
                               ['ED', 'Random', 'Younger'],
                               linear_bins=False, central=False, fit=True,
                               fitranges=ranges, savefile=field + '.pdf')
def plot_bio(dbname, colname, fields, names):
    """Plot the PDFs of several fields of one collection on a single figure.

    Each field is fetched with ``iot.get_values_one_field`` using the query
    ``{field: {'$exists': True}}``, so only documents that have the field
    contribute.

    :param dbname: forwarded to ``iot.get_values_one_field``.
    :param colname: forwarded to ``iot.get_values_one_field``.
    :param fields: field names to fetch, one data series per field.
    :param names: legend labels handed to the plotting routine.
    """
    datas = [
        iot.get_values_one_field(dbname, colname, field,
                                 {field: {'$exists': True}})
        for field in fields
    ]

    # NOTE(review): the plot name is hard-coded to 'Age' while the output
    # file is 'bmi.pdf' -- confirm which one is intended.
    line_styles = ['g-', 'b-', 'r-', 'k-']
    markers = ['s', 'o', '^', '*']
    plot.plot_pdf_mul_data(
        datas,
        'Age',
        line_styles,
        markers,
        names,
        linear_bins=True,
        central=True,
        fit=False,
        fitranges=None,
        savefile='bmi' + '.pdf',
    )
def network_change(dbname, comname, netname):
    """Summarise two pickled network snapshots and plot their in-degree PDFs.

    The snapshots are read from ``data/g1.pick`` and ``data/g2.pick``. Both
    are passed to ``gt.summary`` and ``gt.net_stat``, and their in-degree
    sequences (each shifted by +1) are plotted together to ``indegree.pdf``.

    :param dbname: currently unused; kept for interface compatibility.
    :param comname: currently unused; kept for interface compatibility.
    :param netname: currently unused; kept for interface compatibility.
    """
    # NOTE(review): the snapshots appear to have been produced earlier with
    # gt.load_network_subset(..., {'scraped_times': 2}) and
    # (..., {'scraped_times': 131}) and dumped with open(..., 'w'); the file
    # mode is therefore left as text ('r') to stay consistent with the dump.
    # NOTE(security): pickle.load executes arbitrary code from the file;
    # only load pickles this project wrote itself.
    with open('data/g1.pick', 'r') as g1_file:
        g1 = pickle.load(g1_file)
    with open('data/g2.pick', 'r') as g2_file:
        g2 = pickle.load(g2_file)

    # Summarise BOTH snapshots (g1 used to be summarised twice, g2 never).
    gt.summary(g1)
    gt.summary(g2)
    gt.net_stat(g1)
    gt.net_stat(g2)

    # +1 shift kept from the original; presumably so zero in-degrees survive
    # the non-linear binning -- TODO confirm.
    g1_indegree = np.array(g1.indegree()) + 1
    g2_indegree = np.array(g2.indegree()) + 1
    pt.plot_pdf_mul_data(
        [g1_indegree, g2_indegree],
        'indegree',
        ['b', 'r'],
        ['o', '^'],
        ['G1', 'G2'],
        linear_bins=False,
        central=False,
        fit=True,
        savefile='indegree.pdf',
    )
def plot_bio(dbname, colname, fields, names):
    """Fetch one data series per field and plot all of them as PDFs.

    Only documents where the field exists are queried
    (``{field: {'$exists': True}}``). The figure is written to ``bmi.pdf``.

    :param dbname: first argument passed to ``iot.get_values_one_field``.
    :param colname: second argument passed to ``iot.get_values_one_field``.
    :param fields: iterable of field names, one series each.
    :param names: legend labels passed to ``plot.plot_pdf_mul_data``.
    """
    datas = []
    for field in fields:
        exists_query = {field: {'$exists': True}}
        values = iot.get_values_one_field(dbname, colname, field, exists_query)
        datas.append(values)

    # NOTE(review): name 'Age' vs. output file 'bmi.pdf' are both hard-coded
    # and look inconsistent -- verify against the caller.
    plot.plot_pdf_mul_data(
        datas, 'Age',
        ['g-', 'b-', 'r-', 'k-'],
        ['s', 'o', '^', '*'],
        names,
        linear_bins=True, central=True,
        fit=False, fitranges=None,
        savefile='bmi' + '.pdf',
    )
def count_freque():
    """Print how many ``bnet`` documents exist for each ``type`` 0 through 4.

    Connects to the ``fed`` database via ``dbt.db_connect_no_auth`` and
    prints one count per line.
    """
    bnet = dbt.db_connect_no_auth("fed")["bnet"]
    for type_id in range(5):
        # Single-argument print behaves the same as the print statement.
        print(bnet.count({"type": type_id}))


if __name__ == "__main__":
    # Script entry point: compare the degree distributions of one Random
    # community, one Young community and the whole ED network.
    #
    # Previously tried and currently disabled:
    #   snap_comm('fed', 'bnet', 'fedtime')
    #   count_freque()
    #   purn_net('yg')
    #   out_net_commudet('fed', 'bnet', 'fedtime')
    #   rdcom = plot_communty('rd', 'scnet', 'rd2l', 'GROUP[ 74 ][ 2691 ]')
    #   ygcom = plot_communty('yg', 'scnet', 'yg3l', 'GROUP[ 33 ][ 3883 ]')
    rdcom = plot_communty("rd", "tnet", "rdtime", "GROUP[ 1539 ][ 4641 ]")
    ygcom = plot_communty("yg", "tnet", "ygtime", "GROUP[ 2360 ][ 1966 ]")
    fed = nt.load_network("fed", "snet")

    nt.net_statis(rdcom)
    nt.net_statis(ygcom)
    nt.net_statis(fed)

    # Degree sequences, largest degree first.
    rddseq = sorted(nx.degree(rdcom).values(), reverse=True)
    ygdseq = sorted(nx.degree(ygcom).values(), reverse=True)
    eddseq = sorted(nx.degree(fed).values(), reverse=True)

    # NOTE(review): positional order (data, styles, name, labels, flag)
    # differs from the keyword-style plot_pdf_mul_data calls; kept verbatim.
    plot.plot_pdf_mul_data(
        [rddseq, ygdseq, eddseq],
        ["--bo", "--r^", "--ks"],
        "Degree",
        ["Random", "Young", "ED"],
        False,
    )