示例#1
0
def pdf(data):
    """Plot the distribution of one data series labelled 'Edge Weight'.

    ``data`` is wrapped in a one-element list and handed to
    ``pll.plot_pdf_mul_data`` with ``linear_bins=False``, ``central=False``
    and fitting switched on over the range ``(1, 10000)``.

    Args:
        data: The series to plot; forwarded untouched to the plotting helper.
    """
    series_label = 'Edge Weight'
    # 'r' / 'o' are presumably the colour and marker of the series -- confirm
    # against pll.plot_pdf_mul_data.
    plot_options = dict(
        labels=[series_label],
        linear_bins=False,
        central=False,
        fit=True,
        fitranges=[(1, 10000)],
    )
    pll.plot_pdf_mul_data([data], [series_label], ['r'], ['o'], **plot_options)
示例#2
0
def compore_distribution(field, feds, randoms, youngs):
    """Print one LaTeX table row comparing three samples, then plot their PDFs.

    The printed row contains, in order: ``field``; for each of ``feds``,
    ``randoms`` and ``youngs`` elements 2 and 3 of ``statis_util.comm_stat``
    (element 3 is shown as the sigma value); and for the pairs
    (randoms, feds), (youngs, feds) and (youngs, randoms) element 2 of
    ``statis_util.ks_test`` followed by ``pvalue`` applied to element 3.
    Afterwards the three samples are plotted into ``<field>.pdf``.

    Args:
        field: Name of the compared feature; used as the first table cell,
            as the plot name and as the stem of the output file name.
        feds: Sample plotted and labelled as 'ED'.
        randoms: Sample plotted and labelled as 'Random'.
        youngs: Sample plotted and labelled as 'Younger'.
    """
    summaries = [statis_util.comm_stat(sample)
                 for sample in (feds, randoms, youngs)]
    ks_pairs = ((randoms, feds), (youngs, feds), (youngs, randoms))
    ks_results = [statis_util.ks_test(first, second)
                  for first, second in ks_pairs]

    cells = ['%s' % field]
    # The backslash is escaped explicitly: the runtime text is still
    # "$\sigma$", but the literal no longer relies on an invalid "\s" escape.
    cells.extend('%.2f($\\sigma$=%.2f)' % (stat[2], stat[3])
                 for stat in summaries)
    # pvalue() presumably renders a significance marker string -- confirm.
    cells.extend('%.2f%s' % (ks[2], pvalue(ks[3])) for ks in ks_results)

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(' & '.join(cells) + ' \\\\')

    plot.plot_pdf_mul_data([feds, randoms, youngs], field,
                           ['--g', '--b', '--r'], ['s', 'o', '^'],
                           ['ED', 'Random', 'Younger'],
                           linear_bins=True, central=True, fit=False,
                           fitranges=None, savefile=field + '.pdf')
示例#3
0
def network_change(dbname, comname, netname):
    """Compare two pickled network snapshots and plot their in-degree PDFs.

    Loads the graphs stored in ``data/g1.pick`` and ``data/g2.pick``, reports
    ``gt.summary`` and ``gt.net_stat`` for each of them, and plots the
    distribution of ``indegree() + 1`` of both graphs into ``indegree.pdf``.

    The snapshots are not rebuilt here. Commented-out code that used to sit
    in this function created them with ``gt.load_network_subset`` using
    ``{'scraped_times': 2}`` (g1) and ``{'scraped_times': 131}`` (g2) and
    dumped them with ``pickle``.

    Args:
        dbname: Unused by the current body; kept so existing callers work.
        comname: Unused by the current body; kept so existing callers work.
        netname: Unused by the current body; kept so existing callers work.
    """
    snapshots = []
    for path in ('data/g1.pick', 'data/g2.pick'):
        # Pickle streams are bytes, so read in binary mode; the context
        # manager closes the handle, which the bare open() call never did.
        with open(path, 'rb') as handle:
            # NOTE(review): pickle.load can execute arbitrary code; these
            # files must only ever come from a trusted local run.
            snapshots.append(pickle.load(handle))
    g1, g2 = snapshots

    gt.summary(g1)
    # The original summarised g1 twice and never reported g2.
    gt.summary(g2)
    gt.net_stat(g1)
    gt.net_stat(g2)

    # Degrees are shifted by one before plotting (presumably so that
    # zero in-degree nodes survive the non-linear binning -- confirm).
    shifted_indegrees = [np.array(g1.indegree()) + 1,
                         np.array(g2.indegree()) + 1]
    pt.plot_pdf_mul_data(
        shifted_indegrees,
        'indegree', ['b', 'r'], ['o', '^'], ['G1', 'G2'],
        linear_bins=False,
        central=False,
        fit=True,
        savefile='indegree.pdf')
示例#4
0
def profile_feature_stat():
    """Print LaTeX-style statistics rows and plot PDFs of three profile fields.

    For each of ``friends_count``, ``followers_count`` and ``statuses_count``
    the values are read from the ``scom`` collection of the ``fed``,
    ``random`` and ``young`` databases and shifted by one. The function then
    prints elements 0-3 of ``statis_util.comm_stat`` for every sample,
    elements 0-3 of ``statis_util.ks_test`` for every pair of samples, and
    plots the three samples into ``<field>.pdf``.
    """
    # 'favourites_count' is deliberately not part of the analysed fields.
    fields = ['friends_count', 'followers_count', 'statuses_count']
    names = ['following', 'follower', 'tweet']

    query = {}  # no restriction; renamed so the builtin filter() is not shadowed
    # One entry per field; each entry holds three (start, end) tuples
    # (presumably one fit range per plotted sample -- confirm).
    fitranges = [[(200, 100000), (1000, 100000000), (800, 10000000)],
                 [(700, 10000), (800, 10000000), (800, 1000000)],
                 [(800, 100000), (20000, 10000000), (10000, 10000000)]]

    for field, name, field_ranges in zip(fields, names, fitranges):
        print('===================== ' + field)
        feds, randoms, youngs = [
            np.array(io.get_values_one_field(source, 'scom', field, query)) + 1
            for source in ('fed', 'random', 'young')
        ]

        # Row prefixes are reproduced exactly, including the missing space
        # after '&' for the Random and Younger rows.
        for prefix, sample in (('ED & ', feds),
                               ('Random &', randoms),
                               ('Younger &', youngs)):
            comm = statis_util.comm_stat(sample)
            print(prefix + ' & '.join(str(comm[k]) for k in range(4)) + '\\\\')
        print('\\hline')

        for title, first, second in (
                ('ks-test(Random, ED)', randoms, feds),
                ('ks-test(Younger, ED)', youngs, feds),
                ('ks-test(Younger, Random)', youngs, randoms)):
            z = statis_util.ks_test(first, second)
            print(title + ': & $n_1$: ' + str(z[0]) + ' & $n_2$: ' + str(z[1])
                  + ' & ks-value: ' + str(z[2]) + ' & p-value: ' + str(z[3])
                  + '\\\\')

        plot.plot_pdf_mul_data([feds, randoms, youngs],
                               name, ['g', 'b', 'r'], ['s', 'o', '^'],
                               ['ED', 'Random', 'Younger'],
                               linear_bins=False,
                               central=False,
                               fit=True,
                               fitranges=field_ranges,
                               savefile=field + '.pdf')
示例#5
0
文件: com_det.py 项目: wtgme/ohsn
def plot_pdf(ed, rd, yg, mode='degree'):
    """Plot the degree distributions of the Random, Young and ED graphs.

    Args:
        ed: Graph labelled 'ED' in the plot.
        rd: Graph labelled 'Random' in the plot.
        yg: Graph labelled 'Young' in the plot.
        mode: ``'indegree'`` or ``'outdegree'`` select that degree sequence
            of the graphs as given; any other value (default ``'degree'``)
            converts each graph with ``as_undirected()`` first and uses
            ``outdegree()`` of the result. ``mode`` is also forwarded to
            the plotting helper.
    """
    if mode == 'indegree':
        graphs, degree_method = (rd, yg, ed), 'indegree'
    elif mode == 'outdegree':
        graphs, degree_method = (rd, yg, ed), 'outdegree'
    else:
        undirected_ed = ed.as_undirected()
        undirected_rd = rd.as_undirected()
        undirected_yg = yg.as_undirected()
        graphs = (undirected_rd, undirected_yg, undirected_ed)
        degree_method = 'outdegree'

    # Sequences are collected in the Random, Young, ED order of the labels.
    sequences = [getattr(graph, degree_method)() for graph in graphs]
    plot.plot_pdf_mul_data(sequences, ['--bo', '--r^', '--ks'], mode,
                           ['Random', 'Young', 'ED'], False)
示例#6
0
def plot_pdf(ed, rd, yg, mode='degree'):
    """Plot the degree distributions of the Random, Young and ED graphs.

    Args:
        ed: Graph labelled 'ED' in the plot.
        rd: Graph labelled 'Random' in the plot.
        yg: Graph labelled 'Young' in the plot.
        mode: ``'indegree'`` or ``'outdegree'`` select that degree sequence
            of the graphs as given; any other value (default ``'degree'``)
            converts each graph with ``as_undirected()`` first and uses
            ``outdegree()`` of the result. ``mode`` is also forwarded to
            the plotting helper.
    """
    if mode == 'indegree':
        graphs, degree_method = (rd, yg, ed), 'indegree'
    elif mode == 'outdegree':
        graphs, degree_method = (rd, yg, ed), 'outdegree'
    else:
        undirected_ed = ed.as_undirected()
        undirected_rd = rd.as_undirected()
        undirected_yg = yg.as_undirected()
        graphs = (undirected_rd, undirected_yg, undirected_ed)
        degree_method = 'outdegree'

    # Sequences are collected in the Random, Young, ED order of the labels.
    sequences = [getattr(graph, degree_method)() for graph in graphs]
    plot.plot_pdf_mul_data(sequences, ['--bo', '--r^', '--ks'], mode,
                           ['Random', 'Young', 'ED'], False)
示例#7
0
def profile_feature_stat():
    """Print LaTeX-style statistics rows and plot PDFs of three profile fields.

    For each of ``friends_count``, ``followers_count`` and ``statuses_count``
    the values are read from the ``scom`` collection of the ``fed``,
    ``random`` and ``young`` databases and shifted by one. The function then
    prints elements 0-3 of ``statis_util.comm_stat`` for every sample,
    elements 0-3 of ``statis_util.ks_test`` for every pair of samples, and
    plots the three samples into ``<field>.pdf``.
    """
    # 'favourites_count' is deliberately not part of the analysed fields.
    fields = ['friends_count', 'followers_count', 'statuses_count']
    names = ['following', 'follower', 'tweet']

    query = {}  # no restriction; renamed so the builtin filter() is not shadowed
    # One entry per field; each entry holds three (start, end) tuples
    # (presumably one fit range per plotted sample -- confirm).
    fitranges = [[(200, 100000), (1000, 100000000), (800, 10000000)],
                 [(700, 10000), (800, 10000000), (800, 1000000)],
                 [(800, 100000), (20000, 10000000), (10000, 10000000)]]

    for field, name, field_ranges in zip(fields, names, fitranges):
        print('===================== ' + field)
        feds, randoms, youngs = [
            np.array(io.get_values_one_field(source, 'scom', field, query)) + 1
            for source in ('fed', 'random', 'young')
        ]

        # Row prefixes are reproduced exactly, including the missing space
        # after '&' for the Random and Younger rows.
        for prefix, sample in (('ED & ', feds),
                               ('Random &', randoms),
                               ('Younger &', youngs)):
            comm = statis_util.comm_stat(sample)
            print(prefix + ' & '.join(str(comm[k]) for k in range(4)) + '\\\\')
        print('\\hline')

        for title, first, second in (
                ('ks-test(Random, ED)', randoms, feds),
                ('ks-test(Younger, ED)', youngs, feds),
                ('ks-test(Younger, Random)', youngs, randoms)):
            z = statis_util.ks_test(first, second)
            print(title + ': & $n_1$: ' + str(z[0]) + ' & $n_2$: ' + str(z[1])
                  + ' & ks-value: ' + str(z[2]) + ' & p-value: ' + str(z[3])
                  + '\\\\')

        plot.plot_pdf_mul_data([feds, randoms, youngs],
                               name, ['g', 'b', 'r'], ['s', 'o', '^'],
                               ['ED', 'Random', 'Younger'],
                               linear_bins=False,
                               central=False,
                               fit=True,
                               fitranges=field_ranges,
                               savefile=field + '.pdf')
示例#8
0
def plot_bio(dbname, colname, fields, names):
    """Plot the distributions of several fields of one collection together.

    Args:
        dbname: Database name forwarded to ``iot.get_values_one_field``.
        colname: Collection name forwarded to ``iot.get_values_one_field``.
        fields: Field names to fetch; each is requested with the filter
            ``{field: {'$exists': True}}`` (presumably a MongoDB-style
            query -- confirm).
        names: Labels forwarded to the plotting helper, one per series.

    The plot is named 'Age' and saved as ``bmi.pdf``. Four line styles and
    four markers are supplied.
    """
    datas = [
        iot.get_values_one_field(dbname, colname, field,
                                 {field: {'$exists': True}})
        for field in fields
    ]
    line_styles = ['g-', 'b-', 'r-', 'k-']
    markers = ['s', 'o', '^', '*']
    plot.plot_pdf_mul_data(datas, 'Age', line_styles, markers, names,
                           linear_bins=True, central=True, fit=False,
                           fitranges=None, savefile='bmi' + '.pdf')
示例#9
0
文件: split_data.py 项目: wtgme/ohsn
def network_change(dbname, comname, netname):
    """Compare two pickled network snapshots and plot their in-degree PDFs.

    Loads the graphs stored in ``data/g1.pick`` and ``data/g2.pick``, reports
    ``gt.summary`` and ``gt.net_stat`` for each of them, and plots the
    distribution of ``indegree() + 1`` of both graphs into ``indegree.pdf``.

    The snapshots are not rebuilt here. Commented-out code that used to sit
    in this function created them with ``gt.load_network_subset`` using
    ``{'scraped_times': 2}`` (g1) and ``{'scraped_times': 131}`` (g2) and
    dumped them with ``pickle``.

    Args:
        dbname: Unused by the current body; kept so existing callers work.
        comname: Unused by the current body; kept so existing callers work.
        netname: Unused by the current body; kept so existing callers work.
    """
    snapshots = []
    for path in ('data/g1.pick', 'data/g2.pick'):
        # Pickle streams are bytes, so read in binary mode; the context
        # manager closes the handle, which the bare open() call never did.
        with open(path, 'rb') as handle:
            # NOTE(review): pickle.load can execute arbitrary code; these
            # files must only ever come from a trusted local run.
            snapshots.append(pickle.load(handle))
    g1, g2 = snapshots

    gt.summary(g1)
    # The original summarised g1 twice and never reported g2.
    gt.summary(g2)
    gt.net_stat(g1)
    gt.net_stat(g2)

    # Degrees are shifted by one before plotting (presumably so that
    # zero in-degree nodes survive the non-linear binning -- confirm).
    shifted_indegrees = [np.array(g1.indegree()) + 1,
                         np.array(g2.indegree()) + 1]
    pt.plot_pdf_mul_data(
        shifted_indegrees,
        'indegree', ['b', 'r'], ['o', '^'], ['G1', 'G2'],
        linear_bins=False,
        central=False,
        fit=True,
        savefile='indegree.pdf')
示例#10
0
def plot_bio(dbname, colname, fields, names):
    """Plot the distributions of several fields of one collection together.

    Args:
        dbname: Database name forwarded to ``iot.get_values_one_field``.
        colname: Collection name forwarded to ``iot.get_values_one_field``.
        fields: Field names to fetch; each is requested with the filter
            ``{field: {'$exists': True}}`` (presumably a MongoDB-style
            query -- confirm).
        names: Labels forwarded to the plotting helper, one per series.

    The plot is named 'Age' and saved as ``bmi.pdf``. Four line styles and
    four markers are supplied.
    """
    datas = [
        iot.get_values_one_field(dbname, colname, field,
                                 {field: {'$exists': True}})
        for field in fields
    ]
    line_styles = ['g-', 'b-', 'r-', 'k-']
    markers = ['s', 'o', '^', '*']
    plot.plot_pdf_mul_data(datas, 'Age', line_styles, markers, names,
                           linear_bins=True, central=True, fit=False,
                           fitranges=None, savefile='bmi' + '.pdf')
示例#11
0
文件: tag_network.py 项目: wtgme/ohsn
def pdf(data):
    """Plot the distribution of one data series labelled 'Edge Weight'.

    ``data`` is wrapped in a one-element list and handed to
    ``pll.plot_pdf_mul_data`` with ``linear_bins=False``, ``central=False``
    and fitting switched on over the range ``(1, 10000)``.

    Args:
        data: The series to plot; forwarded untouched to the plotting helper.
    """
    series_label = 'Edge Weight'
    # 'r' / 'o' are presumably the colour and marker of the series -- confirm
    # against pll.plot_pdf_mul_data.
    plot_options = dict(
        labels=[series_label],
        linear_bins=False,
        central=False,
        fit=True,
        fitranges=[(1, 10000)],
    )
    pll.plot_pdf_mul_data([data], [series_label], ['r'], ['o'], **plot_options)
示例#12
0
文件: community.py 项目: wtgme/ohsn
def count_freque():
    """Print, for each type value 0 through 4, the count reported by ``bnet``.

    Connects to the "fed" database via ``dbt.db_connect_no_auth`` and calls
    ``count({"type": i})`` on its "bnet" collection once per value.
    """
    collection = dbt.db_connect_no_auth("fed")["bnet"]
    for type_id in range(5):
        total = collection.count({"type": type_id})
        print(total)


if __name__ == "__main__":
    # Script entry point: obtain the Random ("rd") and Young ("yg")
    # communities and the ED ("fed") network, print their statistics, and
    # plot their sorted degree sequences together.

    # Alternative runs kept for reference:
    # snap_comm('fed', 'bnet', 'fedtime')
    # count_freque()
    # purn_net('yg')
    # out_net_commudet('fed', 'bnet', 'fedtime')
    # rdcom = plot_communty('rd', 'scnet', 'rd2l', 'GROUP[ 74 ][ 2691 ]')
    # ygcom = plot_communty('yg', 'scnet', 'yg3l', 'GROUP[ 33 ][ 3883 ]')

    rdcom = plot_communty("rd", "tnet", "rdtime", "GROUP[ 1539 ][ 4641 ]")
    ygcom = plot_communty("yg", "tnet", "ygtime", "GROUP[ 2360 ][ 1966 ]")
    fed = nt.load_network("fed", "snet")

    networks = [rdcom, ygcom, fed]
    for network in networks:
        nt.net_statis(network)

    # dict(...) makes this work with both networkx 1.x (degree() returns a
    # dict) and 2.x (degree() returns a view that has no .values()).
    rddseq, ygdseq, eddseq = [
        sorted(dict(nx.degree(network)).values(), reverse=True)
        for network in networks
    ]
    plot.plot_pdf_mul_data(
        [rddseq, ygdseq, eddseq], ["--bo", "--r^", "--ks"], "Degree", ["Random", "Young", "ED"], False
    )