Exemplo n.º 1
0
def get_users(xnr_user_no, nodes=None):
    """Build a weighted, undirected retweet graph for a set of user ids.

    The ids come from ``nodes`` when it is non-empty, otherwise from
    ``create_xnr_targetuser(xnr_user_no)``.  Their retweet documents are
    fetched with ``user_es.mget`` in batches of 100 ids; a failed batch is
    reported and skipped so the remaining batches still contribute.

    Only retweet relations are used; comment relations are not included.

    Args:
        xnr_user_no: Value handed to ``create_xnr_targetuser`` when no
            ``nodes`` are supplied.
        nodes: Optional sequence of user ids that replaces the lookup.

    Returns:
        An ``nx.Graph``.  For each found document, an edge joins the
        document's ``uid`` to every other non-empty uid in its
        ``uid_retweet`` mapping, with weight ``count / max(counts)``.
        The graph is empty when there are no user ids.
    """
    # Single-argument print() produces the same output on Python 2 and 3.
    print('xnr_user_no::: %s' % (xnr_user_no,))

    uids = nodes if nodes else create_xnr_targetuser(xnr_user_no)
    # Guard against the lookup handing back None instead of a sequence.
    uids = uids or []
    print('uids::: %d' % len(uids))

    G = nx.Graph()
    if not uids:
        return G

    print('#####')
    batch_size = 100
    retweet_result = []
    for start in range(0, len(uids), batch_size):
        uid_item = uids[start:start + batch_size]
        try:
            docs = user_es.mget(index=retweet_index,
                                doc_type=retweet_type,
                                body={'ids': uid_item},
                                _source=True)['docs']
        except Exception as exc:
            # Best effort: report the failing batch and keep going, but let
            # KeyboardInterrupt / SystemExit propagate.
            print('uid error!! %r' % (exc,))
        else:
            retweet_result.extend(docs)

    for doc in retweet_result:
        # Treat an entry without a 'found' flag as not found.
        if not doc.get('found'):
            continue
        source = doc['_source']
        uid = source['uid']
        uid_retweet = json.loads(source['uid_retweet'])
        # Without a source uid or any counts there is nothing to link, and
        # max() would raise on the empty mapping.
        if not uid or not uid_retweet:
            continue
        max_count = max([int(n) for n in uid_retweet.values()])
        if not max_count:
            # All counts are zero: the normalised weight is undefined.
            continue
        G.add_weighted_edges_from([
            (uid, other, float(count) / max_count)
            for other, count in uid_retweet.items()
            if other and other != uid
        ])

    print('G has compelete!')
    return G
Exemplo n.º 2
0
def _add_interaction_edges(graph, docs, counts_field):
    """Add weighted edges to ``graph`` from a list of mget result entries.

    Args:
        graph: Graph object providing ``add_weighted_edges_from``.
        docs: Entries of an mget response's ``'docs'`` list.
        counts_field: Name of the ``_source`` field holding a JSON-encoded
            mapping of other uid -> interaction count.

    For each found entry, an edge joins the entry's ``uid`` to every other
    non-empty uid in the mapping, weighted by ``count / max(counts)``.
    """
    for doc in docs:
        # Treat an entry without a 'found' flag as not found.
        if not doc.get('found'):
            continue
        source = doc['_source']
        uid = source['uid']
        counts = json.loads(source[counts_field])
        # Without a source uid or any counts there is nothing to link, and
        # max() would raise on the empty mapping.
        if not uid or not counts:
            continue
        max_count = max([int(n) for n in counts.values()])
        if not max_count:
            # All counts are zero: the normalised weight is undefined.
            continue
        graph.add_weighted_edges_from([
            (uid, other, float(count) / max_count)
            for other, count in counts.items()
            if other and other != uid
        ])


def get_users(xnr_user_no, nodes=None):
    """Build a weighted, undirected retweet + comment graph for user ids.

    The ids come from ``nodes`` when it is non-empty, otherwise from
    ``create_xnr_targetuser(xnr_user_no)``.  Retweet and comment documents
    are fetched with ``user_es.mget`` in batches of 100 ids so that a single
    request body stays small.  Request errors are not caught here.

    Args:
        xnr_user_no: Value handed to ``create_xnr_targetuser`` when no
            ``nodes`` are supplied.
        nodes: Optional sequence of user ids that replaces the lookup.

    Returns:
        An ``nx.Graph`` with edges from the ``uid_retweet`` mappings first
        and the ``uid_comment`` mappings second, so a comment weight
        replaces a retweet weight on the same pair of uids.  The graph is
        empty when there are no user ids.
    """
    if not nodes:
        print('get xnr es...')
        uids = create_xnr_targetuser(xnr_user_no)
    else:
        print('have input nodes...')
        uids = nodes

    G = nx.Graph()
    # Nothing to query: skip the mget calls instead of sending an empty
    # (or None) id list.
    if not uids:
        return G

    uids = list(uids)
    batch_size = 100
    retweet_result = []
    comment_result = []
    for start in range(0, len(uids), batch_size):
        batch = uids[start:start + batch_size]
        retweet_result.extend(
            user_es.mget(index=retweet_index,
                         doc_type=retweet_type,
                         body={'ids': batch},
                         _source=True)['docs'])
        comment_result.extend(
            user_es.mget(index=comment_index,
                         doc_type=comment_type,
                         body={'ids': batch},
                         _source=True)['docs'])

    # Retweets first, then comments, so overlapping pairs end up with the
    # comment weight.
    _add_interaction_edges(G, retweet_result, 'uid_retweet')
    _add_interaction_edges(G, comment_result, 'uid_comment')

    return G