示例#1
0
def get_all_forum_pages(dict_name, start_date, end_date):
    """Count forum documents matching any of the given keywords in a date range.

    Args:
        dict_name: Mapping whose values are the keyword strings to match
            against the ``keywords`` field; the keys are not used here.
        start_date: Start of the range, forwarded to ``get_time``.
        end_date: End of the range, forwarded to ``get_time``.

    Returns:
        The number of hits in the search response (the query asks for at
        most 10000 documents), or ``None`` when ``get_time`` yields ``0``
        for either bound.
    """
    start_timestamps, end_timestamps = get_time(start_date, end_date)
    if start_timestamps == 0 or end_timestamps == 0:
        return None
    # One "term" clause per keyword; they are OR-ed together via "should".
    keyword_clauses = [
        {"term": {"keywords": keyword}} for keyword in dict_name.values()
    ]
    query = {
        "query": {
            "bool": {
                "must": [{
                    "range": {
                        "timestamps": {
                            "gte": start_timestamps,
                            "lte": end_timestamps
                        }
                    }
                }, {
                    "bool": {
                        "should": keyword_clauses
                    }
                }]
            }
        },
        "from": 0,
        "size": 10000,
        "sort": [],
        "aggs": {}
    }
    hits = es.search(index=es_forum_index,
                     doc_type=es_forum_type,
                     body=query)['hits']['hits']
    return len(hits)
示例#2
0
def get_forum_pages(name, start_date, end_date):
    """Count forum documents whose ``keywords`` field equals ``name``.

    The date bounds are converted with ``get_time``; when either converted
    bound is ``0`` the function returns ``None`` without querying.
    Otherwise it returns the number of hits in the search response (the
    query requests up to 10000 documents).
    """
    start_timestamps, end_timestamps = get_time(start_date, end_date)
    if start_timestamps == 0 or end_timestamps == 0:
        return None

    time_window = {
        "range": {
            "timestamps": {"gte": start_timestamps, "lte": end_timestamps}
        }
    }
    keyword_match = {"term": {"keywords": name}}
    search_body = {
        "query": {
            "bool": {
                "must": [time_window, keyword_match],
                "must_not": [],
                "should": [],
            }
        },
        "from": 0,
        "size": 10000,
        "sort": [],
        "aggs": {},
    }

    response = es.search(
        index=es_forum_index, doc_type=es_forum_type, body=search_body
    )
    return len(response['hits']['hits'])
示例#3
0
def get_all_forum_data(dict_name, page_number, page_size, start_date,
                       end_date):
    """Fetch one page of forum documents matching any of the given keywords.

    Args:
        dict_name: Mapping whose values are the keyword strings to match
            against the ``keywords`` field; the keys are not used here.
        page_number: 1-based page index (page 1 starts at offset 0).
        page_size: Number of documents per page.
        start_date: Start of the range, forwarded to ``get_time``.
        end_date: End of the range, forwarded to ``get_time``.

    Returns:
        A list with one dict per hit, or ``None`` when ``get_time`` yields
        ``0`` for either bound. The ``time`` entry keeps only the part of
        the stored value before the first space.
    """
    start_from = (page_number - 1) * page_size
    start_timestamps, end_timestamps = get_time(start_date, end_date)
    if start_timestamps == 0 or end_timestamps == 0:
        return None
    # One "term" clause per keyword; they are OR-ed together via "should".
    keyword_clauses = [
        {"term": {"keywords": keyword}} for keyword in dict_name.values()
    ]
    query = {
        "query": {
            "bool": {
                "must": [{
                    "range": {
                        "timestamps": {
                            "gte": start_timestamps,
                            "lte": end_timestamps
                        }
                    }
                }, {
                    "bool": {
                        "should": keyword_clauses
                    }
                }]
            }
        },
        "from": start_from,
        "size": page_size,
        "sort": [],
        "aggs": {}
    }
    hits = es.search(index=es_forum_index,
                     doc_type=es_forum_type,
                     body=query)['hits']['hits']
    # Note the renamed output keys: "keywords" -> "name", "comments" -> "comment".
    return [
        {
            "context": source["context"],
            "title": source["title"],
            "name": source["keywords"],
            "author": source["author"],
            "address": source["address"],
            "comment": source["comments"],
            "reviews": source["reviews"],
            "source": source["source"],
            "likes": source["likes"],
            "buddha_operation": source["buddha_operation"],
            "tread": source["tread"],
            "time": source["time"].split(" ")[0],
        }
        for source in (hit["_source"] for hit in hits)
    ]
示例#4
0
def get_popularity(start_time, end_time, dict_name):
    """Compute each name's share of a weighted cross-source popularity score.

    Per-name figures are fetched from ``get_fb_aver_link``,
    ``get_tw_count``, ``get_news_count`` and ``get_ptt_popularity``, and
    combined with the module-level ``fb_weight``, ``ptt_weight``,
    ``tw_weight`` and ``news_weight`` into a raw score rounded to 3 places.

    Args:
        start_time: Start of the range, forwarded to ``get_time``.
        end_time: End of the range, forwarded to ``get_time``.
        dict_name: Mapping of id -> name. The id is passed to
            ``get_partisan``; the name keys the per-source figures and
            the result.

    Returns:
        ``{name: {"partisan": ..., "sorce": share}}``. Every entry but the
        last holds ``raw_score / total``; the last holds the remainder
        (rounded to 3 places) so the shares add up to 1. Returns ``0`` if
        anything raises, including the division when the total raw score
        is 0 and there is more than one name.
    """
    try:
        start_time, end_time = get_time(start_time, end_time)
        fb = get_fb_aver_link(dict_name, start_time, end_time)
        tw = get_tw_count(dict_name, start_time, end_time)
        news = get_news_count(dict_name, start_time, end_time)
        ptt = get_ptt_popularity(dict_name, start_time, end_time)

        names = list(dict_name.values())
        # A source that returned 0 instead of a mapping is treated as
        # "zero for every name".
        if fb == 0:
            fb = dict.fromkeys(names, 0)
        if tw == 0:
            tw = dict.fromkeys(names, 0)
        if news == 0:
            news = dict.fromkeys(names, 0)
        if ptt == 0:
            ptt = dict.fromkeys(names, 0)

        raw_scores = {}
        total = 0
        for name in names:
            raw = round((fb[name] * fb_weight +
                         ptt[name] * ptt_weight +
                         tw[name] * tw_weight +
                         news[name] * news_weight), 3)
            raw_scores[name] = raw
            total += raw

        result_dict = {}
        remainder = 1
        last_index = len(dict_name) - 1
        for index, (candidate_id, name) in enumerate(dict_name.items()):
            entry = {"partisan": get_partisan(candidate_id)}
            if index == last_index:
                # The last entry absorbs the remainder so shares sum to 1.
                entry["sorce"] = round(remainder, 3)
            else:
                share = raw_scores[name] / total
                entry["sorce"] = share
                remainder -= share
            result_dict[name] = entry
        return result_dict
    except Exception:
        # Best-effort contract kept for callers: any failure yields 0.
        # Narrowed from a bare ``except`` so KeyboardInterrupt/SystemExit
        # are no longer swallowed.
        return 0
示例#5
0
def get_information_news_statistics_count(start_time, end_time, dict_name):
    """Build per-day news counts for each name over a date range.

    Args:
        start_time: Start of the range, forwarded to ``get_time`` and
            ``get_date``.
        end_time: End of the range, forwarded to ``get_time`` and
            ``get_date``.
        dict_name: Mapping whose values are the names matched against the
            ``keywords`` field, or ``0``.

    Returns:
        ``{"person": {name: [count, ...]}, "date": [...]}`` where each
        count list is aligned index-for-index with the ``date`` list
        returned by ``get_date``. Returns ``None`` when ``get_time``
        yields ``0`` for either bound or ``dict_name`` is ``0``.
    """
    start_timestamps, end_timestamps = get_time(start_time, end_time)
    if start_timestamps == 0 or end_timestamps == 0 or dict_name == 0:
        return None
    # Only build the date axis once the inputs are known to be usable.
    date_axis = get_date(start_time, end_time)
    per_person = {}
    for name in dict_name.values():
        query = {
            "query": {
                "bool": {
                    "must": [{
                        "range": {
                            "timestamps": {
                                "gte": start_timestamps,
                                "lte": end_timestamps
                            }
                        }
                    }, {
                        "term": {
                            "keywords": name
                        }
                    }],
                    "must_not": [],
                    "should": []
                }
            },
            "from": 0,
            "size": 10000,
            "sort": [],
            "aggs": {}
        }
        hits = es.search(index=es_news_index,
                         doc_type=es_news_type,
                         body=query)['hits']['hits']
        daily_counts = dict.fromkeys(date_axis, 0)
        for hit in hits:
            day = hit["_source"]["time"].split(" ")[0]
            # The range filter is on "timestamps" while the day comes from
            # the separate "time" field; skip a hit whose day is not on the
            # axis instead of failing the whole request with KeyError.
            if day in daily_counts:
                daily_counts[day] += 1
        # Read the counts back in axis order so they line up with "date"
        # regardless of dict iteration order.
        per_person[name] = [daily_counts[day] for day in date_axis]
    return {"person": per_person, "date": date_axis}
示例#6
0
def get_news_data(name, page_number, page_size, start_date, end_date):
    """Fetch one page of news documents whose ``keywords`` field equals ``name``.

    Args:
        name: Value matched against the ``keywords`` field.
        page_number: 1-based page index (page 1 starts at offset 0).
        page_size: Number of documents per page.
        start_date: Start of the range, forwarded to ``get_time``.
        end_date: End of the range, forwarded to ``get_time``.

    Returns:
        A list with one dict per hit, or ``None`` when ``get_time`` yields
        ``0`` for either bound. The ``time`` entry keeps only the part of
        the stored value before the first space.
    """
    start_from = (page_number - 1) * page_size
    start_timestamps, end_timestamps = get_time(start_date, end_date)
    if start_timestamps == 0 or end_timestamps == 0:
        return None
    query = {
        "query": {
            "bool": {
                "must": [{
                    "range": {
                        "timestamps": {
                            "gte": start_timestamps,
                            "lte": end_timestamps
                        }
                    }
                }, {
                    "term": {
                        "keywords": name
                    }
                }],
                "must_not": [],
                "should": []
            }
        },
        "from": start_from,
        "size": page_size,
        "sort": [],
        "aggs": {}
    }
    hits = es.search(index=es_news_index, doc_type=es_news_type,
                     body=query)['hits']['hits']
    # Note the renamed output key: "news_source" -> "source".
    return [
        {
            "context": source["context"],
            "source": source["news_source"],
            "keywords": source["keywords"],
            "title": source["title"],
            "summary": source["summary"],
            "url": source["url"],
            "images": source["images"],
            "time": source["time"].split(" ")[0],
        }
        for source in (hit["_source"] for hit in hits)
    ]
示例#7
0
def get_facebook_trend(start_time, end_time, dict_name):
    """Build per-day average-likes series and summary rows for each name.

    Args:
        start_time: Start of the range, forwarded to ``get_date`` and
            ``get_time``.
        end_time: End of the range, forwarded to ``get_date`` and
            ``get_time``.
        dict_name: Mapping whose values are matched against the
            ``facebook_name`` field; the keys are not used here.

    Returns:
        ``{"echart": {"xAxis": dates, "series": {name: [avg, ...]}},
        "table": [row, ...]}`` where each row holds ``name``,
        ``total_likes``, ``total_post`` and ``aver_likes``. Averages are
        truncated to int and are 0 for a day (or a name) with no posts or
        no likes.
    """
    list_data = get_date(start_time, end_time)
    start_time, end_time = get_time(start_time, end_time)
    series_dict = {}
    table_list = []
    for name in dict_name.values():
        posts_per_day = dict.fromkeys(list_data, 0)
        likes_per_day = dict.fromkeys(list_data, 0)
        query = {"query": {"bool": {"must": [{"term": {"facebook_name": name}},
                                             {"range": {"timestamps": {"gte": start_time, "lte": end_time}}}]}},
                 "from": 0,
                 "size": 9999}
        hits = es.search(index=es_facebook_index, doc_type=es_facebook_type, body=query)['hits']['hits']
        for hit in hits:
            source = hit["_source"]
            day = get_timestamps_to_date(source["timestamps"])
            posts_per_day[day] += 1
            likes_per_day[day] += int(source["likes"])

        daily_averages = []
        total_posts = 0
        total_likes = 0
        for day in list_data:
            posts = posts_per_day[day]
            likes = likes_per_day[day]
            if posts == 0 or likes == 0:
                daily_averages.append(0)
            else:
                daily_averages.append(int(likes / posts))
            total_likes += likes
            total_posts += posts
        series_dict[name] = daily_averages

        table_list.append({
            "name": name,
            "total_likes": total_likes,
            "total_post": total_posts,
            # A name with no posts in range used to raise
            # ZeroDivisionError here; report an average of 0 instead.
            "aver_likes": int(total_likes / total_posts) if total_posts else 0,
        })
    return {
        "echart": {"xAxis": list_data, "series": series_dict},
        "table": table_list,
    }
示例#8
0
def get_twitter_data(name, page_number, page_size, start_date, end_date):
    """Fetch one page of Twitter documents whose ``twitter_search`` equals ``name``.

    Args:
        name: Value matched against the ``twitter_search`` field.
        page_number: 1-based page index (page 1 starts at offset 0).
        page_size: Number of documents per page.
        start_date: Start of the range, forwarded to ``get_time``.
        end_date: End of the range, forwarded to ``get_time``.

    Returns:
        A list with one dict per hit, or ``None`` when ``get_time`` yields
        ``0`` for either bound. The ``time`` entry keeps only the part of
        the stored value before the first space.
    """
    start_from = (page_number - 1) * page_size
    start_timestamps, end_timestamps = get_time(start_date, end_date)
    # Same guard as the other paged fetchers in this file: do not query
    # with a 0 bound, which would otherwise be sent as a real timestamp.
    if start_timestamps == 0 or end_timestamps == 0:
        return None
    query = {
        "query": {
            "bool": {
                "must": [{
                    "range": {
                        "timestamps": {
                            "gte": start_timestamps,
                            "lte": end_timestamps
                        }
                    }
                }, {
                    "term": {
                        "twitter_search": name
                    }
                }],
                "must_not": [],
                "should": []
            }
        },
        "from": start_from,
        "size": page_size,
        "sort": [],
        "aggs": {}
    }
    hits = es.search(index=es_twitter_index,
                     doc_type=es_twitter_type,
                     body=query)['hits']['hits']
    # Note the renamed output key: "twitter_search" -> "name".
    return [
        {
            "context": source["context"],
            "likes": source["likes"],
            "name": source["twitter_search"],
            "twitter_name": source["twitter_name"],
            "comment": source["comment"],
            "time": source["time"].split(" ")[0],
        }
        for source in (hit["_source"] for hit in hits)
    ]
def _candidate_hits(index, doc_type, field, name, start_time, end_time):
    """Return the raw hits where ``field`` equals ``name`` within the time range."""
    query = {
        "query": {
            "bool": {
                "must": [{
                    "term": {
                        field: name
                    }
                }, {
                    "range": {
                        "timestamps": {
                            "gte": start_time,
                            "lte": end_time
                        }
                    }
                }]
            }
        },
        "from": 0,
        "size": 9999
    }
    return es.search(index=index, doc_type=doc_type,
                     body=query)['hits']['hits']


def _to_shares(counts, total):
    """Replace each count in ``counts`` with its share of ``total``, in place.

    Every entry but the last is ``count / total`` rounded to 3 places; the
    last entry takes the remainder so the shares add up to 1. ``counts``
    is left untouched when ``total`` is 0.
    """
    if total == 0:
        return
    remainder = 1.0
    last_position = len(counts) - 1
    # Only values are reassigned, so iterating the dict while updating is safe.
    for position, key in enumerate(counts):
        if position != last_position:
            share = round(float(counts[key]) / total, 3)
            remainder -= share
            counts[key] = share
        else:
            counts[key] = round(remainder, 3)


def get_candidates(dict_name):
    """Compute today's "heat" and "activity" shares for each name.

    "heat" is based on the number of news hits (``keywords`` field) plus
    Twitter hits (``twitter_search`` field) for the name; "activity" is
    based on the summed ``likes`` of Facebook hits (``facebook_name``
    field). The range is today's date converted with ``get_time``.

    Args:
        dict_name: Mapping whose values are the names to look up; the keys
            are not used here.

    Returns:
        ``{"heat": {name: share}, "activity": {name: share}}``. Within
        each mapping the shares add up to 1; if the corresponding total is
        0 the raw zero counts are returned instead. Returns ``0`` if
        anything raises.
    """
    try:
        today = time.strftime("%Y-%m-%d")
        start_time, end_time = get_time(today, today)
        heat = {}
        heat_total = 0
        activity = {}
        activity_total = 0
        for name in dict_name.values():
            news_hits = _candidate_hits(es_news_index, es_news_type,
                                        "keywords", name,
                                        start_time, end_time)
            twitter_hits = _candidate_hits(es_twitter_index, es_twitter_type,
                                           "twitter_search", name,
                                           start_time, end_time)
            mentions = len(twitter_hits) + len(news_hits)
            heat_total += mentions
            heat[name] = mentions

            facebook_hits = _candidate_hits(es_facebook_index,
                                            es_facebook_type,
                                            "facebook_name", name,
                                            start_time, end_time)
            likes = sum(int(hit["_source"]["likes"]) for hit in facebook_hits)
            activity_total += likes
            activity[name] = likes

        # Previously the activity branch called len() on the integer total,
        # which raised TypeError and made the whole function return 0, and
        # the position counter was never advanced so the last entry never
        # received the remainder.
        _to_shares(heat, heat_total)
        _to_shares(activity, activity_total)
        return {"heat": heat, "activity": activity}
    except Exception:
        # Best-effort contract kept for callers: any failure yields 0.
        return 0