Пример #1
0
def query_major_rc():

    sql = 'SELECT TITLE,COUNT(TITLE),MOST_MONEY\
        from job\
        where info like "%软件测试%"\
        group by title having count(*) > 0 \
        ORDER BY COUNT(title) DESC\
        LIMIT 10\
        '

    cur = db_connect.Db_Connect(host='localhost',
                                user='******',
                                passwd='1234',
                                db='51job',
                                port=3306,
                                charset='utf8')
    cur.execute(sql)
    data_list = []
    res = list(cur.fetchall())
    res_sorted_by_money = money_int.Money_Int(res)
    for i in range(len(res_sorted_by_money)):
        data_list.append({
            'name': res_sorted_by_money[i][0],
            'value': (int(res_sorted_by_money[i][1] / 1000 / 12))
        })
    print(jsonify(data_list))
    return jsonify(data_list)
Пример #2
0
def Gensim_Search(text_url, keyword):
    if os.path.exists(text_url):
        pass
    else:
        text = ''
        cur = db_connect.Db_Connect(host='localhost',
                                    user='******',
                                    passwd='1234',
                                    db='51job',
                                    port=3306,
                                    charset='utf8')
        sql = 'select keyword from job'
        cur.execute(sql)

        for i in (cur.fetchall()):
            text = text + str(i[0])
        with open(text_url, 'w+', encoding='utf-8') as f:
            f.write(text)
    up_and_low.Up_And_Low(text_url)
    sentences = word2vec.Text8Corpus(text_url)
    model = word2vec.Word2Vec(sentences, size=200)
    model.save('./textmodel.model')
    try:
        most_similar_word = model.most_similar(keyword.upper())
        return most_similar_word
    except:
        return None
Пример #3
0
def query_talents():
    sql = 'SELECT title,COUNT(title) as titlenum\
            FROM job\
            WHERE info LIKE "%大数据%" and title LIKE "%师"\
            group by title having count(*) > 0\
            ORDER BY COUNT(title) DESC\
            LIMIT 10\
        '

    cur = db_connect.Db_Connect(host='localhost',
                                user='******',
                                passwd='1234',
                                db='51job',
                                port=3306,
                                charset='utf8')
    cur.execute(sql)
    data_dict = {}
    data_dict['names'] = []
    data_dict['values'] = []
    data_dict['extra'] = []
    res = list(cur.fetchall())
    num = 0
    for i in range(len(res)):
        num += res[i][1]
    for i in range(len(res)):
        data_dict['names'].append({"name": res[i][0]})
        data_dict['extra'].append(res[i][0])
    for i in range(len(res)):
        data_dict['values'].append(int((res[i][1])))
    # print(data_dict)
    return jsonify(data_dict)
Пример #4
0
def query_city():
    sql = 'SELECT place_province,count(place_province) as num\
            FROM job\
            WHERE info LIKE "%大数据%"\
            group by place_province having count(*) > 0\
            ORDER BY COUNT(title) DESC\
            LIMIT 10\
    '

    cur = db_connect.Db_Connect(host='localhost',
                                user='******',
                                passwd='1234',
                                db='51job',
                                port=3306,
                                charset='utf8')
    cur.execute(sql)
    data_list = []
    res = list(cur.fetchall())
    num = 0
    for i in range(len(res)):
        num += res[i][1]
    for i in range(len(res)):
        data_list.append({'name': res[i][0], 'value': int((res[i][1]))})
    # print(data_dict)
    return jsonify(data_list)
Пример #5
0
def query_demand_dmt():
    sql = 'SELECT TITLE,COUNT(TITLE)\
            from job\
            where info like "%多媒体%"\
            group by title having count(*) > 0\
            ORDER BY COUNT(title) DESC\
            LIMIT 10\
            '

    cur = db_connect.Db_Connect(host='localhost',
                                user='******',
                                passwd='1234',
                                db='51job',
                                port=3306,
                                charset='utf8')
    cur.execute(sql)
    data_list = []
    res = list(cur.fetchall())
    num = 0
    for i in range(len(res)):
        num += res[i][1]
    for i in range(len(res)):
        data_list.append({'name': res[i][0], 'value': int((res[i][1]))})
    # print(data_dict)
    return jsonify(data_list)
Пример #6
0
def query_salary_bj():
    sql = 'SELECT  place_city,least_money,most_money\
                FROM job\
                WHERE place_province="北京" AND place_city!="北京"\
                    '

    cur = db_connect.Db_Connect(host='localhost',
                                user='******',
                                passwd='1234',
                                db='51job',
                                port=3306,
                                charset='utf8')
    cur.execute(sql)
    data_dict = {}
    res = list(cur.fetchall())
    is_not_same = []

    res_sorted_by_least = money_least.Money_Least(res)
    res_sorted_by_most = money_most.Money_Most(res)

    res_fi1 = []
    res_fi2 = []

    for i in res_sorted_by_least:
        if i["地区名"] not in is_not_same:
            is_not_same.append(i["地区名"])
            res_fi1.append({"地区名": i["地区名"], "最低工资": i["最低工资"]})
    is_not_same = []
    for i in res_sorted_by_most:
        if i["地区名"] not in is_not_same:
            is_not_same.append(i["地区名"])
            res_fi2.append({"地区名": i["地区名"], "最高工资": i["最高工资"]})

    for i in range(len(res_fi1)):
        res_fi1[i]["最高工资"] = res_fi2[i]["最高工资"]
    #{'numsh':[最高],'numsl':[最低],'names':[名称]}
    data_dict['numsh'] = []
    data_dict['numsl'] = []
    data_dict['names'] = []
    for i in range(len(res_fi1)):

        data_dict['numsh'].append(res_fi1[i]['最高工资'])
        data_dict['numsl'].append(res_fi1[i]['最低工资'])
        data_dict['names'].append(res_fi1[i]['地区名'])
    # print(data_dict)
    data_dict['names'].remove('昌平区')
    del data_dict['numsh'][5]
    del data_dict['numsl'][5]
    print(data_dict)

    return jsonify(data_dict)
Пример #7
0
def query_location():
    sql = 'SELECT DISTINCT place_province,count(place_province)\
             FROM job\
             group by place_province having count(*) > 0\
            '

    cur = db_connect.Db_Connect(host='localhost',
                                user='******',
                                passwd='1234',
                                db='51job',
                                port=3306,
                                charset='utf8')
    cur.execute(sql)
    data_list = []
    res = list(cur.fetchall())
    num = 0
    for i in range(len(res)):
        num += res[i][1]
    for i in range(len(res)):
        data_list.append({'name': res[i][0], 'value': int((res[i][1]))})
    # print(data_dict)
    return jsonify(data_list)
Пример #8
0
def Resume(place,major,text):
    if text!='':
        cur = db_connect.Db_Connect(host='localhost', user='******', passwd='1234', db='51job', port=3306, charset='utf8')
        sql = "select distinct title,company,least_money,most_money,keyword,info from job where keyword like '%" + major + "%' or info like '%" + major + "%' or title like '%" + major + "%'  and place_province like '%" + place +"%' or place_city like '%" + place +"%' limit 100"

        cur.execute(sql)
        #resume是添加交集功能,
        data = cur.fetchall()

        doc_test=text
        #print(doc0,doc1,doc_test)
        all_doc_chara=[]
        all_doc=[]
        try:
            for i in data:
                all_doc.append(i[0]+'-!-!-'+i[1]+'-!-!-'+i[2]+'-!-!-'+i[3]+'-!-!-'+i[4].split(' ')[5])
                all_doc_chara.append("".join(re.sub(',|!|\?','',i[0].upper()))+"".join(re.sub(',|!|\?| ',' ',i[5].upper())))
        except:
            pass


        all_doc_list=[]
        for doc in all_doc_chara:
            doc_list=[word for word in jieba.cut(doc)]
            all_doc_list.append(doc_list)

        #print(all_doc_list)#原文本的分词列表
        doc_test_list=[word for word in jieba.cut(doc_test)]

        #print(doc_test_list)#测试文本的分词列表
        dictionary=corpora.Dictionary(all_doc_list)
        #print(dictionary.keys())#原文本的字典键
        #print(dictionary.token2id)#原文本键键名对应
        corpus=[dictionary.doc2bow(doc) for doc in all_doc_list]
        #print(corpus)#原文本键和出现次数[[(),()],[(),()]]
        doc_test_vec=dictionary.doc2bow(doc_test_list)
        #print(doc_test_vec)
        tfidf=models.TfidfModel(corpus)#不同的转化需要不同的参数,在TF-IDF转化中,训练的过程就是简单的遍历训练语料库(corpus),然后计算文档中每个特征的频率。
        #找到有特征性的词作为区分的标准,tf计算是局部的,idf计算是全局的
        #tf-idf算法是创建在这样一个假设之上的:对区别文档最有意义的词语应该是那些在文档中出现频率高,而在整个文档集合的其他文档中出现频率少的词语
        #但是在本质上idf是一种试图抑制噪声的加权,并且单纯地认为文本频率小的单词就越重要,文本频率大的单词就越无用,显然这并不是完全正确的。idf的简单结构并不能有效地反映单词的重要程度和特征词的分布情况,使其无法很好地完成对权值调整的功能,所以tf-idf法的精度并不是很高。
        index=similarities.SparseMatrixSimilarity(tfidf[corpus],num_features=len(dictionary.keys()))

        sim=index[tfidf[doc_test_vec]]
        # print(sim)
        res=sorted(enumerate(sim),key=lambda item:-item[1])
        res_list=[]
        child_res_dict={}
        child_res_dict['node']=[]
        child_res_dict['link']=[]
        print(res)
        # print(len(res))
        if len(res) <= 5:
            cou_num = 1
            for i in range(0,len(res)):

                if res[i][1] > 0.005:
                    node_dict1 = {'name': '序号:' + str(i) + ',公司名:' + all_doc[res[i][0]].split('-!-!-')[1],
                                  'value': '%.2f' % (res[i][1] * 1000), 'x': (cou_num + 1) * 10, 'y': 200,
                                  'symbolSize': size1,
                                  'label': {'normal': {'position': 'inside', 'fontSize': 10, 'color': '#FF6633'}},
                                  "draggable": "true"}

                    node_dict2 = {'name': '序号:' + str(i) + ',职位名:' + all_doc[res[i][0]].split('-!-!-')[0] + ',工资区间:' + \
                                          all_doc[res[i][0]].split('-!-!-')[2] + '-' + \
                                          all_doc[res[i][0]].split('-!-!-')[3],
                                  'value': '%.2f' % (res[i][1] * 1000), 'x': (cou_num + 1) * 10, 'y': 200,
                                  'symbolSize': size2,
                                  'label': {'normal': {'position': 'inside', 'fontSize': 10, 'color': '#FF6633'}},
                                  "draggable": "true"}

                    # node_dict3 = {'name': '序号:' + str(i) + ',关键词:' + all_doc[res[i][0]].split('-!-!-')[4],
                    #               'value': '%.2f' % (res[i][1] * 1000), 'x': (cou_num + 1) * 10, 'y': 200,
                    #               'symbolSize': size2,
                    #               'label': {'normal': {'position': 'inside', 'fontSize': 10, 'color': '#FF6633'}},
                    #               "draggable": "true"}
                    cou_num += 3

                    child_res_dict['node'].append(node_dict1)
                    child_res_dict['node'].append(node_dict2)
                    # child_res_dict['node'].append(node_dict3)

                    link_dict1 = {'source': '序号:' + str(i) + ',公司名:' + all_doc[res[i][0]].split('-!-!-')[1],
                                  'target': '序号:' + str(i) + ',公司名:' + all_doc[res[i][0]].split('-!-!-')[1],
                                  'name': '', 'value': '', 'label': '',
                                  'lineStyle': {"normal": {"width": 2.0, "curveness": 0.2, "color": '#FF6633'}}}
                    link_dict2 = {'source': '序号:' + str(i) + ',职位名:' + all_doc[res[i][0]].split('-!-!-')[0] + ',工资区间:' + \
                                            all_doc[res[i][0]].split('-!-!-')[2] + '-' + \
                                            all_doc[res[i][0]].split('-!-!-')[3],
                                  'target': '序号:' + str(i) + ',公司名:' + all_doc[res[i][0]].split('-!-!-')[1],
                                  'name': '', 'value': '', 'label': '',
                                  'lineStyle': {"normal": {"width": 2.0, "curveness": 0.2, "color": '#FF6633'}}}
                    # link_dict3 = {'source': '序号:' + str(i) + ',关键词:' + all_doc[res[i][0]].split('-!-!-')[4],
                    #               'target': '序号:' + str(i) + ',公司名:' + all_doc[res[i][0]].split('-!-!-')[1],
                    #               'name': '', 'value': '', 'label': '',
                    #               'lineStyle': {"normal": {"width": 2.0, "curveness": 0.2, "color": '#FF6633'}}}

                    child_res_dict['link'].append(link_dict1)
                    child_res_dict['link'].append(link_dict2)
                    # child_res_dict['link'].append(link_dict3)

            child_res_dict['node'].append({'name': '分析结果', 'value': '', 'x': 10, 'y': 200, 'symbolSize': size0,
                                           'label': {'normal': {'position': 'inside', 'fontSize': 14, 'color': '#FF6633'}},
                                           "draggable": "true"})
            copy_list = child_res_dict['link'].copy()
            for j in range(0, len(copy_list), 3):
                child_res_dict['link'].append({'source': copy_list[j]['source'],
                                               'target': child_res_dict['node'][len(child_res_dict['node']) - 1]['name'],
                                               'name': '', 'value': '', 'label': '', 'lineStyle': {
                        "normal": {"width": 2.0, "curveness": 0.2, "color": '#FF6633'}}})
            res_list.append(child_res_dict)
        else:
            cou_num = 1
            for i in range(0,8):

                if res[i][1] > 0.005:
                    node_dict1 = {'name': '序号:' + str(i) + ',公司名:' + all_doc[res[i][0]].split('-!-!-')[1],
                                      'value': '%.2f' % (res[i][1] * 1000), 'x': (cou_num+1)*10, 'y': 200,
                                      'symbolSize': size1,
                                      'label': {'normal': {'position': 'inside', 'fontSize': 10, 'color': '#FF6633'}},
                                      "draggable": "true"}

                    node_dict2 = {'name': '序号:' + str(i) + ',职位名:' + all_doc[res[i][0]].split('-!-!-')[0] + ',工资区间:' + \
                                         all_doc[res[i][0]].split('-!-!-')[2] + '-' + \
                                         all_doc[res[i][0]].split('-!-!-')[3],
                                      'value': '%.2f' % (res[i][1] * 1000), 'x': (cou_num + 1) * 10, 'y': 200,
                                      'symbolSize': size2,
                                      'label': {'normal': {'position': 'inside', 'fontSize': 10, 'color': '#FF6633'}},
                                      "draggable": "true"}


                    # node_dict3 = {'name': '序号:' + str(i) + ',关键词:' + all_doc[res[i][0]].split('-!-!-')[4],
                    #                   'value': '%.2f' % (res[i][1] * 1000), 'x': (cou_num + 1) * 10, 'y': 200,
                    #                   'symbolSize': size2,
                    #                   'label': {'normal': {'position': 'inside', 'fontSize': 10, 'color': '#FF6633'}},
                    #                   "draggable": "true"}
                    cou_num += 3

                    child_res_dict['node'].append(node_dict1)
                    child_res_dict['node'].append(node_dict2)
                    # child_res_dict['node'].append(node_dict3)




                    link_dict1 = {'source':'序号:' + str(i) + ',公司名:' + all_doc[res[i][0]].split('-!-!-')[1],
                                        'target': '序号:' + str(i) + ',公司名:' + all_doc[res[i][0]].split('-!-!-')[1],
                                        'name': '', 'value': '', 'label': '','lineStyle':{"normal": {"width": 2.0, "curveness": 0.2, "color": '#FF6633'}}}
                    link_dict2 = {'source': '序号:' + str(i) + ',职位名:' + all_doc[res[i][0]].split('-!-!-')[0] + ',工资区间:' + \
                                         all_doc[res[i][0]].split('-!-!-')[2] + '-' + \
                                         all_doc[res[i][0]].split('-!-!-')[3],
                                          'target': '序号:' + str(i) + ',公司名:' + all_doc[res[i][0]].split('-!-!-')[1],
                                          'name': '', 'value': '', 'label': '','lineStyle':{"normal": {"width": 2.0, "curveness": 0.2, "color": '#FF6633'}}}
                    link_dict3 = {'source': '序号:' + str(i) + ',关键词:' + all_doc[res[i][0]].split('-!-!-')[4],
                                          'target': '序号:' + str(i) + ',公司名:' + all_doc[res[i][0]].split('-!-!-')[1],
                                          'name': '', 'value': '', 'label': '','lineStyle':{"normal": {"width": 2.0, "curveness": 0.2, "color": '#FF6633'}}}



                    child_res_dict['link'].append(link_dict1)
                    child_res_dict['link'].append(link_dict2)
                    child_res_dict['link'].append(link_dict3)

            child_res_dict['node'].append({'name': '分析结果', 'value': '', 'x': 10, 'y': 200, 'symbolSize': size0,
                                           'label': {'normal': {'position': 'inside', 'fontSize': 14, 'color': '#FF6633'}},
                                           "draggable": "true"})
            copy_list = child_res_dict['link'].copy()
            for j in range(0, len(copy_list), 3):
                child_res_dict['link'].append({'source': copy_list[j]['source'],
                                               'target': child_res_dict['node'][len(child_res_dict['node']) - 1]['name'],
                                               'name': '', 'value': '', 'label': '','lineStyle':{"normal": {"width": 2.0, "curveness": 0.2, "color": '#FF6633'}}})
            res_list.append(child_res_dict)



        return res_list
    else:
        return []