Пример #1
0
def temp(table):
    """Ensure every shop referenced from ``computer`` has a row in ``shop``.

    Reads all non-null ``shop_id`` values from the ``computer`` table and
    inserts a bare ``shop`` row for each id that is not present yet.

    Args:
        table: Unused; the source table is hard-coded to ``computer``.
            Kept so existing callers keep working.
    """
    sql = 'select shop_id from computer where shop_id is not null'
    result = database_util.search_sql(sql, None)
    # The other helpers in this file treat a first element of -1 as
    # "query failed"; bail out instead of indexing into a failed result.
    if result[0] == -1:
        return

    seen = set()
    for row in result[1]:
        shop_id = row[0]
        # The same shop usually owns many products; check each id once.
        if shop_id in seen:
            continue
        seen.add(shop_id)

        lookup = database_util.search_sql(
            'select count(*) from shop where shop_id=%s', shop_id)
        if lookup[0] == -1:
            continue
        if list(lookup[1])[0][0] == 0:
            database_util.update_sql('insert into shop(shop_id) values(%s)',
                                     shop_id)
Пример #2
0
def get_shop_id(thread_name, queue, table):
    """Worker loop: look up and store the shop id of each queued SKU.

    Until the module-level ``EXIT_FLAG`` becomes true, the loop takes one
    SKU from ``queue`` (guarded by ``QUEUE_LOCK``), downloads the mobile
    product page, extracts the shop id, writes it to ``table`` and makes
    sure the ``shop`` table has a row for it. Errors are printed and the
    loop continues with the next item.

    Args:
        thread_name: Label used in progress output.
        queue: Queue the SKU strings are taken from.
        table: Name of the product table to update.
    """
    while not EXIT_FLAG:
        try:
            QUEUE_LOCK.acquire()
            try:
                has_work = not WORK_QUEUE.empty()
                if has_work:
                    sku = queue.get()
            finally:
                # Always release, even if get() raises; otherwise every
                # other worker would block on the lock forever.
                QUEUE_LOCK.release()

            if has_work:
                url = 'https://item.m.jd.com/product/' + sku + '.html'
                print(thread_name, url)
                _spider = jd_spider.Spider()
                html_data = _spider.get_html(url)
                # Only parse when the download succeeded. Previously a
                # failed download fell through to `result`, which was either
                # unbound or still held the previous SKU's shop id.
                if html_data[0] != -1:
                    result = html_analysis.get_shop_id(html_data[1])
                    if result[0] != -1:
                        shop_id = result[1]
                        print("%s: shop_id %s" % (thread_name, shop_id))
                        sql = ('update ' + table +
                               ' set shop_id=%s where sku=%s ')
                        database_util.update_sql(sql, [shop_id, sku])
                        count = list(
                            database_util.search_sql(
                                'select count(*) from shop where shop_id=%s',
                                shop_id)[1])[0][0]
                        if count == 0:
                            database_util.update_sql(
                                'insert into shop(shop_id) values(%s)',
                                shop_id)
        except Exception as err:
            print(err)
        time.sleep(1)
Пример #3
0
def get_brand(table):
    """Append every distinct brand name in ``table`` to the dictionary file.

    A brand stored as ``name(alias)`` contributes both ``name`` and
    ``alias``; a brand without parentheses contributes itself. Afterwards
    the dictionary file is de-duplicated via ``file_util.del_duplicate``.
    If the brand query fails the function returns without de-duplicating.

    Args:
        table: Name of the product table to read brands from.
    """
    dictionary = FILE_PATH + 'train_files/dictionary.txt'
    with open(dictionary, "a", encoding='utf-8') as file:
        try:
            sql = 'select distinct brand from ' + table
            result = list(database_util.search_sql(sql, None))
            if result[0] == -1:
                return
            for row in result[1]:
                # Skip NULL and empty brands.
                if not row[0]:
                    continue
                line = row[0].strip()
                if not line:
                    continue
                # Parse each row from scratch: the names used to persist
                # across iterations, so a brand without parentheses wrote
                # the previous row's names instead of its own.
                if '(' in line:
                    parts = line.split('(')
                    brand1 = parts[0]
                    brand2 = parts[1].split(')')[0]
                else:
                    brand1 = line
                    brand2 = ''
                for name in (brand1, brand2):
                    # Avoid adding blank entries to the dictionary.
                    if name:
                        file.write(name + '\n')
                print(brand1, brand2)
        except Exception as err:
            print('tran_util get_brand err: %s' % (str(err)))
    file_util.del_duplicate(dictionary)
Пример #4
0
def update_shop_info(table):
    """Refresh stale shop rows, then copy brand popularity into ``table``.

    First queues every shop whose ``update_time`` is at least one day old
    and starts the ``update_shop_info`` worker job. Then reads
    ``brand, follow`` grouped by brand from ``table`` and stores each
    ``follow`` value in ``brand_hot`` for all rows of that brand.

    Args:
        table: Name of the product table.
    """
    sql = 'SELECT shop_id FROM shop where TO_DAYS(NOW()) - TO_DAYS(update_time) >=1'
    result = database_util.search_sql(sql, None)
    shop_ids = []
    if result[0] != -1:
        shop_ids = [row[0] for row in result[1]]
    thread_queue.fill_queue(shop_ids)
    thread_queue.use_threading(['update_shop_info', table])

    sql = 'select brand,follow from ' + table + ' group by brand order by follow'
    result = database_util.search_sql(sql, None)
    # Previously the loop ran even when the query failed and then indexed
    # into the failure result itself; stop here instead.
    if result[0] == -1:
        return
    update = 'update ' + table + ' set brand_hot=%s where brand=%s'
    for row in result[1]:
        brand, follow = row[0], row[1]
        database_util.update_sql(update, [follow, brand])
Пример #5
0
def get_sku(table):
    """Derive the SKU of every row in ``table`` from its product URL.

    For a URL such as ``https://item.jd.com/12345.html`` the SKU is the
    last path segment without the ``.html`` suffix (``12345``). The value
    is written to the ``sku`` column of the row with the matching ``id``.

    Args:
        table: Name of the product table.
    """
    sql = 'select url,id from ' + table
    result = database_util.search_sql(sql, None)
    if result[0] == -1:
        return

    suffix = '.html'
    update = 'update ' + table + ' set sku=%s where id=%s'
    for row in result[1]:
        url, row_id = row[0], row[1]
        if url is None:
            continue
        # str.strip(chars) removes any of the given *characters* from both
        # ends rather than a prefix/suffix, so it only worked by accident
        # for all-digit SKUs. Take the last path segment explicitly.
        sku = url.strip().rstrip('/').rsplit('/', 1)[-1]
        if sku.endswith(suffix):
            sku = sku[:-len(suffix)]
        database_util.update_sql(update, [sku, row_id])
Пример #6
0
def get_sentiment_score(table):
    """Queue every SKU with a non-null ``update_unreal_time`` for scoring.

    The collected SKUs are handed to ``thread_queue.fill_queue`` and the
    ``get_sentiment_score`` worker job is started. Both calls happen even
    when the query fails; the queue is then filled with an empty list.

    Args:
        table: Name of the product table.
    """
    query = 'select sku from ' + table + ' where update_unreal_time is not null'
    outcome = database_util.search_sql(query, None)
    if outcome[0] != -1:
        pending = [row[0] for row in outcome[1]]
    else:
        pending = []
    thread_queue.fill_queue(pending)
    thread_queue.use_threading(['get_sentiment_score', table])
Пример #7
0
def unify_brand(table, old_brand='360手机', new_brand='360'):
    """Rename one brand spelling to another for every matching product.

    Selects all rows of ``table`` whose brand equals ``old_brand`` and sets
    their brand to ``new_brand``, printing each updated SKU.

    Args:
        table: Name of the product table.
        old_brand: Brand value to replace. Defaults to the spelling that
            was previously hard-coded.
        new_brand: Replacement brand value. Defaults to the spelling that
            was previously hard-coded.
    """
    sql = 'select sku,brand from ' + table + ' where brand=%s'
    result = database_util.search_sql(sql, old_brand)
    # Previously the loop ran even when the query failed and then indexed
    # into the failure result itself; stop here instead.
    if result[0] == -1:
        return
    update = 'update ' + table + ' set brand=%s where sku=%s'
    for row in result[1]:
        sku = row[0]
        database_util.update_sql(update, [new_brand, sku])
        print(sku)
Пример #8
0
def get_param(table):
    """Prune low-follower products, then queue the rest for parameter fetch.

    Deletes every row of ``table`` whose shop has fewer than 10000
    followers, then queues the URL of each row whose ``update_time`` is
    still null and starts the ``get_param`` worker job.

    Args:
        table: Name of the product table.
    """
    # Delete products that belong to shops with few followers. The nested
    # derived table is kept as in the original statement (presumably to
    # satisfy MySQL's rule against selecting from the table being deleted
    # from -- TODO confirm).
    sql = ('delete from ' + table +
           ' where sku in (select a.sku from (select a.sku from ' + table +
           ' a,shop b where a.shop_id=b.shop_id and b.follow<10000) a)')
    database_util.update_sql(sql, None)

    sql = 'SELECT url FROM ' + table + ' where update_time is null'
    result = database_util.search_sql(sql, None)
    url_list = []
    # Check the status like the sibling helpers do instead of indexing
    # into a possibly failed result.
    if result[0] != -1:
        url_list = [row[0] for row in result[1]]
    thread_queue.fill_queue(url_list)
    thread_queue.use_threading(['get_param', table])
Пример #9
0
def get_shop_id(table):
    """Queue every SKU that has no shop id yet and start the workers.

    Rows whose SKU is NULL are reported on stdout and skipped. The queue is
    filled and the ``get_shop_id`` worker job started even when the query
    fails; the queue is then filled with an empty list.

    Args:
        table: Name of the product table.
    """
    query = 'SELECT sku FROM ' + table + ' where shop_id is null'
    outcome = database_util.search_sql(query, None)
    rows = list(outcome[1]) if outcome[0] != -1 else []

    pending = []
    for row in rows:
        value = row[0]
        if value is None:
            print("sku is null")
            continue
        pending.append(value)

    thread_queue.fill_queue(pending)
    thread_queue.use_threading(['get_shop_id', table])
Пример #10
0
def get_shop_info(table=None):
    """Queue every shop that was never refreshed and start the workers.

    Selects the ids of all ``shop`` rows whose ``update_time`` is null,
    fills the work queue with them and starts the ``update_shop_info``
    worker job.

    Args:
        table: Table name forwarded to the worker job. Optional for
            backward compatibility: the original body read a bare
            ``table`` name that was neither a parameter nor a local, so
            when omitted a module-level ``table`` is used if one exists.

    Raises:
        ValueError: If no table name is given and the module defines none.
            The original failed with ``NameError`` in that situation, but
            only after the queue had already been filled.
    """
    if table is None:
        table = globals().get('table')
    if table is None:
        raise ValueError('get_shop_info() requires a table name')

    sql = 'SELECT shop_id FROM shop where update_time is null'
    result = database_util.search_sql(sql, None)
    shop_ids = []
    if result[0] != -1:
        for row in result[1]:
            if row[0] is not None:
                shop_ids.append(row[0])
            else:
                print("shop_id is null")
    thread_queue.fill_queue(shop_ids)
    thread_queue.use_threading(['update_shop_info', table])
Пример #11
0
def del_file(table):
    """Report comment files whose SKU no longer exists in ``table``.

    Scans the ``item_comments`` and ``useful_comments`` directories under
    ``DATA_PATH + table``. For each file, the part of its name before the
    first dot is taken as the SKU and looked up in ``table``; SKUs without
    a matching row are printed.

    Args:
        table: Name of the product table; also the data sub-directory name.
    """
    path_list = [
        DATA_PATH + table + '/item_comments/',
        DATA_PATH + table + '/useful_comments/'
    ]
    sql = 'select shop_name from ' + table + ' where sku=%s'
    for file_path in path_list:
        for sku_name in os.listdir(file_path):
            # partition() keeps the whole name when there is no dot; the
            # previous slice up to find('.') == -1 dropped the last
            # character in that case.
            sku = sku_name.partition('.')[0]
            result = database_util.search_sql(sql, sku)
            if result[0] != -1 and len(result[1]) == 0:
                # NOTE(review): despite the message and the function name,
                # nothing is removed from disk here -- confirm whether the
                # file should be deleted or this is an intentional dry run.
                print('deleted sku:%s' % (sku))
Пример #12
0
def get_comment(table):
    """Queue every SKU whose comments were never fetched and start workers.

    Rows with a NULL SKU are reported on stdout and skipped. The queue is
    filled and the ``get_comment`` worker job started even when the query
    fails; the queue is then filled with an empty list.

    Args:
        table: Name of the product table.
    """
    # An earlier variant of this query filtered on follow>=10000 and a
    # comment count between 3000 and 5000 instead.
    query = 'SELECT sku FROM ' + table + ' where update_comment_time is null'
    outcome = database_util.search_sql(query, None)

    pending = []
    if outcome[0] != -1:
        for row in list(outcome[1]):
            if row[0] is None:
                print("sku is null")
            else:
                pending.append(row[0])

    thread_queue.fill_queue(pending)
    # The third list element is the number of comment pages to fetch.
    job = ['get_comment', table, 100]
    thread_queue.use_threading(job)
Пример #13
0
def update_price(table):
    """Queue price statistics of products whose price is a day or more old.

    For every row of ``table`` whose ``update_price_time`` is at least one
    day in the past, a dict with the keys ``sku``, ``max_price``,
    ``min_price``, ``avg_price`` (floats) and ``price_times`` (int) is put
    on the work queue, and the ``update_price`` worker job is started.

    Args:
        table: Name of the product table.
    """
    query = 'SELECT sku,max_price,min_price,avg_price,price_times  FROM ' + table + ' where TO_DAYS(NOW()) - TO_DAYS(update_price_time) >=1'
    outcome = database_util.search_sql(query, None)

    pending = []
    if outcome[0] != -1:
        for row in list(outcome[1]):
            pending.append({
                'sku': row[0],
                'max_price': float(row[1]),
                'min_price': float(row[2]),
                'avg_price': float(row[3]),
                'price_times': int(row[4]),
            })

    thread_queue.fill_queue(pending)
    thread_queue.use_threading(['update_price', table])
Пример #14
0
def update_img(table):
    """Rewrite thumbnail image URLs in ``table`` to the ``n7`` variant.

    Replaces the ``n5/s54x54_jfs`` path fragment with ``n7/jfs`` in every
    row's ``img`` value and writes the result back, printing the old and
    the new URL for each row. Example of the rewrite:

        https://img11.360buyimg.com/n5/s54x54_jfs/t5773/.../592692d8Nbcc8f248.jpg
        https://img11.360buyimg.com/n7/jfs/t5773/.../592692d8Nbcc8f248.jpg

    Args:
        table: Name of the product table.
    """
    outcome = database_util.search_sql('select img,sku from ' + table, None)
    if outcome[0] == -1:
        return

    statement = 'update ' + table + ' set img=%s where sku=%s'
    for row in list(outcome[1]):
        old_img = row[0]
        sku = row[1]
        print(old_img)
        new_img = old_img.replace('n5/s54x54_jfs', 'n7/jfs')
        print(new_img + '\n')
        database_util.update_sql(statement, [new_img, sku])
Пример #15
0
def insert_url(thread_name, queue, table):
    """Worker loop: insert each queued URL into ``table`` if it is new.

    Until the module-level ``EXIT_FLAG`` becomes true, the loop takes one
    URL from ``queue`` (guarded by ``QUEUE_LOCK``) and inserts a row for it
    unless ``table`` already contains that URL. Errors are printed and the
    loop continues with the next item.

    Args:
        thread_name: Worker label; not used by this worker.
        queue: Queue the URLs are taken from.
        table: Name of the product table.
    """
    while not EXIT_FLAG:
        try:
            QUEUE_LOCK.acquire()
            try:
                has_work = not WORK_QUEUE.empty()
                if has_work:
                    url = queue.get()
            finally:
                # Always release, even if get() raises; otherwise every
                # other worker would block on the lock forever.
                QUEUE_LOCK.release()

            if has_work:
                count = list(
                    database_util.search_sql(
                        'select count(*) url from ' + table + ' where url=%s',
                        url)[1])[0][0]
                if count == 0:
                    sql = 'insert into ' + table + ' set url=%s'
                    database_util.update_sql(sql, url)
        except Exception as err:
            # The message used to name the update_price worker (copy/paste).
            print('thread_queue insert_url err:' + str(err))
        time.sleep(1)
Пример #16
0
def update_score(thread_name, queue, table, para):
    """Worker loop: recompute the weighted score of each queued SKU.

    Until the module-level ``EXIT_FLAG`` becomes true, the loop takes one
    SKU from ``queue`` (guarded by ``QUEUE_LOCK``), reads its rate, follow,
    comment, sentiment and brand_hot values from ``table`` and stores

        round(rate * 100 * w_rate + follow * w_follow + comment * w_comment
              + sentiment * w_sentiment + brand_hot * w_brand, 2)

    in the ``score`` column. Errors are printed and the loop continues with
    the next item.

    Args:
        thread_name: Worker label; not used by this worker.
        queue: Queue the SKUs are taken from.
        table: Name of the product table.
        para: Mapping with the weights ``w_rate``, ``w_follow``,
            ``w_comment``, ``w_sentiment`` and ``w_brand``.
    """
    while not EXIT_FLAG:
        try:
            QUEUE_LOCK.acquire()
            try:
                has_work = not WORK_QUEUE.empty()
                if has_work:
                    sku = queue.get()
            finally:
                # Always release, even if get() raises; otherwise every
                # other worker would block on the lock forever.
                QUEUE_LOCK.release()

            if has_work:
                w_rate = para['w_rate']
                w_follow = para['w_follow']
                w_comment = para['w_comment']
                w_sentiment = para['w_sentiment']
                w_brand = para['w_brand']

                sql = 'select sku,rate,follow,comment,sentiment,brand_hot from ' + table + ' where sku=%s'
                result = database_util.search_sql(sql, sku)
                if result[0] != -1:
                    update = 'update ' + table + ' set score=%s where sku=%s'
                    for row in result[1]:
                        row_sku = row[0]
                        # rate is multiplied by 100 before weighting.
                        rate = float(row[1]) * 100
                        follow = int(row[2])
                        comment = int(row[3])
                        sentiment = int(row[4])
                        brand_hot = int(row[5])
                        score = round(
                            (rate * w_rate + follow * w_follow +
                             comment * w_comment + sentiment * w_sentiment +
                             brand_hot * w_brand), 2)
                        database_util.update_sql(update, [score, row_sku])
        except Exception as err:
            print('thread_queue update_score err:' + str(err))
        time.sleep(1)