Exemplo n.º 1
0
def load_company_to_news():
    MySQL.execute_update('truncate table news.dtjk_company_src')
    config = {'host': '172.16.0.11',
              'user': '******',
              'password': '******',
              'port': 3306,
              'database': 'ljzxdb',
              'charset': 'utf8mb4'
              }

    connection = mysql.connector.connect(**config)
    cursor = connection.cursor()
    sql = "select CompanyName,MonitorDate from v_monitorcompany"
    cursor.execute(sql)
    while True:
        res = cursor.fetchmany(1000)
        values = []
        for row in res:
            mc = row[0]
            monitor_date = row[1]
            values.append((mc, monitor_date))

        if values:
            insert_sql = u"insert into news.dtjk_company_src(mc,monitor_date) values(%s,%s) on duplicate key update update_status=-1"
            MySQL.execute_many_update(insert_sql, values)
        if len(res) < 1000:
            break
Exemplo n.º 2
0
def load_company_test():
    config = {'host': '172.16.0.102',
              'user': '******',
              'password': '******',
              'port': 3306,
              'database': 'ljzxdb',
              'charset': 'utf8mb4'
              }

    connection = mysql.connector.connect(**config)
    cursor = connection.cursor()
    sql = "select CompanyName,AreaName from v_monitorcompany"
    cursor.execute(sql)

    last_update_date_dict = {}
    update_status_dict = {}
    sql_1 = "select mc,date(last_update_time),update_status from enterprise_credit_info.dtjk_company_src_test"
    res_1 = MySQL.execute_query(sql_1)
    for row in res_1:
        last_update_date_dict[row[0]] = row[1]
        update_status_dict[row[0]] = row[2]
    while True:
        res = cursor.fetchmany(1000)
        insert_args = []
        update_args = []
        for row in res:
            mc = row[0]
            province = row[1]
            # city = row[4]
            # print type(name), type(province), type(city)
            info(mc)
            if not mc.isdigit() and len(mc) > 1:
                # sql_2 = u"select date(last_update_time),update_status from enterprise_credit_info.dtjk_company_src_test where mc='%s'" % mc
                # res_2 = MySQL.execute_query(sql_2)
                if mc not in update_status_dict:
                    insert_args.append((mc, province))
                else:
                    if not(last_update_date_dict[mc] == datetime.date.today() and update_status_dict[mc] in (0, 1) or update_status_dict[mc] == -1):
                        update_args.append((mc,))
        if insert_args:
            insert_sql = u"insert into enterprise_credit_info.dtjk_company_src_test " \
                    u"values(%s,-1,null,'动态监控测试',%s,null)"
            MySQL.execute_many_update(insert_sql, insert_args)
        if update_args:
            update_sql = u"update enterprise_credit_info.dtjk_company_src_test set update_status=-1 where mc=%s"
            MySQL.execute_many_update(update_sql, update_args)
        if len(res) < 1000:
            break
    sql_3 = '''
                UPDATE dtjk_company_src_test d, yyzz y
                SET d.xydm = y.xydm
                WHERE d.xydm is null and d.mc = y.mc;
            '''
    MySQL.execute_update(sql_3)
Exemplo n.º 3
0
 def run(self):
     cnt_0 = 0
     cnt_1 = 0
     cnt_2 = 0
     while True:
         sql_1 = "select mc,province from " \
                 "(" \
                 "select * from %s where update_status=-1 " \
                 "and province='%s'" \
                 "limit 30 " \
                 ") t " \
                 "order by RAND() limit 1 " % (self.src_table, self.province)
         # print sql_1
         res_1 = MySQL.execute_query(sql_1)
         if len(res_1) > 0:
             mc = res_1[0][0]
             province = res_1[0][1]
             self.info(mc + '|' + province)
             sql_2 = "update %s set update_status=-2 " \
                     "where mc='%s'" \
                     % (self.src_table, mc)
             MySQL.execute_update(sql_2)
             try:
                 update_status = self.searcher.crawl(keyword=mc,
                                                     province=province)
                 sql_3 = "update %s set update_status=%d, last_update_time=now() " \
                         "where mc='%s'" % \
                         (self.src_table, update_status, mc)
             except Exception, e:
                 traceback.print_exc(e)
                 update_status = -1
                 self.info(str(e))
                 sql_3 = "update %s set update_status=%d,last_update_time=now() " \
                         "where mc='%s'" % \
                         (self.src_table, update_status, mc)
                 self.searcher.delete_tag_a_from_db(mc, province)
             MySQL.execute_update(sql_3)
             if update_status == 0:
                 cnt_0 += 1
             elif update_status == 1:
                 cnt_1 += 1
             else:
                 cnt_2 += 1
             self.info(u'查询有结果: %d, 查询无结果: %d, 查询失败:%d' %
                       (cnt_1, cnt_0, cnt_2))
         else:
             self.info(u'更新完毕')
             break
Exemplo n.º 4
0
    def run(self):
        cnt_0 = 0
        cnt_1 = 0
        cnt_2 = 0
        while True:
            sql_1 = "select mc,province from " \
                    "(" \
                    "select * from %s where update_status=-1 limit 30 " \
                    ") t " \
                    "order by RAND() limit 1" % self.src_table
            # print sql_1
            res_1 = MySQL.execute_query(sql_1)
            if len(res_1) > 0:
                mc = res_1[0][0]

                last_update_date = TimeUtils.get_today()

                sql_2 = "update %s set update_status=-2 " \
                        "where mc='%s'" \
                        % (self.src_table, mc)
                MySQL.execute_update(sql_2)
                try:
                    update_status = self.searcher.submit_search_request(
                        keyword=mc)
                    sql_3 = "update %s set update_status=%d, last_update_time=now() " \
                            "where mc='%s'" % \
                            (self.src_table, update_status, mc)
                except Exception, e:
                    self.info(traceback.format_exc(e))
                    update_status = -1
                    self.info(str(e))
                    sql_3 = "update %s set update_status=%d,last_update_time=now() " \
                            "where mc='%s'" % \
                            (self.src_table, update_status, mc)
                MySQL.execute_update(sql_3)
                if update_status == 0:
                    cnt_0 += 1
                elif update_status == 1:
                    cnt_1 += 1
                else:
                    cnt_2 += 1
                self.info(u'查询有结果: %d, 查询无结果: %d, 查询失败:%d' %
                          (cnt_1, cnt_0, cnt_2))
            else:
                self.info(u'更新完毕')
                break
Exemplo n.º 5
0
def load_dtjk_lsmc_src_test():
    global UPDATE_PERIOD
    config = {'host': '172.16.0.102',
              'user': '******',
              'password': '******',
              'port': 3306,
              'database': 'ljzxdb',
              'charset': 'utf8mb4'
              }

    connection = mysql.connector.connect(**config)
    cursor = connection.cursor()
    sql = u'''select CompanyName,AreaName from v_monitorcompany where MonitorDate in ('%s','%s')''' % (
        str(int(TimeUtils.get_today()[:4]) - 1) + TimeUtils.get_today()[4:],
        str(int(TimeUtils.get_yesterday()[:4]) - 1) + TimeUtils.get_yesterday()[4:],
    )
    cursor.execute(sql)
    while True:
        res = cursor.fetchmany(1000)
        params = []
        for row in res:
            mc = row[0]
            province = row[1]
            if not mc.isdigit() and len(mc) > 1:
                params.append((mc, province))
        if params:
            insert_sql = u"insert ignore into enterprise_credit_info.lsmc_src " \
                         u"values(%s,-1,null, null,%s)"
            MySQL.execute_many_update(insert_sql, params)

        if len(res) < 1000:
            break
    sql_3 = '''
                    UPDATE lsmc_src l, yyzz y
                    SET l.xydm = y.xydm
                    WHERE l.xydm is null and l.mc = y.mc;
                '''
    MySQL.execute_update(sql_3)
Exemplo n.º 6
0
def load_dtjk_nb_test():
    config = {'host': '172.16.0.102',
              'user': '******',
              'password': '******',
              'port': 3306,
              'database': 'ljzxdb',
              'charset': 'utf8mb4'
              }

    connection = mysql.connector.connect(**config)
    cursor = connection.cursor()
    sql = "select CompanyName,MonitorDate from v_monitorcompany where MonitorDate in ('%s','%s','%s')" \
          % (
              str(int(TimeUtils.get_today()[:4]) - 1) + TimeUtils.get_today()[4:],
              str(int(TimeUtils.get_yesterday()[:4]) - 1) + TimeUtils.get_yesterday()[4:],
              str(int(TimeUtils.get_the_day_before_yesterday()[:4]) - 1) + TimeUtils.get_the_day_before_yesterday()[4:]
          )
    # print sql
    cursor.execute(sql)
    res = cursor.fetchall()
    for row in res:
        mc = row[0]
        monitor_date = str(row[1])
        monitor_date = str(int(monitor_date[:4]) + 1) + monitor_date[4:] + ' 00:00:00'
        # info(mc)
        if not mc.isdigit() and len(mc) > 1:
            sql_2 = u"""select last_update_time from enterprise_credit_info.nb where entname='%s' and ancheyear='2016'
                                    union all
                                    select last_update_time from enterprise_credit_info.gtnb where traName='%s' and ancheyear='2016'
                                    union all
                                    select last_update_time from enterprise_credit_info.sfcnb where farSpeArtName='%s' and ancheyear='2016'
                        """ % (mc, mc, mc)
            res_2 = MySQL.execute_query(sql_2)
            if len(res_2) == 0:
                sql_3 = u"replace into enterprise_credit_info.dtjk_company_src_nb(mc,update_status,last_update_time) " \
                        u"values('%s',-1,'%s')" % (mc, monitor_date)
                MySQL.execute_update(sql_3)
Exemplo n.º 7
0
def load_zhangjiang():
    config = {'host': '172.16.0.11',
              'user': '******',
              'password': '******',
              'port': 3306,
              'database': 'dataterminaldbzj',
              'charset': 'utf8mb4'
              }
    connection = mysql.connector.connect(**config)
    cursor = connection.cursor()
    sql = "select * from monitortodaycompany"
    cursor.execute(sql)
    res = cursor.fetchall()
    # MySQL.execute_update("truncate table law.dtjk_company_src")
    for row in res:
        mc = row[1]
        if not mc.isdigit() and len(mc) > 1:
            sql2 = u"insert into enterprise_credit_info.dtjk_company_src values('%s',-1,null,null,'张江')" % mc
            # print sql2
            try:
                MySQL.execute_update(sql2)
            except mysql.connector.errors.IntegrityError:
                sql3 = "update enterprise_credit_info.dtjk_company_src set update_status=-1 where mc='%s'" % mc
                MySQL.execute_update(sql3)
Exemplo n.º 8
0
    def run(self):
        # cnt_0 = 0
        # cnt_1 = 0
        # cnt_2 = 0
        # cnt_999 = 0
        fail_dict = dict()
        update_result = {u'更新成功': 0, u'查无结果': 0, u'更新失败': 0, u'未上线': 0}

        while True:
            # print json.dumps(fail_dict, ensure_ascii=False)
            sql_1 = "select mc,province,xydm from " \
                    "(" \
                    "select * from %s where update_status=-1 order by last_update_time limit 30 " \
                    ") t " \
                    "order by RAND() limit 1" % self.src_table
            # print sql_1
            res_1 = MySQL.execute_query(sql_1)
            if len(res_1) > 0:
                mc = res_1[0][0]
                province = res_1[0][1]
                xydm = res_1[0][2]
                print mc, province
                self.info(mc + '|' + province)
                sql_2 = "update %s set update_status=-2,last_update_time=now() " \
                        "where mc='%s'" \
                        % (self.src_table, mc)
                MySQL.execute_update(sql_2)
                try:
                    if province in self.online_province:
                        if province in (
                                u'河北省',
                                u'宁夏回族自治区',
                                u'河南省',
                                u'海南省',
                                u'重庆市',
                                u'江西省',
                                u'贵州省',
                                u'湖南省',
                                u'陕西省',
                                u'山西省',
                                u'黑龙江省',
                                u'吉林省',
                                u'内蒙古自治区',
                                u'广西壮族自治区',
                                u'云南省',
                                u'西藏自治区',
                                u'青海省',
                                u'新疆维吾尔自治区',
                                u'甘肃省',
                                u'工商总局',
                                u'浙江省',
                                u'江苏省',
                                u'广东省',
                                u'上海市',
                                # u''
                        ) and check(xydm):
                            keyword = xydm
                        else:
                            keyword = mc
                        update_status = self.searcher.crawl(keyword=keyword,
                                                            province=province)
                    else:
                        update_status = 999
                    sql_3 = "update %s set update_status=%d, last_update_time=now() " \
                            "where mc='%s'" % \
                            (self.src_table, update_status, mc)
                    if mc in fail_dict:
                        fail_dict.pop(mc)
                except Exception, e:
                    # traceback.print_exc(e)
                    self.info(traceback.format_exc(e))
                    if fail_dict.get(mc, 0) > 10:
                        update_status = 3
                        if mc in fail_dict:
                            fail_dict.pop(mc)
                    else:
                        update_status = -1
                        fail_dict[mc] = fail_dict.get(mc, 0) + 1
                    # self.info(str(e))
                    sql_3 = "update %s set update_status=%d " \
                            "where mc='%s'" % \
                            (self.src_table, update_status, mc)
                    self.searcher.delete_tag_a_from_db(mc, province)
                MySQL.execute_update(sql_3)
                # print 'update_status', update_status
                if update_status == 0:
                    update_result[u'查无结果'] += 1
                elif update_status == 1:
                    update_result[u'更新成功'] += 1
                elif update_status == 999:
                    update_result[u'未上线'] += 1
                else:
                    update_result[u'更新失败'] += 1
                self.info(json.dumps(update_result, ensure_ascii=False))
            else:
                self.info(u'更新完毕')
                break
    def run(self):
        # cnt_0 = 0
        # cnt_1 = 0
        # cnt_2 = 0
        # cnt_999 = 0
        fail_dict = dict()
        update_result = {u'更新成功': 0, u'查无结果': 0, u'更新失败': 0, u'未上线': 0}

        while True:
            # print json.dumps(fail_dict, ensure_ascii=False)
            sql_1 = "select mc,province from " \
                    "(" \
                    "select * from %s where update_status=-1 order by last_update_time limit 30 " \
                    ") t " \
                    "order by RAND() limit 1" % self.src_table
            # print sql_1
            res_1 = MySQL.execute_query(sql_1)
            if len(res_1) > 0:
                mc = res_1[0][0].decode('utf-8')
                province = res_1[0][1].decode('utf-8')
                # print mc, province
                self.info(mc + '|' + province)
                sql_2 = "update %s set update_status=-2,last_update_time=now() " \
                        "where mc='%s'" \
                        % (self.src_table, mc)
                MySQL.execute_update(sql_2)
                try:
                    if province in self.online_province:
                        update_status = self.searcher.crawl(keyword=mc,
                                                            province=province)
                    else:
                        update_status = 999
                    sql_3 = "update %s set update_status=%d, last_update_time=now() " \
                            "where mc='%s'" % \
                            (self.src_table, update_status, mc)
                    if mc in fail_dict:
                        fail_dict.pop(mc)
                except Exception, e:
                    # traceback.print_exc(e)
                    self.info(traceback.format_exc(e))
                    if fail_dict.get(mc, 0) > 10:
                        update_status = 3
                        if mc in fail_dict:
                            fail_dict.pop(mc)
                    else:
                        update_status = -1
                        fail_dict[mc] = fail_dict.get(mc, 0) + 1
                    # self.info(str(e))
                    sql_3 = "update %s set update_status=%d " \
                            "where mc='%s'" % \
                            (self.src_table, update_status, mc)
                    self.searcher.delete_tag_a_from_db(mc, province)
                MySQL.execute_update(sql_3)
                # print 'update_status', update_status
                if update_status == 0:
                    update_result[u'查无结果'] += 1
                elif update_status == 1:
                    update_result[u'更新成功'] += 1
                elif update_status == 999:
                    update_result[u'未上线'] += 1
                else:
                    update_result[u'更新失败'] += 1
                self.info(json.dumps(update_result, ensure_ascii=False))
            else:
                self.info(u'更新完毕')
                time.sleep(5 * 60)