Exemplo n.º 1
0
    def set_other_info(self):
        """Compute ``self.time_duration`` from ``self._duration``.

        The value has the form ``<low>_<high>``. Each side is the year,
        zero-padded month and zero-padded day of a date followed by the
        literal ``000000``. The high side comes from
        ``get_date_with_day_duration(0)``; the low side uses an offset of
        -180 when ``self._duration`` is ``'6m'`` and -7 for every other
        value (including ``'1w'``).
        """
        day_offset = -180 if self._duration == '6m' else -7
        upper = get_date_with_day_duration(0)
        lower = get_date_with_day_duration(day_offset)

        stamps = [
            "%d%02d%02d000000" % (bound.year, bound.month, bound.day)
            for bound in (lower, upper)
        ]
        self.time_duration = "{}_{}".format(*stamps)
Exemplo n.º 2
0
    def report(self):
        """Build the per-channel check report and send it by e-mail.

        For every folder in ``self._channels`` two lines are produced:

        * a summary line ``<folder>: <percent>`` where ``percent`` is the
          text after the last ``':'`` of today's result, and
        * a change line ``<yesterday result> ==> <today result>``, tagged
          as abnormal when ``self.check_if_normal`` reports so.

        Channels listed in ``Config.SEALED_CHANNELS`` are not evaluated and
        are reported as ``N/A`` with the configured sealed notice. Channels
        listed in ``Config.CHANNELS_NOT_CALCURATE_FULL_RATE`` are always
        treated as normal. A missing (falsy) result is shown as ``N/A``.

        The assembled message is passed to ``sendmail`` together with
        ``self._emails`` and ``self._email_title``.
        """
        today = get_date_with_day_duration(0)
        yesterday = get_date_with_day_duration(-1)
        today_str = "%d%02d%02d" % (today.year, today.month, today.day)
        yesterday_str = "%d%02d%02d" % (yesterday.year, yesterday.month, yesterday.day)

        summary_lines = []
        change_lines = []
        for folder in self._channels:

            # Sealed channels are not evaluated; report them with the notice.
            if folder in Config.SEALED_CHANNELS:
                summary_lines.append("%s: N/A(%s)\n" % (folder, Config.PROMPT_INFO['sealed_info'].encode('utf-8')))
                # Terminate the line so the next channel starts on its own row.
                change_lines.append("N/A ==> N/A\n")
                continue

            today_rs = self.get_result_with_folder_and_date(folder, today_str) or 'N/A'
            yesterday_rs = self.get_result_with_folder_and_date(folder, yesterday_str) or 'N/A'

            is_normal = self.check_if_normal(today_rs, yesterday_rs)

            # Channels without a full-rate statistic are always considered normal.
            if folder in Config.CHANNELS_NOT_CALCURATE_FULL_RATE:
                is_normal = True

            # Check the result itself (not the date string) for the placeholder.
            if today_rs != 'N/A':
                percent = today_rs.split(':')[-1]
            else:
                percent = 'N/A'

            summary_lines.append("%s: %s\n" % (folder, percent))

            change_line = "%s ==> %s" % (yesterday_rs, today_rs)
            if not is_normal:
                change_line += "\t <数据变化异常>"
            change_lines.append(change_line + "\n")

        msg1 = ''.join(summary_lines)
        msg2 = ''.join(change_lines)
        msg = """时间:%s\n\n检查JD结果:\n %s \n\n 数据变化:\n%s""" % (today_str, msg1, msg2)

        sendmail(self._emails, self._email_title, msg)
Exemplo n.º 3
0
    def set_file_path(self):
        """Set ``self.id_fn`` and ``self.result_fn`` for the current run.

        ``self.id_fn`` is ``<owner>/<index file name>`` and
        ``self.result_fn`` is a ``Config`` result-folder template filled
        with ``self._owner`` and today's date (year, zero-padded month and
        day). When ``self._duration`` is ``'1w'`` the latest-one-week index
        file name and result template from ``Config`` are used instead of
        the default ones.
        """
        today = get_date_with_day_duration(0)
        date_tag = "%d%02d%02d" % (today.year, today.month, today.day)

        if self._duration == '1w':
            # Latest one week: dedicated index file and result template.
            idx_name = Config.LATEST_ONE_WEEK_IDX_FILE_NAME
            result_template = Config.LATEST_ONE_WEEK_RESULT_FOLDER_TEMPLATE
        else:
            idx_name = Config.IDX_FILE_NAME
            result_template = Config.RESULT_FOLDER_TEMPLATE

        self.id_fn = "%s/%s" % (self._owner, idx_name)
        self.result_fn = result_template % (self._owner, date_tag)
Exemplo n.º 4
0
    def event_handler(self, evt, msg, **kwargs):
        """React to the ``'DONE'`` event; every other event is ignored.

        On ``'DONE'`` the method:

        1. e-mails ``msg`` with a subject built from ``self.channel``;
        2. if ``app/share/<channel>_result.txt`` already exists, moves it to
           ``<channel>_<timestamp>.txt`` in the same directory, where the
           timestamp is year/month/day/hour/minute of the value returned by
           ``util.get_date_with_day_duration()``;
        3. moves ``self.result_file`` to ``app/share/<channel>_result.txt``.

        Moves are done with the shell ``mv`` command through ``os.system``;
        its exit status is not checked. ``kwargs`` is accepted but unused.
        """
        if evt != 'DONE':
            return

        subject = "{} 样本对比".format(self.channel)
        util.send_email(["<*****@*****.**>"], subject, msg)

        # Location where the final result is published.
        share_path = 'app/share/%s_result.txt' % self.channel

        if os.path.exists(share_path):
            # Keep the previous result as a timestamped history file.
            stamp_src = util.get_date_with_day_duration()
            stamp = '%d%02d%02d%02d%02d' % (
                stamp_src.year, stamp_src.month, stamp_src.day,
                stamp_src.hour, stamp_src.minute)
            archive_name = '%s_%s.txt' % (self.channel, stamp)
            archive_path = os.path.join(os.path.dirname(share_path), archive_name)
            os.system('mv %s %s' % (share_path, archive_path))

        # Publish the freshly computed result in the share directory for download.
        os.system('mv %s %s' % (self.result_file, share_path))
Exemplo n.º 5
0
    def get_bin_file_and_last_pos(self):
        """Pick the bin file to process next and the offset to resume from.

        Returns a ``(file name, position)`` tuple:

        * the saved file and its saved position (as ``int``) when a saved
          file is recorded and its current mtime is greater than the saved
          modification time;
        * ``(None, None)`` when the saved file is this month's file, i.e.
          it is the newest file and has not been updated;
        * otherwise this month's file with position ``0`` (files are split
          by month).

        ``os.stat`` raises ``OSError`` if the recorded file does not exist.
        """
        saved_file, saved_mtime, saved_pos = self._get_file_and_pos_from_saved_file()

        # The previously processed file has been updated: continue with it.
        if saved_file and int(os.stat(saved_file).st_mtime) > saved_mtime:
            return saved_file, int(saved_pos)

        # The previous file is finished; move on to this month's file.
        now = util.get_date_with_day_duration(0)
        monthly_file = "/data/crawler/_files3_/%s/%d/%s_%04d%02d.bin" % (
            self.channel, now.year, self.channel, now.year, now.month)

        # Same file as the saved one: it is the newest and has no new data.
        return (None, None) if saved_file == monthly_file else (monthly_file, 0)
Exemplo n.º 6
0
    def get_ids(self):
        """Collect ids into a temporary file, then install it as the index file.

        ``self.get_one()`` is called repeatedly until ``self.count`` reaches
        ``CVConfig.TOTAL_COUNT``; each truthy id is written on its own line
        to ``self._tmp_idx_fn`` and increments ``self.count``. Afterwards an
        existing ``self._idx_fn`` is renamed with a ``_<date>`` suffix
        (year, zero-padded month and day) and the temporary file is moved
        to ``self._idx_fn``.

        The first path component of ``self._idx_fn`` is created as a
        directory if it does not exist. The loop does not terminate while
        ``get_one`` keeps returning falsy values.
        """
        today = get_date_with_day_duration(0)
        date_tag = "%d%02d%02d" % (today.year, today.month, today.day)

        top_dir = self._idx_fn.split('/')[0]
        if not os.path.exists(top_dir):
            os.mkdir(top_dir)

        with open(self._tmp_idx_fn, 'wb') as out:
            while self.count < CVConfig.TOTAL_COUNT:
                jlid = self.get_one()
                if not jlid:
                    print("FAIL")
                    continue
                print("SUCESS, count:{}".format(self.count))
                out.write('%s\n' % jlid)
                self.count += 1

        # Back up the old index file, then promote the temporary file.
        if os.path.exists(self._idx_fn):
            os.system("mv %s %s_%s" % (self._idx_fn, self._idx_fn, date_tag))
        os.system('mv %s %s' % (self._tmp_idx_fn, self._idx_fn))
Exemplo n.º 7
0
    def get_cases(self):
        """Collect ids into ``datas/idx-ing``, then install it as ``datas/idx``.

        ``self.get_one()`` is called until ``self.count`` reaches
        ``JdConfig.TOTAL_IDS``; each truthy id is written as a decimal
        number on its own line and increments ``self.count``. Progress and
        failures are printed. Afterwards an existing ``datas/idx`` is
        renamed to ``datas/idx_<date>`` (year, zero-padded month and day)
        and the temporary file is moved to ``datas/idx``.

        The ``datas`` directory is created if missing. The loop does not
        terminate while ``get_one`` keeps returning falsy values.
        """
        today = get_date_with_day_duration(0)
        date_tag = "%d%02d%02d" % (today.year, today.month, today.day)
        if not os.path.exists('datas'):
            os.mkdir('datas')

        pending_path = 'datas/idx-ing'

        with open(pending_path, 'wb') as out:
            while self.count < JdConfig.TOTAL_IDS:
                case_id = self.get_one()
                if not case_id:
                    # Two spaces: the label's own trailing space plus the separator.
                    print("fail id:  %s" % (case_id,))
                    continue

                out.write('%d\n' % case_id)
                self.count += 1

                print("total count: %s" % self.count)

        # Back up the old index file, then promote the temporary file.
        if os.path.exists('datas/idx'):
            os.system("mv datas/idx datas/idx_%s" % date_tag)

        os.system('mv %s datas/idx' % pending_path)
Exemplo n.º 8
0
    def get_today_bin_file_name(self):
        """Return the bin-file path for the current year and month.

        The path is ``IncEtlDispatcher.CRAWLER_BIN_FILE_TEMPLATE`` filled
        with ``self.channel``, the year, ``self.channel`` again, the year
        and the month of ``util.get_date_with_day_duration(0)``.
        """
        now = util.get_date_with_day_duration(0)
        template_args = (self.channel, now.year, self.channel, now.year, now.month)
        return IncEtlDispatcher.CRAWLER_BIN_FILE_TEMPLATE % template_args
Exemplo n.º 9
0
            jdid = self.test_page(url, jdid)
	    if jdid:
	        self.counter += 1
		sys.stderr.write("OK %d\n" % self.counter)
	    else:
		sys.stderr.write("FAIL\n")
            return jdid
        except Exception as e:
            print e
            return None


if __name__ == "__main__":
    # Script entry point: write ids found by JdWLTests.find_one() to
    # datas/idx-ing until the tester's counter reaches JdConfig.TOTAL_IDS,
    # then install the file as datas/idx (backing up any previous one with
    # a date suffix).
    tester = JdWLTests()

    today = get_date_with_day_duration(0)
    date_tag = "%d%02d%02d" % (today.year, today.month, today.day)

    if not os.path.exists('datas'):
        os.mkdir('datas')
    pending_path = 'datas/idx-ing'

    with open(pending_path, 'w') as out:
        while tester.counter < JdConfig.TOTAL_IDS:
            found_id = tester.find_one()
            if found_id is None:
                continue
            out.write("%s\n" % found_id)

    # Back up the old index file, then promote the temporary file.
    if os.path.exists('datas/idx'):
        os.system("mv datas/idx datas/idx_%s" % date_tag)
    os.system('mv %s datas/idx' % pending_path)