Пример #1
0
    def check_report_integrity(self):
        """Prune ignored reports and validate downloaded PDFs of the current stock.

        For every category in ``self.current_stock.report_category`` each
        report is handled as follows:

        * If its title matches ``self.report_ignore_patterns`` (checked with
          ``util.array_contains``), the downloaded file, if any, is deleted
          and the report's metadata entry is removed from the category.
        * Otherwise, if it is marked as downloaded but not yet as valid, the
          file is checked with ``util.is_invalid_pdf``. An invalid file is
          deleted and the report is reset to "not downloaded"; any other file
          is marked valid.

        ``self.serialization_single_stock_data()`` is called once for each
        category in which at least one report was changed or removed.

        Side effects: ``self.current_category_metadata`` and
        ``self.current_report_metadata`` are rebound while iterating.
        """
        for category in self.current_stock.report_category:
            self.current_category_metadata = self.current_stock.report_category[category]
            category_changed = False
            ignored_report_ids = []
            for report_id in self.current_category_metadata.report_metadata:
                self.current_report_metadata = self.current_category_metadata.report_metadata[report_id]
                # get_report_path() takes no arguments; it presumably resolves
                # the path from self.current_report_metadata set just above.
                # Only the local file path is needed here.
                _, target_path, _ = self.get_report_path()
                report = self.current_report_metadata

                if util.array_contains(report.announcementTitle, self.report_ignore_patterns):
                    if report.is_download:
                        util.delete_file(target_path)
                    # Removal is deferred: the mapping must not change while
                    # it is being iterated.
                    ignored_report_ids.append(report_id)
                    continue

                if not report.is_download or report.is_valid:
                    # Nothing on disk to verify, or already verified earlier.
                    continue

                category_changed = True
                if util.is_invalid_pdf(target_path):
                    # Reset the flags first so the report no longer counts as
                    # downloaded, then drop the broken file.
                    report.is_download = False
                    report.is_valid = False
                    util.delete_file(target_path)
                else:
                    report.is_valid = True

            if ignored_report_ids:
                category_changed = True
                for ignored_report_id in ignored_report_ids:
                    # logging.warn is a deprecated alias (removed in Python
                    # 3.13); logging.warning emits the same message.
                    logging.warning(
                        "%s delete report %s",
                        self.current_stock.stock_code,
                        self.current_category_metadata.report_metadata[ignored_report_id].announcementTitle)
                    del self.current_category_metadata.report_metadata[ignored_report_id]

            if category_changed:
                self.serialization_single_stock_data()
Пример #2
0
    def download_report_metadata_category(self, category):
        """Fetch and store report metadata of one category for the current stock.

        The category metadata object is obtained from
        ``self.current_stock.get_report_metadata(category)`` and stored in
        ``self.current_category_metadata``. If it reports that no download is
        needed for the ``self.fromCob`` .. ``self.toCob`` range, nothing else
        happens.

        Otherwise ``self.report_metadata_url`` is queried page by page
        (30 entries per page, 45 second timeout) until the response's
        ``hasMore`` flag is false. Every announcement whose title does not
        match ``self.report_ignore_patterns`` is wrapped in ``ReportMetadata``
        and added to the category. When all pages were read, the effective
        COB range is updated and the stock data is serialized if either the
        range update or any added report reported a change.

        A non-OK HTTP status is logged and aborts the method without saving.
        ``IOError``, ``AttributeError`` and ``RuntimeError`` are logged with
        their traceback and swallowed.

        :param category: category value sent in the ``category`` form field.
        :return: always ``None``.
        """
        self.current_category_metadata = self.current_stock.get_report_metadata(
            category)
        if not self.current_category_metadata.is_report_metadata_need_download(
                self.fromCob, self.toCob):
            return None

        found_new_metadata = False
        try:
            page_num = 1
            while True:
                # (None, value) tuples passed through ``files=`` are sent as
                # plain form fields without a file name.
                payload = {
                    "stock": (None, self.current_stock.stock_code),
                    "category": (None, category),
                    "pageNum": (None, str(page_num)),
                    "pageSize": (None, "30"),
                    "column": (None, self.current_stock.exchange_name),
                    "tabName": (None, "fulltext"),
                    "seDate": (None,
                               util.cob2date_range_string(
                                   self.fromCob, self.toCob)),
                }
                response = requests.post(
                    self.report_metadata_url,
                    files=payload,
                    timeout=45,
                    stream=False,
                    headers={'Connection': 'close'})
                if response.status_code != requests.codes.ok:
                    logging.error('download report metadata for ' +
                                  self.current_stock.stock_code)
                    return None

                page = json.loads(response.content)
                for announcement in page['announcements']:
                    title = announcement['announcementTitle']
                    if util.array_contains(title,
                                           self.report_ignore_patterns):
                        continue
                    entry = ReportMetadata(
                        announcement['announcementId'],
                        title,
                        announcement['announcementTime'])
                    if self.current_category_metadata.add_report_metadata(
                            entry):
                        found_new_metadata = True

                if not page['hasMore']:
                    break
                page_num += 1

            # The COB range is always updated; saving happens when either the
            # range or the set of reports changed.
            cob_changed = self.current_category_metadata.update_effective_cob(
                self.fromCob, self.toCob)
            if cob_changed or found_new_metadata:
                self.serialization_single_stock_data()
        except (IOError, AttributeError, RuntimeError):
            logging.exception(self.current_stock.stock_code +
                              ' save report metadata failed')
Пример #3
0
    def download_report_metadata_category(self, category):
        """Download the report metadata of ``category`` for the current stock.

        Steps:

        1. Store ``self.current_stock.get_report_metadata(category)`` in
           ``self.current_category_metadata`` and ask it whether a download is
           needed for ``self.fromCob`` .. ``self.toCob``; if not, do nothing.
        2. POST to ``self.report_metadata_url`` one page at a time (page size
           "30", timeout 45 s) while the response's ``hasMore`` is truthy.
        3. For each returned announcement whose title is not matched by
           ``self.report_ignore_patterns``, build a ``ReportMetadata`` and add
           it to the category metadata.
        4. Update the effective COB range and call
           ``self.serialization_single_stock_data()`` when the range update or
           any addition returned a truthy result.

        If a page answers with a non-OK status, an error is logged and the
        method returns immediately without step 4. ``IOError``,
        ``AttributeError`` and ``RuntimeError`` raised during steps 2-4 are
        logged with traceback and not propagated.

        :param category: value for the ``category`` form field.
        :return: ``None`` in all cases.
        """
        has_new_report_metadata = False
        self.current_category_metadata = self.current_stock.get_report_metadata(category)
        need_download = self.current_category_metadata.is_report_metadata_need_download(
            self.fromCob, self.toCob)
        if need_download:
            try:
                page_num = 0
                has_more = True
                while has_more:
                    page_num += 1
                    date_range = util.cob2date_range_string(self.fromCob, self.toCob)
                    # Each field is a (None, value) tuple so that it is sent
                    # as a form field without a file name.
                    form = {
                        "stock": (None, self.current_stock.stock_code),
                        "category": (None, category),
                        "pageNum": (None, str(page_num)),
                        "pageSize": (None, "30"),
                        "column": (None, self.current_stock.exchange_name),
                        "tabName": (None, "fulltext"),
                        "seDate": (None, date_range),
                    }
                    r = requests.post(self.report_metadata_url, files=form, timeout=45,
                                      stream=False, headers={'Connection': 'close'})
                    if r.status_code != requests.codes.ok:
                        logging.error('download report metadata for ' + self.current_stock.stock_code)
                        return None

                    result = json.loads(r.content)
                    for report in result['announcements']:
                        is_ignored = util.array_contains(report['announcementTitle'],
                                                         self.report_ignore_patterns)
                        if not is_ignored:
                            added = self.current_category_metadata.add_report_metadata(
                                ReportMetadata(report['announcementId'],
                                               report['announcementTitle'],
                                               report['announcementTime']))
                            if added:
                                has_new_report_metadata = True

                    # Continue with the next page only while the service
                    # reports more results.
                    has_more = result['hasMore']

                cob_updated = self.current_category_metadata.update_effective_cob(self.fromCob, self.toCob)
                if cob_updated or has_new_report_metadata:
                    self.serialization_single_stock_data()
            except (IOError, AttributeError, RuntimeError):
                logging.exception(self.current_stock.stock_code + ' save report metadata failed')
Пример #4
0
    def check_report_integrity(self):
        """Clean up and verify the stored reports of ``self.current_stock``.

        Each category in ``self.current_stock.report_category`` is walked
        once. Reports whose title is matched by
        ``self.report_ignore_patterns`` are removed from the category (their
        downloaded file is deleted first when ``is_download`` is set).
        Reports that are downloaded but not yet flagged valid are checked
        with ``util.is_invalid_pdf``: an invalid file is deleted and the
        report is flagged as not downloaded, otherwise the report is flagged
        valid.

        If anything was changed or removed in a category,
        ``self.serialization_single_stock_data()`` is called for it.

        ``self.current_category_metadata`` and
        ``self.current_report_metadata`` are reassigned during the walk.
        """
        for category in self.current_stock.report_category:
            self.current_category_metadata = self.current_stock.report_category[
                category]
            has_integrity_checked = False
            to_delete_report_ids = []
            for report_id in self.current_category_metadata.report_metadata:
                self.current_report_metadata = self.current_category_metadata.report_metadata[
                    report_id]
                # Only the file path is used; the other two values returned
                # by get_report_path() are not needed here.
                target_path = self.get_report_path()[1]
                if util.array_contains(
                        self.current_report_metadata.announcementTitle,
                        self.report_ignore_patterns):
                    if self.current_report_metadata.is_download:
                        util.delete_file(target_path)
                    # Collected for later: entries cannot be removed from the
                    # mapping while it is being iterated.
                    to_delete_report_ids.append(report_id)
                elif (self.current_report_metadata.is_download
                        and not self.current_report_metadata.is_valid):
                    pdf_is_broken = util.is_invalid_pdf(target_path)
                    # Either outcome changes a flag, so the data must be saved.
                    has_integrity_checked = True
                    if pdf_is_broken:
                        self.current_report_metadata.is_download = False
                        self.current_report_metadata.is_valid = False
                        util.delete_file(target_path)
                    else:
                        self.current_report_metadata.is_valid = True

            if to_delete_report_ids:
                has_integrity_checked = True
                for to_delete_report_id in to_delete_report_ids:
                    # logging.warning replaces the deprecated logging.warn
                    # alias, which no longer exists in Python 3.13+.
                    logging.warning(
                        self.current_stock.stock_code + " delete report " +
                        self.current_category_metadata.
                        report_metadata[to_delete_report_id].announcementTitle)
                    del self.current_category_metadata.report_metadata[
                        to_delete_report_id]

            if has_integrity_checked:
                self.serialization_single_stock_data()