Пример #1
0
    def check_report_integrity(self):
        """Prune ignored reports and validate downloaded files of the current stock.

        For every category in ``self.current_stock.report_category``:

        * A report whose ``announcementTitle`` matches
          ``self.report_ignore_patterns`` (as decided by
          ``util.array_contains``) has its file deleted if it was downloaded,
          and its metadata entry is removed from the category.
        * A downloaded report not yet marked valid is checked with
          ``util.is_invalid_pdf``: an invalid file is deleted and the report
          is reset to "not downloaded / not valid"; otherwise the report is
          marked valid.

        ``self.serialization_single_stock_data()`` is called once per category,
        and only when something in that category changed.

        Side effects: ``self.current_category_metadata`` and
        ``self.current_report_metadata`` are rebound while iterating.
        """
        for category in self.current_stock.report_category:
            self.current_category_metadata = self.current_stock.report_category[category]
            reports = self.current_category_metadata.report_metadata
            changed = False
            ignored_ids = []
            for report_id in reports:
                # get_report_path() takes no arguments, so the current report
                # must be selected on self before it is called.
                self.current_report_metadata = reports[report_id]
                report = self.current_report_metadata
                _, target_path, _ = self.get_report_path()
                if util.array_contains(report.announcementTitle, self.report_ignore_patterns):
                    if report.is_download:
                        util.delete_file(target_path)
                    # Removal is deferred: the dict cannot shrink while it is
                    # being iterated.
                    ignored_ids.append(report_id)
                elif report.is_download and not report.is_valid:
                    if util.is_invalid_pdf(target_path):
                        report.is_download = False
                        report.is_valid = False
                        util.delete_file(target_path)
                    else:
                        report.is_valid = True
                    changed = True

            if ignored_ids:
                changed = True
                for report_id in ignored_ids:
                    # logging.warning: the ``warn`` alias is deprecated and
                    # no longer exists in Python 3.13+.
                    logging.warning(self.current_stock.stock_code + " delete report " +
                                    reports[report_id].announcementTitle)
                    del reports[report_id]

            if changed:
                self.serialization_single_stock_data()
Пример #2
0
    def download_report(self):
        """Download the current report's file unless it already exists on disk.

        The target path, URL and announce date come from
        ``self.get_report_path()``.

        Returns:
            True if the file was already present or was downloaded
            successfully (``is_download`` is set on the current report
            metadata in both cases); False if the request or the write failed,
            in which case ``util.delete_file`` is called on the target path.
        """
        date_str, target_path, target_url = self.get_report_path()
        if os.path.exists(target_path):
            self.current_report_metadata.is_download = True
            return True

        try:
            # logging.warning: the ``warn`` alias is deprecated and no longer
            # exists in Python 3.13+.
            logging.warning("download " + target_path)
            response = requests.get(target_url, stream=True, params={"announceTime": date_str})
            try:
                # Do not save an HTTP error page as if it were the report.
                # requests' exceptions derive from IOError, so the handler
                # below covers HTTPError too.
                response.raise_for_status()
                with open(target_path, 'wb') as fd:
                    for chunk in response.iter_content(chunk_size=16384):
                        fd.write(chunk)
            finally:
                # A streamed response keeps its connection until it is closed.
                response.close()
        except (IOError, RuntimeError):
            logging.exception(self.current_stock.stock_code + ' save report failed. ' +
                              self.current_report_metadata.announcementTitle)
            util.delete_file(target_path)
            return False

        self.current_report_metadata.is_download = True
        return True
Пример #3
0
    def check_report_integrity(self):
        """Drop ignored reports and verify downloaded files, category by category.

        Walks ``self.current_stock.report_category``. Within each category:

        * reports whose ``announcementTitle`` matches
          ``self.report_ignore_patterns`` (via ``util.array_contains``) are
          removed from the metadata, and their file is deleted when it had
          been downloaded;
        * downloaded reports that are not yet flagged valid are tested with
          ``util.is_invalid_pdf``; a bad file is deleted and the report's
          ``is_download``/``is_valid`` flags are cleared, a good one gets
          ``is_valid = True``.

        If anything changed in a category, the stock data is persisted through
        ``self.serialization_single_stock_data()``.

        Side effects: rebinds ``self.current_category_metadata`` and
        ``self.current_report_metadata`` as it iterates.
        """
        for category in self.current_stock.report_category:
            self.current_category_metadata = self.current_stock.report_category[
                category]
            report_map = self.current_category_metadata.report_metadata
            dirty = False
            doomed_ids = []
            for report_id in report_map:
                # Select the report on self first: get_report_path() has no
                # parameters to receive it.
                self.current_report_metadata = report_map[report_id]
                metadata = self.current_report_metadata
                _, target_path, _ = self.get_report_path()
                if util.array_contains(metadata.announcementTitle,
                                       self.report_ignore_patterns):
                    if metadata.is_download:
                        util.delete_file(target_path)
                    # Entries are deleted after the loop; the dict must not
                    # change size during iteration.
                    doomed_ids.append(report_id)
                elif metadata.is_download and not metadata.is_valid:
                    if util.is_invalid_pdf(target_path):
                        metadata.is_download = False
                        metadata.is_valid = False
                        util.delete_file(target_path)
                    else:
                        metadata.is_valid = True
                    dirty = True

            if doomed_ids:
                dirty = True
                for doomed_id in doomed_ids:
                    # ``logging.warn`` is a deprecated alias that Python 3.13
                    # removed; ``warning`` is the supported spelling.
                    logging.warning(
                        self.current_stock.stock_code + " delete report " +
                        report_map[doomed_id].announcementTitle)
                    del report_map[doomed_id]

            if dirty:
                self.serialization_single_stock_data()
Пример #4
0
    def download_report(self):
        """Fetch the current report to disk, skipping files that already exist.

        ``self.get_report_path()`` supplies the announce date, the local
        target path and the download URL.

        Returns:
            True when the file is already on disk or the download succeeded;
            ``is_download`` on the current report metadata is set to True.
            False when the HTTP request or the file write failed; the failure
            is logged and ``util.delete_file`` is called on the target path.
        """
        date_str, target_path, target_url = self.get_report_path()
        if os.path.exists(target_path):
            self.current_report_metadata.is_download = True
            return True

        try:
            # ``logging.warn`` is a deprecated alias that Python 3.13 removed.
            logging.warning("download " + target_path)
            r = requests.get(target_url,
                             stream=True,
                             params={"announceTime": date_str})
            try:
                # Reject 4xx/5xx responses instead of writing the error body
                # to the report file. HTTPError derives from IOError, so the
                # except clause below handles it.
                r.raise_for_status()
                with open(target_path, 'wb') as fd:
                    for chunk in r.iter_content(chunk_size=16384):
                        fd.write(chunk)
            finally:
                # Release the connection held open by stream=True.
                r.close()
        except (IOError, RuntimeError):
            logging.exception(self.current_stock.stock_code +
                              ' save report failed. ' +
                              self.current_report_metadata.announcementTitle)
            util.delete_file(target_path)
            return False
        else:
            self.current_report_metadata.is_download = True
            return True