def check_report_integrity(self):
    """Prune ignored reports and validate downloaded files of the current stock.

    For every category in ``self.current_stock.report_category``:

    * a report whose title matches ``self.report_ignore_patterns`` (as
      decided by ``util.array_contains``) is removed from the category's
      ``report_metadata``; its file is deleted first when the report is
      flagged as downloaded;
    * a downloaded report that is not yet flagged valid is checked with
      ``util.is_invalid_pdf``: an invalid file is deleted and the report is
      reset to "not downloaded / not valid", otherwise it is flagged valid.

    ``self.serialization_single_stock_data()`` is called once for each
    category in which at least one of the changes above happened.

    Side effects: rebinds ``self.current_category_metadata`` and
    ``self.current_report_metadata`` while iterating.
    """
    for category in self.current_stock.report_category:
        category_metadata = self.current_stock.report_category[category]
        self.current_category_metadata = category_metadata
        reports = category_metadata.report_metadata

        has_integrity_checked = False
        # Removal is deferred so the mapping is not mutated while iterated.
        to_delete_report_ids = []

        for report_id in reports:
            metadata = reports[report_id]
            # get_report_path() takes no arguments, so it presumably reads
            # self.current_report_metadata -- keep this assignment first.
            self.current_report_metadata = metadata
            _, target_path, _ = self.get_report_path()

            if util.array_contains(metadata.announcementTitle,
                                   self.report_ignore_patterns):
                if metadata.is_download:
                    util.delete_file(target_path)
                to_delete_report_ids.append(report_id)
            elif metadata.is_download and not metadata.is_valid:
                # Either outcome changes the stored flags, so the category
                # has to be persisted afterwards.
                has_integrity_checked = True
                if util.is_invalid_pdf(target_path):
                    metadata.is_download = False
                    metadata.is_valid = False
                    util.delete_file(target_path)
                else:
                    metadata.is_valid = True

        if to_delete_report_ids:
            has_integrity_checked = True
            for to_delete_report_id in to_delete_report_ids:
                # logging.warn is a deprecated alias; logging.warning emits
                # the same record without a DeprecationWarning.
                logging.warning(
                    "%s delete report %s",
                    self.current_stock.stock_code,
                    reports[to_delete_report_id].announcementTitle)
                del reports[to_delete_report_id]

        if has_integrity_checked:
            self.serialization_single_stock_data()
def download_report_metadata_category(self, category):
    """Fetch the report metadata of one category for the current stock.

    Looks up the category's metadata via
    ``self.current_stock.get_report_metadata(category)`` and stores it in
    ``self.current_category_metadata``. When that object reports (through
    ``is_report_metadata_need_download``) that the ``fromCob``..``toCob``
    range needs downloading, the listing at ``self.report_metadata_url`` is
    requested page by page (30 entries per page) until the response's
    ``hasMore`` flag is false. Every announcement whose title does not match
    ``self.report_ignore_patterns`` is wrapped in ``ReportMetadata`` and
    passed to ``add_report_metadata``.

    ``self.serialization_single_stock_data()`` is called when
    ``update_effective_cob`` returns a true value or at least one
    ``add_report_metadata`` call did.

    Failure handling is best effort: a non-OK HTTP status is logged and the
    method returns without updating the effective range or persisting; the
    exceptions listed in the ``except`` clause are logged with a traceback
    and swallowed.

    :param category: category value sent in the ``category`` form field.
    :return: always ``None``.
    """
    self.current_category_metadata = self.current_stock.get_report_metadata(category)
    if not self.current_category_metadata.is_report_metadata_need_download(
            self.fromCob, self.toCob):
        return None

    has_new_report_metadata = False
    try:
        page_num = 1
        while True:
            # (None, value) tuples make requests send plain multipart form
            # fields rather than file uploads.
            r = requests.post(
                self.report_metadata_url,
                files={
                    "stock": (None, self.current_stock.stock_code),
                    "category": (None, category),
                    "pageNum": (None, str(page_num)),
                    "pageSize": (None, "30"),
                    "column": (None, self.current_stock.exchange_name),
                    "tabName": (None, "fulltext"),
                    "seDate": (None, util.cob2date_range_string(
                        self.fromCob, self.toCob)),
                },
                timeout=45,
                stream=False,
                headers={'Connection': 'close'})
            if r.status_code != requests.codes.ok:
                logging.error('download report metadata for '
                              + self.current_stock.stock_code)
                return None

            result = json.loads(r.content)
            # A null "announcements" value is treated as an empty page
            # instead of raising TypeError when iterated.
            reports = result['announcements'] or []
            for report in reports:
                if util.array_contains(report['announcementTitle'],
                                       self.report_ignore_patterns):
                    continue
                report_metadata = ReportMetadata(
                    report['announcementId'],
                    report['announcementTitle'],
                    report['announcementTime'])
                if self.current_category_metadata.add_report_metadata(
                        report_metadata):
                    has_new_report_metadata = True

            if not result['hasMore']:
                break
            page_num += 1

        # update_effective_cob() must run even when new metadata was found,
        # so it stays on the left-hand side of the ``or``.
        if self.current_category_metadata.update_effective_cob(
                self.fromCob, self.toCob) or has_new_report_metadata:
            self.serialization_single_stock_data()
    except (IOError, AttributeError, RuntimeError, ValueError, KeyError):
        # ValueError covers a body that is not valid JSON (json.loads);
        # KeyError covers a response missing an expected field.
        logging.exception(self.current_stock.stock_code
                          + ' save report metadata failed')
def download_report_metadata_category(self, category):
    """Fetch the report metadata of one category for the current stock.

    Looks up the category's metadata via
    ``self.current_stock.get_report_metadata(category)`` and stores it in
    ``self.current_category_metadata``. When that object reports (through
    ``is_report_metadata_need_download``) that the ``fromCob``..``toCob``
    range needs downloading, the listing at ``self.report_metadata_url`` is
    requested page by page (30 entries per page) until the response's
    ``hasMore`` flag is false. Every announcement whose title does not match
    ``self.report_ignore_patterns`` is wrapped in ``ReportMetadata`` and
    passed to ``add_report_metadata``.

    ``self.serialization_single_stock_data()`` is called when
    ``update_effective_cob`` returns a true value or at least one
    ``add_report_metadata`` call did.

    Failure handling is best effort: a non-OK HTTP status is logged and the
    method returns without updating the effective range or persisting; the
    exceptions listed in the ``except`` clause are logged with a traceback
    and swallowed.

    :param category: category value sent in the ``category`` form field.
    :return: always ``None``.
    """
    self.current_category_metadata = self.current_stock.get_report_metadata(category)
    if not self.current_category_metadata.is_report_metadata_need_download(
            self.fromCob, self.toCob):
        return None

    has_new_report_metadata = False
    try:
        page_num = 1
        while True:
            # (None, value) tuples make requests send plain multipart form
            # fields rather than file uploads.
            r = requests.post(
                self.report_metadata_url,
                files={
                    "stock": (None, self.current_stock.stock_code),
                    "category": (None, category),
                    "pageNum": (None, str(page_num)),
                    "pageSize": (None, "30"),
                    "column": (None, self.current_stock.exchange_name),
                    "tabName": (None, "fulltext"),
                    "seDate": (None, util.cob2date_range_string(
                        self.fromCob, self.toCob)),
                },
                timeout=45,
                stream=False,
                headers={'Connection': 'close'})
            if r.status_code != requests.codes.ok:
                logging.error('download report metadata for '
                              + self.current_stock.stock_code)
                return None

            result = json.loads(r.content)
            # A null "announcements" value is treated as an empty page
            # instead of raising TypeError when iterated.
            reports = result['announcements'] or []
            for report in reports:
                if util.array_contains(report['announcementTitle'],
                                       self.report_ignore_patterns):
                    continue
                report_metadata = ReportMetadata(
                    report['announcementId'],
                    report['announcementTitle'],
                    report['announcementTime'])
                if self.current_category_metadata.add_report_metadata(
                        report_metadata):
                    has_new_report_metadata = True

            if not result['hasMore']:
                break
            page_num += 1

        # update_effective_cob() must run even when new metadata was found,
        # so it stays on the left-hand side of the ``or``.
        if self.current_category_metadata.update_effective_cob(
                self.fromCob, self.toCob) or has_new_report_metadata:
            self.serialization_single_stock_data()
    except (IOError, AttributeError, RuntimeError, ValueError, KeyError):
        # ValueError covers a body that is not valid JSON (json.loads);
        # KeyError covers a response missing an expected field.
        logging.exception(self.current_stock.stock_code
                          + ' save report metadata failed')
def check_report_integrity(self):
    """Prune ignored reports and validate downloaded files of the current stock.

    For every category in ``self.current_stock.report_category``:

    * a report whose title matches ``self.report_ignore_patterns`` (as
      decided by ``util.array_contains``) is removed from the category's
      ``report_metadata``; its file is deleted first when the report is
      flagged as downloaded;
    * a downloaded report that is not yet flagged valid is checked with
      ``util.is_invalid_pdf``: an invalid file is deleted and the report is
      reset to "not downloaded / not valid", otherwise it is flagged valid.

    ``self.serialization_single_stock_data()`` is called once for each
    category in which at least one of the changes above happened.

    Side effects: rebinds ``self.current_category_metadata`` and
    ``self.current_report_metadata`` while iterating.
    """
    for category in self.current_stock.report_category:
        category_metadata = self.current_stock.report_category[category]
        self.current_category_metadata = category_metadata
        reports = category_metadata.report_metadata

        has_integrity_checked = False
        # Removal is deferred so the mapping is not mutated while iterated.
        to_delete_report_ids = []

        for report_id in reports:
            metadata = reports[report_id]
            # get_report_path() takes no arguments, so it presumably reads
            # self.current_report_metadata -- keep this assignment first.
            self.current_report_metadata = metadata
            _, target_path, _ = self.get_report_path()

            if util.array_contains(metadata.announcementTitle,
                                   self.report_ignore_patterns):
                if metadata.is_download:
                    util.delete_file(target_path)
                to_delete_report_ids.append(report_id)
            elif metadata.is_download and not metadata.is_valid:
                # Either outcome changes the stored flags, so the category
                # has to be persisted afterwards.
                has_integrity_checked = True
                if util.is_invalid_pdf(target_path):
                    metadata.is_download = False
                    metadata.is_valid = False
                    util.delete_file(target_path)
                else:
                    metadata.is_valid = True

        if to_delete_report_ids:
            has_integrity_checked = True
            for to_delete_report_id in to_delete_report_ids:
                # logging.warn is a deprecated alias; logging.warning emits
                # the same record without a DeprecationWarning.
                logging.warning(
                    "%s delete report %s",
                    self.current_stock.stock_code,
                    reports[to_delete_report_id].announcementTitle)
                del reports[to_delete_report_id]

        if has_integrity_checked:
            self.serialization_single_stock_data()