def check_report_integrity(self):
    """Prune ignored reports and validate downloaded files of the current stock.

    Every category in ``self.current_stock.report_category`` is walked and
    each of its reports is handled as follows:

    * If ``util.array_contains(title, self.report_ignore_patterns)`` is
      truthy for the report's ``announcementTitle``, the report is removed
      from the category's ``report_metadata``; its file is deleted first
      when the report is flagged ``is_download``.
    * Otherwise, if the report is flagged ``is_download`` but not yet
      ``is_valid``, the file is checked with ``util.is_invalid_pdf``.
      An invalid file is deleted and the report is reset to
      ``is_download = False`` / ``is_valid = False``; any other file is
      marked ``is_valid = True``.

    ``self.serialization_single_stock_data()`` is called after each category
    in which at least one report was removed or (in)validated.

    Side effects: ``self.current_category_metadata`` and
    ``self.current_report_metadata`` are rebound while iterating and keep
    the last visited category / report afterwards.

    Returns:
        None.
    """
    # NOTE(review): a second definition of check_report_integrity appears
    # later in this file and shadows this one -- confirm which copy is meant
    # to stay.
    for category in self.current_stock.report_category:
        category_metadata = self.current_stock.report_category[category]
        self.current_category_metadata = category_metadata
        reports = category_metadata.report_metadata

        changed = False
        ignored_ids = []

        for report_id in reports:
            report = reports[report_id]
            # get_report_path() takes no arguments, so it presumably reads
            # the current report from self; publish it before the call.
            self.current_report_metadata = report
            _, target_path, _ = self.get_report_path()

            if util.array_contains(report.announcementTitle,
                                   self.report_ignore_patterns):
                if report.is_download:
                    util.delete_file(target_path)
                # Removal is deferred: the mapping is being iterated.
                ignored_ids.append(report_id)
            elif report.is_download and not report.is_valid:
                if util.is_invalid_pdf(target_path):
                    # Reset the flags so the report counts as not downloaded.
                    report.is_download = False
                    report.is_valid = False
                    util.delete_file(target_path)
                else:
                    report.is_valid = True
                changed = True

        for report_id in ignored_ids:
            logging.warning("%s delete report %s",
                            self.current_stock.stock_code,
                            reports[report_id].announcementTitle)
            del reports[report_id]
        if ignored_ids:
            changed = True

        # Persist only when this category's metadata was actually modified.
        if changed:
            self.serialization_single_stock_data()
def download_report(self):
    """Download the current report's file unless it already exists on disk.

    The date string, target path and URL come from ``self.get_report_path()``.
    If the target path already exists the report is simply flagged as
    downloaded; only presence is checked here, not content. Otherwise the
    URL is fetched with ``requests`` in streaming mode, passing the date
    string as the ``announceTime`` query parameter, and written to the
    target path in 16 KiB chunks.

    A non-success HTTP status is treated as a failure rather than being
    saved to disk as if it were the report.

    Returns:
        True if the file already existed or was downloaded successfully
        (``self.current_report_metadata.is_download`` is then set to True);
        False if an ``IOError`` or ``RuntimeError`` occurred, in which case
        the failure is logged and ``util.delete_file`` is called on the
        target path.
    """
    date_str, target_path, target_url = self.get_report_path()

    if os.path.exists(target_path):
        self.current_report_metadata.is_download = True
        return True

    try:
        logging.warning("download %s", target_path)
        # NOTE(review): no timeout is passed (unchanged), so a stalled
        # server can block this call indefinitely -- consider adding one.
        response = requests.get(target_url, stream=True,
                                params={"announceTime": date_str})
        try:
            # requests' HTTPError derives from IOError, so an error status
            # is routed to the handler below instead of being written out.
            response.raise_for_status()
            with open(target_path, 'wb') as fd:
                for chunk in response.iter_content(chunk_size=16384):
                    fd.write(chunk)
        finally:
            # A streamed response holds its connection until it is closed.
            response.close()
    except (IOError, RuntimeError):
        logging.exception('%s save report failed. %s',
                          self.current_stock.stock_code,
                          self.current_report_metadata.announcementTitle)
        # Remove whatever partial file may have been written.
        util.delete_file(target_path)
        return False

    self.current_report_metadata.is_download = True
    return True
def check_report_integrity(self):
    """Prune ignored reports and validate downloaded files of the current stock.

    Every category in ``self.current_stock.report_category`` is walked and
    each of its reports is handled as follows:

    * If ``util.array_contains(title, self.report_ignore_patterns)`` is
      truthy for the report's ``announcementTitle``, the report is removed
      from the category's ``report_metadata``; its file is deleted first
      when the report is flagged ``is_download``.
    * Otherwise, if the report is flagged ``is_download`` but not yet
      ``is_valid``, the file is checked with ``util.is_invalid_pdf``.
      An invalid file is deleted and the report is reset to
      ``is_download = False`` / ``is_valid = False``; any other file is
      marked ``is_valid = True``.

    ``self.serialization_single_stock_data()`` is called after each category
    in which at least one report was removed or (in)validated.

    Side effects: ``self.current_category_metadata`` and
    ``self.current_report_metadata`` are rebound while iterating and keep
    the last visited category / report afterwards.

    Returns:
        None.
    """
    # NOTE(review): this redefines check_report_integrity, which is already
    # defined earlier in this file -- confirm which copy is meant to stay.
    for category in self.current_stock.report_category:
        category_metadata = self.current_stock.report_category[category]
        self.current_category_metadata = category_metadata
        reports = category_metadata.report_metadata

        changed = False
        ignored_ids = []

        for report_id in reports:
            report = reports[report_id]
            # get_report_path() takes no arguments, so it presumably reads
            # the current report from self; publish it before the call.
            self.current_report_metadata = report
            _, target_path, _ = self.get_report_path()

            if util.array_contains(report.announcementTitle,
                                   self.report_ignore_patterns):
                if report.is_download:
                    util.delete_file(target_path)
                # Removal is deferred: the mapping is being iterated.
                ignored_ids.append(report_id)
            elif report.is_download and not report.is_valid:
                if util.is_invalid_pdf(target_path):
                    # Reset the flags so the report counts as not downloaded.
                    report.is_download = False
                    report.is_valid = False
                    util.delete_file(target_path)
                else:
                    report.is_valid = True
                changed = True

        for report_id in ignored_ids:
            logging.warning("%s delete report %s",
                            self.current_stock.stock_code,
                            reports[report_id].announcementTitle)
            del reports[report_id]
        if ignored_ids:
            changed = True

        # Persist only when this category's metadata was actually modified.
        if changed:
            self.serialization_single_stock_data()