def update(self, sleep_time):
    # Poll forever: on each trading day, during collecting time, retry the daily
    # bootstrap until it succeeds; the flag is re-armed once collecting time ends.
    succeed = False
    while True:
        self.logger.debug("enter daily update process. %s" % datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
        try:
            if self.cal_client.is_trading_day():
                #self.logger.info("is trading day. %s, succeed:%s" % (datetime.now().strftime('%Y-%m-%d %H:%M:%S'), succeed))
                if self.is_collecting_time():
                    self.logger.debug("enter collecting time. %s, succeed:%s" % (datetime.now().strftime('%Y-%m-%d %H:%M:%S'), succeed))
                    if not succeed:
                        self.clear_network_env()
                        mdate = datetime.now().strftime('%Y-%m-%d')
                        ndate = get_latest_data_date()
                        if ndate is not None:
                            if ndate >= transfer_date_string_to_int(mdate):
                                if self.updating_date is None:
                                    self.updating_date = mdate
                                succeed = self.bootstrap(cdate=self.updating_date, exec_date=self.updating_date)
                                if succeed:
                                    self.updating_date = None
                            else:
                                self.logger.debug("%s is older for %s" % (ndate, mdate))
                else:
                    succeed = False
            gevent.sleep(sleep_time)
        except Exception as e:
            time.sleep(1)
            self.logger.error(e)
def update(self, sleep_time):
    # Simpler polling variant: when the stored data lags behind today's date,
    # run the two collection scripts.
    while True:
        try:
            self.logger.debug("enter update")
            if self.cal_client.is_trading_day():
                if self.is_collecting_time():
                    ndate = get_latest_data_date(filepath="/Volumes/data/quant/stock/data/stockdatainfo.json")
                    mdate = transfer_date_string_to_int(datetime.now().strftime('%Y-%m-%d'))
                    if ndate < mdate:
                        self.run(SCRIPT1, timeout=600)
                        self.run(SCRIPT2, timeout=2700)
        except Exception as e:
            self.logger.error(e)
        time.sleep(sleep_time)
    return new_report_paths


if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(
        description="Download PDF reports not already available in the reports/ folder. "
                    "Outputs the (space-separated) list of new reports (if any)."
    )
    parser.add_argument(
        # This option exists for running the script in GitHub actions. The repo doesn't
        # store the PDF reports, so I use the date of the latest dataset in data/by-date
        # to decide what report(s) to download.
        '--since-latest-dataset-date',
        action='store_true',
        help="Ignore reports predating the date of the latest dataset (in data/by-date)"
    )
    parser.add_argument('--debug', action="store_true")
    args = parser.parse_args()

    logging.basicConfig(
        level=logging.DEBUG if args.debug else logging.INFO,
        format="%(levelname)s: %(message)s"
    )

    new_paths = download_missing_reports(
        after=get_latest_data_date() if args.since_latest_dataset_date else ''
    )
    print(" ".join(str(path) for path in new_paths))
    ]
    if not ordered_parts:
        logger.info("No datasets found in %s", input_dir)
        return False
    dataset = pd.concat(ordered_parts, axis=0)
    dataset.to_csv(out_path, line_terminator="\n")
    logger.info("Full dataset written to: %s", out_path)
    return out_path


if __name__ == "__main__":
    import argparse

    from download_reports import download_missing_reports

    parser = argparse.ArgumentParser()
    parser.add_argument("-o", "--overwrite", action="store_true")
    parser.add_argument("--debug", action="store_true")
    args = parser.parse_args()

    logging.basicConfig(
        level=logging.DEBUG if args.debug else logging.INFO,
        format="%(levelname)s: %(message)s"
    )

    download_missing_reports(after=get_latest_data_date())
    change_list = extract_data_from_reports(skip_existing=not args.overwrite)
    # Rebuild the full dataset only when new data was extracted or --overwrite was given.
    if args.overwrite or change_list:
        path_full = make_dataset()
        change_list.append(path_full)
    print(' '.join(str(path) for path in change_list))