def __init__(self):
    const_path = sys.path[0].replace("\\craw_data\\stock_list", "")
    # Load the shared constants file; the with-block guarantees the file
    # handle is closed even if the JSON parse fails.
    with open(const_path + '\\const.json', 'r', encoding='utf8') as f:
        self.consts = json.loads(f.read())
    self.downloader = download.Downloader()
    # self.downloader.init_zhilian_ip()  # use proxy IPs (paid)
    # self.downloader.init_ip_pool()     # use proxy IPs (free)
    self.arr = []
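# A minimal usage sketch, assuming the enclosing class is the stock-list
# crawler (its real name is not visible in this excerpt, so
# StockListCrawler below is a hypothetical stand-in); the two proxy
# initializers are the ones referenced in the commented-out lines above:
#
#     crawler = StockListCrawler()
#     crawler.downloader.init_ip_pool()       # opt into free proxy IPs
#     # crawler.downloader.init_zhilian_ip()  # or the paid proxy service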
def __init__(self, end_date='00000000'):
    const_path = sys.path[0].replace("\\craw_data\\dayline", "")
    with open(const_path + "\\const.json", "r", encoding="utf8") as f:
        self.consts = json.loads(f.read())
    self.stock_list_file = self.consts['stock_list_file']  # CSV file listing every stock
    self.save_path_prefix = self.consts['day_line_file_prefix']['ease_money']  # directory for day-line files
    self.end_date = end_date  # cutoff date for the crawl
    self.codes = self.get_codes()
    self.downloader = download.Downloader()  # downloader
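# For reference, a minimal sketch of the const.json layout these
# constructors expect, inferred from the keys read above and below
# ('stock_list_file', 'day_line_file_prefix' with its 'ease_money' and
# 'netease' sources). The example values are purely illustrative
# assumptions, not the project's real paths:
#
# {
#     "stock_list_file": "data\\stock_list.csv",
#     "day_line_file_prefix": {
#         "ease_money": "data\\dayline\\ease_money\\",
#         "netease": "data\\dayline\\netease\\"
#     }
# }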
def __init__(self, end_date='99999999', thread_num=1, timeout=10):
    const_path = sys.path[0].replace("\\craw_data\\dayline", "")
    # print(const_path)
    with open(const_path + "\\const.json", "r", encoding='utf8') as f:
        consts = json.loads(f.read())
    self.stock_list_file = consts['stock_list_file']  # CSV file listing every stock
    self.save_path_prefix = consts['day_line_file_prefix']['netease']  # directory for day-line files
    self.end_date = end_date  # cutoff date for the crawl
    self.thread_num = thread_num  # number of worker threads
    self.timeout = timeout  # per-thread timeout in seconds
    self.downloader = download.Downloader()  # downloader
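# A minimal sketch (not from the original source) of how thread_num and
# timeout could drive the per-stock downloads. fetch_dayline is a
# hypothetical stand-in for whatever this class actually calls per code;
# the per-request timeout would typically be applied inside it.
from concurrent.futures import ThreadPoolExecutor, as_completed


def crawl_all(crawler, codes, fetch_dayline):
    # Bound the worker pool by the configured thread count.
    with ThreadPoolExecutor(max_workers=crawler.thread_num) as pool:
        futures = {pool.submit(fetch_dayline, crawler, code): code
                   for code in codes}
        # as_completed yields each future as soon as it finishes, so slow
        # stocks never block fast ones from being handled.
        for future in as_completed(futures):
            code = futures[future]
            try:
                future.result()
            except Exception as exc:
                print('download failed for {}: {}'.format(code, exc))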
def run(self):
    """Run the pipeline according to the passed command line args.

    Returns
    -------
    None
    """
    # Get some initialization info required for the pipeline to run
    initializer_obj = initialize.Initializer(self.instr, self.cfg)
    initializer_obj.initialize_dates()
    initializer_obj.get_date_ranges()
    initializer_obj.get_processed_ranges()
    if self.initialize:
        initializer_obj.initialize_HDF5(chunks=self.chunks)

    # Initialize the downloader
    downloader = download.Downloader(instr=self.instr,
                                     instr_cfg=self.instr_cfg)

    # Divide up the dates into chunks
    date_chunks = np.array_split(initializer_obj.dates, self.chunks)
    for i, chunk in enumerate(date_chunks):
        for (start, stop) in chunk:
            failed = False
            results = None
            if '{} {}'.format(start.iso, stop.iso) in \
                    initializer_obj.previously_analyzed:
                LOG.info('Already analyzed {} to {}\n'.format(
                    start.iso, stop.iso))
                continue

            # Start the analysis
            LOG.info('Analyzing data from {} to {}'.format(
                start.iso, stop.iso))
            if self.download:
                download_time = self.run_downloader(date_range=(start, stop),
                                                    downloader=downloader)
                self.processing_times['download'] = download_time
            self.flist = glob.glob(self.search_pattern)
            if self.process:
                process_time = self.run_processing(start, stop)
                self.processing_times['cr_rejection'] = process_time

            # Analyze the images and extract the results iff files were
            # successfully processed through CR rejection AND the analyze
            # flag is True.
            if self.analyze and self.flist:
                analysis_time, results = self.run_labeling_all(
                    chunk_num=i + 1)
                self.processing_times['analysis'] = analysis_time
            else:
                failed = True

            # Sum the per-stage timings, excluding any 'total' recorded on
            # a previous iteration so it is not double counted.
            self.processing_times['total'] = sum(
                v for k, v in self.processing_times.items() if k != 'total')

            # Clean up downloaded files and record the range just processed
            self._pipeline_cleanup(start, stop, failed)

            # Send the final email iff there were results computed
            if results:
                self.send_email(start, stop, results)
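# For context, np.array_split (used above) tolerates a chunk count that
# does not evenly divide the number of date ranges, unlike np.split; a
# standalone illustration with five hypothetical ranges in two chunks:
import numpy as np

ranges = np.array([(0, 1), (1, 2), (2, 3), (3, 4), (4, 5)])
for i, chunk in enumerate(np.array_split(ranges, 2)):
    print(i, chunk.tolist())
# 0 [[0, 1], [1, 2], [2, 3]]
# 1 [[3, 4], [4, 5]]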