def download(contracts, market, timeframe, destdir, delay, startdate, enddate,
             skiperr, skip_existing, update_existing, sanitize=True,
             daysdelta=20):
    """Download Finam quotes for the given contracts (or a whole market)
    into per-contract parquet files under destdir."""
    exporter = Exporter()

    if not any((contracts, market)):
        raise click.BadParameter('Neither contracts nor market is specified')

    market_filter = dict()
    if market:
        # Dump the full lookup table for the market to a CSV alongside the data.
        market_filter.update(market=Market[market])
        contracts_df = exporter.lookup(**market_filter)
        contracts_df = contracts_df.reset_index()
        destpath = os.path.join(destdir, '{}.{}'.format(market, "csv"))
        contracts_df.to_csv(destpath)
    else:
        # Look up each explicitly requested contract code.
        contracts_list = []
        for contract_code in contracts:
            try:
                contracts_list.append(
                    exporter.lookup(code=contract_code, **market_filter))
            except FinamObjectNotFoundError:
                logger.info('unknown contract "{}"'.format(contract_code))
                if not skiperr:
                    sys.exit(1)
        contracts_df = pd.concat(contracts_list)

    date_ranges = list(
        make_date_ranges(startdate, enddate,
                         datetime.timedelta(days=daysdelta)))

    if not contracts:
        contracts_to_dl = sorted(contracts_df['code'].to_list())
    else:
        contracts_to_dl = sorted(contracts)

    t = tqdm(contracts_to_dl, position=0)
    for contract_code in t:
        this_contract = contracts_df[contracts_df['code'] ==
                                     contract_code].iloc[0]
        destpath = os.path.join(
            destdir,
            '{}-{}.{}'.format(this_contract.code, timeframe, "parquet"))
        data_orig = None
        last_datetime = None
        if os.path.exists(destpath):
            if skip_existing:
                continue
            if update_existing:
                # Resume from the last bar already stored on disk.
                data_orig = pd.read_parquet(destpath)
                last_datetime = data_orig.index[-1]

        # logger.info(u'Downloading contract {}'.format(contract))
        if t:
            t.set_description(f" {contract_code} {this_contract['name']}")
            t.refresh()  # show the update immediately

        all_data = []
        for date_range in tqdm(date_ranges, position=1):
            if last_datetime is not None:
                # Skip ranges entirely covered by the stored data and shrink
                # the range that overlaps it.
                if date_range[1] < last_datetime:
                    continue
                if date_range[1] - last_datetime <= pd.Timedelta(
                        days=daysdelta):
                    date_range = (last_datetime, date_range[1])
            try:
                data = exporter.download(this_contract['id'],
                                         start_date=date_range[0],
                                         end_date=date_range[1],
                                         timeframe=Timeframe[timeframe],
                                         market=Market(
                                             this_contract['market']))
                # if len(all_data) > 0 and len(data) <= 0:
                #     raise ValueError("returned nothing")
                """
                if len(all_data) > 0 and (datepoint_to_date(data.iloc[0]) -
                        datepoint_to_date(all_data[-1].iloc[-1]) >
                        timedeltas_by_setting[Timeframe[timeframe]]):
                    raise ValueError("returned a hole")
                """
                if len(data) > 0:
                    all_data.append(data)
                time.sleep(delay)
            except FinamExportError as e:
                if skiperr:
                    logger.error(repr(e))
                    continue
                else:
                    raise

        if len(all_data) > 0:
            data = pd.concat(all_data)
            if sanitize:
                data = sanitize_df(data)
            if data_orig is not None and last_datetime is not None:
                # Keep only the new rows and append them to the existing data.
                data = data[data.index > last_datetime]
                data = pd.concat([data_orig, data])
            if len(data.index) > 0 and data.index.duplicated().any():
                print(f"{contract_code} has duplicates?")
                continue
            if sanitize:
                data.to_parquet(destpath)
            else:
                data.to_parquet(destpath, index=False)

        if delay > 0:
            logger.info('Sleeping for {} second(s)'.format(delay))
            time.sleep(delay)
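# Usage sketch (not part of the original module): one possible way to call
# download() directly. All argument values below are illustrative assumptions;
# in the real project the function is presumably wired to a CLI, and helpers
# such as make_date_ranges and sanitize_df must already be importable.
if __name__ == '__main__':
    import datetime

    download(
        contracts=('SBER',),       # explicit contract codes; leave empty to use a market lookup
        market=None,               # or e.g. 'SHARES' to dump the whole market table to CSV first
        timeframe='DAILY',         # must be a valid Timeframe enum name
        destdir='./data',
        delay=1,                   # seconds to sleep between Finam requests
        startdate=datetime.datetime(2020, 1, 1),
        enddate=datetime.datetime(2021, 1, 1),
        skiperr=True,
        skip_existing=False,
        update_existing=True,
    )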
class FinamDataDownloader:
    """Downloads financial data from Finam."""

    market_to_str = bidict({
        Market.COMMODITIES: "commodities",
        Market.BONDS: "bonds",
        Market.CURRENCIES_WORLD: "currencies_world",
        Market.CURRENCIES: "currencies",
        Market.ETF: "etf",
        Market.ETF_MOEX: "etf_moex",
        Market.FUTURES: "futures",
        Market.FUTURES_ARCHIVE: "futures_archive",
        Market.FUTURES_USA: "futures_usa",
        Market.INDEXES: "indexes",
        Market.SHARES: "shares",
        Market.USA: "usa",
        Market.SPB: "spb",
    })
    tf_to_str = bidict({
        Timeframe.TICKS: "ticks",
        Timeframe.MINUTES1: "minutes1",
        Timeframe.MINUTES5: "minutes5",
        Timeframe.MINUTES10: "minutes10",
        Timeframe.MINUTES15: "minutes15",
        Timeframe.MINUTES30: "minutes30",
        Timeframe.HOURLY: "hourly",
        Timeframe.DAILY: "daily",
        Timeframe.WEEKLY: "weekly",
        Timeframe.MONTHLY: "monthly",
    })

    def __init__(
        self,
        data_dir=Path(__file__).parent.parent.parent / 'data',
        start_date=datetime.date(2001, 1, 1),
        overwrite: bool = False,
    ):
        # parameters
        self.start_date = start_date
        self.data_dir: Path = data_dir
        self.overwrite: bool = overwrite
        # runtime objects
        self.exporter = Exporter()

    def download_all(
        self,
        market: str,
        timeframe: str,
    ):
        """Download every instrument listed for the given market."""
        market = self.market_to_str.inverse[market]
        timeframe = self.tf_to_str.inverse[timeframe]
        df = self.find_all(market=market)
        for i, row in df.iterrows():
            logger.info(
                f'Downloading {row["code"]} {i}/{len(df)} ({int(i) / len(df):.1%})')
            self.download_asset(id=row['id'],
                                ticker=row['code'],
                                timeframe=timeframe,
                                market=market)

    def find_all(self, market: Market) -> pd.DataFrame:
        """Find all securities we are interested in."""
        return self.exporter.lookup(
            name='',
            market=market,
            name_comparator=LookupComparator.STARTSWITH,
        ).sort_values('code').reset_index()

    def download_asset(
        self,
        id: int,
        ticker: str,
        market: Market,
        timeframe: Timeframe,
    ) -> None:
        """Download a single asset."""
        logger.info(
            f'Downloading data into {self.data_dir.resolve().absolute()}')
        try:
            fname = self.get_filename(ticker=ticker,
                                      market=market,
                                      timeframe=timeframe)
            logger.info(f'Checking {fname.resolve().absolute()}')
            if self.overwrite or not fname.exists():
                df = self.exporter.download(
                    id_=id,
                    start_date=self.start_date,
                    end_date=None,
                    market=market,
                    timeframe=timeframe,
                    delay=1,
                    max_in_progress_retries=10,
                )
                self.save_asset(df,
                                market=market,
                                timeframe=timeframe,
                                ticker=ticker)
            else:
                logger.info(f'{ticker} Skipping: already downloaded.')
        except Exception as e:  # noqa
            logger.info(f'{ticker} Download error. {e}')
            # raise e

    def save_asset(
        self,
        df: pd.DataFrame,
        market: Market,
        timeframe: Timeframe,
        ticker: str,
    ):
        """Save the result to a file."""
        try:
            path = self.get_filename(ticker=ticker,
                                     market=market,
                                     timeframe=timeframe)
            df.to_parquet(path, compression='brotli')
        except Exception as e:  # noqa
            logger.info(f'{ticker} Save error. {e}')
            # raise e

    def get_filename(
        self,
        ticker: str,
        market: Market,
        timeframe: Timeframe,
    ) -> Path:
        """Build the path of the file everything will be written to."""
        m = self.get_verbose_market(market)
        t = self.get_verbose_timeframe(timeframe)
        dir_path = self.data_dir / m / t
        dir_path.mkdir(parents=True, exist_ok=True)
        return dir_path / f"{ticker}.parquet"

    def get_verbose_market(self, market: Market) -> str:
        """Get the human-readable market name."""
        return self.market_to_str[market]

    def get_verbose_timeframe(self, tf: Timeframe) -> str:
        """Get the human-readable timeframe name."""
        return self.tf_to_str[tf]
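# Usage sketch (not part of the original class): how FinamDataDownloader might
# be driven from a script. The data directory, start date and logging setup are
# assumptions; the market/timeframe strings must be values of market_to_str and
# tf_to_str ("shares"/"daily" below).
if __name__ == '__main__':
    import datetime
    import logging
    from pathlib import Path

    logging.basicConfig(level=logging.INFO)

    downloader = FinamDataDownloader(
        data_dir=Path('./data'),
        start_date=datetime.date(2010, 1, 1),
        overwrite=False,           # keep already-downloaded parquet files
    )
    # Fetch daily candles for every instrument Finam lists under "shares".
    downloader.download_all(market='shares', timeframe='daily')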