def write_to_parquet(file, batches, base, quote, append=False):
    """Writes a batch of candlestick data to a parquet file."""

    df = pd.concat(batches, ignore_index=True)
    df = pp.quick_clean(df)

    if append:
        pp.append_raw_to_parquet(df, file, False)
    else:
        pp.write_raw_to_parquet(df, file, False)

    return len(df.index)


def all_candles_to_csv(base, quote, interval='1m', with_parquet=False):
    """Collect a list of candlestick batches with all candlesticks of a trading pair,
    concat into a dataframe and write it to CSV.
    """

    # see if there is any data saved on disk already
    try:
        batches = [pd.read_csv(f'data/{base}-{quote}_interval-{interval}.csv')]
        last_timestamp = batches[-1]['open_time'].max()
    except FileNotFoundError:
        batches = [pd.DataFrame([], columns=LABELS)]
        last_timestamp = 0
    old_lines = len(batches[-1].index)

    # gather all candlesticks available, starting from the last timestamp loaded from disk or 0
    # stop if the timestamp that comes back from the api is the same as the last one
    previous_timestamp = None
    while previous_timestamp != last_timestamp:
        # stop if we reached data from today
        if date.fromtimestamp(last_timestamp / 1000) >= date.today():
            break

        previous_timestamp = last_timestamp

        new_batch = get_batch(
            symbol=base + quote,
            interval=interval,
            start_time=last_timestamp + 1
        )

        # requesting candles from the future returns empty
        # also stop in case response code was not 200
        if new_batch.empty:
            break

        last_timestamp = new_batch['open_time'].max()

        # sometimes no new trades took place yet on date.today();
        # in this case the batch is nothing new
        if previous_timestamp == last_timestamp:
            break

        batches.append(new_batch)
        last_datetime = datetime.fromtimestamp(last_timestamp / 1000)

        covering_spaces = 20 * ' '
        print(datetime.now(), base, quote, interval, str(last_datetime) + covering_spaces,
              end='\r', flush=True)

    df = pd.concat(batches, ignore_index=True)
    df = pp.quick_clean(df)

    if with_parquet:
        # write clean version of csv to parquet
        parquet_name = f'{base}-{quote}.parquet'
        full_path = f'compressed/{parquet_name}'
        pp.write_raw_to_parquet(df, full_path)

        METADATA['data'].append({
            'description': f'All trade history for the pair {base} and {quote} at {interval} intervals. '
                           f'Counts {df.index.size} records.',
            'name': parquet_name,
            'totalBytes': os.stat(full_path).st_size,
            'columns': []
        })

    # in the case that new data was gathered write it to disk
    if len(batches) > 1:
        df.to_csv(f'data/{base}-{quote}_interval-{interval}.csv', index=False)
        return len(df.index) - old_lines
    return 0
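

# A minimal usage sketch (not part of the original script): the pair list below is
# illustrative only, and it assumes the data/ directory exists so the CSV can be written.
def example_backfill():
    for base, quote in [('BTC', 'USDT'), ('ETH', 'BTC')]:
        new_lines = all_candles_to_csv(base, quote, interval='1m')
        print(f'{datetime.now()} {base}-{quote}: {new_lines} new lines written')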