def __init__(self, endpoint):
    """Wire up logging, the shared clock/data singletons, and empty order state.

    Args:
        endpoint: the trading endpoint this instance talks to (stored as-is).
    """
    # Dedicated trade-level logger writing to its own file.
    self.logger = MyLogger.getLogger('TestApp', file='trades.log', level=MyLogger.TRADE)
    self.endpoint = endpoint
    # Shared singletons used throughout the application.
    self.datafactory = DataFactory.getInstance()
    self.clock = TradingClock.getInstance()
    # Be notified whenever the (simulated or real) time advances.
    self.clock.add_callback(self.time_change)
    # Per-order / per-subscriber bookkeeping, populated later.
    self.pending_orders = {}
    self.historical_subscribers = {}
def run(self):
    """Main backtest loop: replay each trading day between the earliest usable
    date and the latest available data, driving every configured strategy.

    Side effects: mutates the shared TradingClock, creates/stops a Manager per
    day, and stops the account when the simulation finishes.
    """
    DataFactory.prejack_symbols = self.symbols
    clock, datafactory = TradingClock.getInstance(), DataFactory.getInstance()
    # Configure
    # TODO: TOML
    # Where do we have available data
    earliest_data, latest_data = datafactory.datesSpread(barsize=self.barsize)
    # Skip forward so strategies have `setup_days` of history before trading.
    start_date = TradingCalendar.add_trading_days(earliest_data, self.setup_days)
    end_date = latest_data
    # TradingCalendar.add_trading_days(clock.date, 1)
    # clock.set_day(TradingCalendar.add_trading_days(latest_data, -12))
    # Get a groove on
    clock.set_day(start_date)
    while (clock.date <= end_date):
        self.logger.info(clock.date)
        # Daily setup: fresh Manager and fresh strategy instances each day.
        m = Manager(self.account, self.simtracker)
        strategies = [stype(m, **strategy_kwargs)
                      for stype, strategy_kwargs in self.strategies]
        # Guts of the simulation
        if self.rapid:
            # Rapid mode: jump straight to the configured strategy times.
            for simtime in [clock.mytz.localize(
                    datetime.datetime.combine(clock.date, time))
                    for time in self.times]:
                clock.sync_datetime = simtime
                for strategy in strategies:
                    strategy.update()
        else:
            # Full mode: walk every trading timestamp, updating only when the
            # wall time matches one of the configured strategy times.
            for simtime in TradingCalendar.tradingtimes(clock.date):
                clock.sync_datetime = simtime
                # On the hour
                # if clock.sync_datetime.time().hour > 10 and clock.sync_datetime.time().minute == 0:
                if clock.sync_datetime.time() in self.times:
                    # todo: need to really use the tws api before figuring this out..
                    for strategy in strategies:
                        strategy.update()
        # On the final simulated day, flatten all open positions.
        if clock.date == end_date:
            for strategy in strategies:
                strategy.closeall()
        # Daily teardown
        m.stop()
        # NEXT!
        clock.roll_day()
    self.account.stop()
    print("THREAD DONE.")
def __init__(self, startingcapital):
    """Create an account holding `startingcapital` in cash with empty books."""
    # Shared services.
    self.logger = MyLogger.getLogger("ACC")
    self.datafactory = DataFactory.getInstance()
    self.clock = TradingClock.getInstance()
    # Re-evaluate balances whenever the clock ticks.
    self.clock.add_callback(self.time_change)
    # Books: executed orders keyed by id, and open positions per symbol.
    self.orderLedger = {}
    self.portfolio = {}  # Portfolio(startingcapital)
    # Cash balance plus time-indexed history series.
    self.cash = startingcapital
    self.CASH = pd.Series()
    self.HOLDINGS = pd.Series()
    self.done = False
def scrape_new(ticker, period, start_date, end_date):
    """Scrapes new data to the current store.

    Downloads bars for `ticker` at `period` granularity between `start_date`
    and `end_date` and appends them to the symbol's CSV store, creating the
    file if needed. If a store already exists, only the dates after its last
    recorded bar are fetched.

    Returns:
        True on success (including "nothing to update"); False when the
        download failed. (Previously the failure paths fell off the end and
        returned None — made explicit here; None and False are both falsy.)
    """
    logger = Logger.getInstance()
    datafactory = DataFactory.getInstance()

    # File locations
    csv_path = datafactory.getDataDir(period) + datafactory.symbol2file(ticker)
    record_exists = Path(csv_path).exists()

    # Attempt to find the date range already on disk so we only fetch new bars.
    record = None
    try:
        if record_exists:
            record = datafactory.loadSymbol(ticker)
            record_end_date = record.index[-1]
            daydelta = datetime.timedelta(days=1)
            # Resume from the day after the last stored bar.
            start_date = max(start_date, (record_end_date + daydelta).date())

            # Can potentially skip: nothing to do when no trading day falls
            # inside the requested window.
            dates_between = pd.date_range(start_date, end_date - daydelta,
                                          freq='d')
            dates_between_workdays = pd.Series(dates_between).transform(
                lambda x: tcal.is_trading_day(x))
            if (dates_between_workdays.empty) or (
                    not dates_between_workdays.any()):
                logger.LogEvent("INFO",
                                f"No dates to update for {ticker} {period}")
                return True
    except Exception as e:
        # Best effort: log and fall through to attempt the scrape anyway.
        logger.LogEvent(
            "ERROR",
            f"Error getting date ({ticker}, {period}): {e}, {type(e)}")

    # Attempt to scrape the data
    try:
        logger.LogEvent(
            "INFO",
            f"Collecting {ticker} {period} from {start_date} to {end_date}")
        dataframe_dl = getDataframe(ticker, start_date, end_date, period,
                                    instant=False)
        if not dataframe_dl.empty:
            if record is None:  # Failed to load
                if record_exists:
                    # Keep a dated backup before overwriting a file we could
                    # not parse.
                    today = datetime.datetime.now()
                    copyfile(
                        csv_path,
                        f"{csv_path[:-4]} - Copy {today.month}-{today.day}{csv_path[-4:]}"
                    )
                dataframe_dl.to_csv(csv_path)
            else:
                # Existing record loaded fine: append the new rows only.
                dataframe_dl.to_csv(csv_path, mode='a', header=False)
        return True
    except Exception as e:
        logger.LogEvent(
            "ERROR",
            f"Error downloading ({ticker}, {period}): {e}, {type(e)}")
        return False
def scrape_repair(ticker, period, start_date):
    """Aim to fill missing gaps. First aims for days. Then times.

    PT1 finds whole trading days absent from the store and re-downloads them;
    PT2 (intraday periods only) finds individual missing bar times within each
    day and patches them row by row.

    Returns:
        (ok, outstanding_dates, outstanding_times) — ok is False with -1/-1
        counters when either repair phase raised.

    Fixes vs. previous revision:
      * `index_sort()` is not a pandas method — use `sort_index()`, so an
        out-of-order patched index no longer raises inside the try block.
      * `fixed_rows` is now initialised up front; previously the "Leftovers"
        section raised NameError for period == "1d" or when nothing was
        missing.
      * `Index.is_monotonic` (removed in pandas 2.0) replaced with
        `is_monotonic_increasing`.
    """
    logger = Logger.getInstance()
    datafactory = DataFactory.getInstance()
    daydelta = datetime.timedelta(days=1)
    csv_path = datafactory.getDataDir(period) + datafactory.symbol2file(ticker)

    # Load the dataframe, get list of dates (drop duplicated timestamps).
    dataframe_full = datafactory.loadSymbol(ticker, period)
    dataframe_full = dataframe_full[~dataframe_full.index.duplicated()]
    dataframe_dates = pd.Series(
        dataframe_full.index).transform(lambda x: x.date())
    dataframe_full_dates = sorted(
        [x for x in set(dataframe_dates) if x >= start_date])
    mytz = TradingClock.mytz

    # PT1: Are any dates missing
    try:
        prefix_dates = len(dataframe_full_dates)
        missing_dates = []
        # Any trading day strictly between two consecutive stored days is missing.
        for datei in range(1, len(dataframe_full_dates)):
            day1 = dataframe_full_dates[datei - 1]
            day2 = dataframe_full_dates[datei]
            missing_dates = missing_dates + [
                x.date() for x in pd.date_range(
                    day1 + daydelta, day2 - daydelta, freq='d')
                if tcal.is_trading_day(x.date())
            ]
        if len(missing_dates) > 0:
            # Combine missing dates to ranges
            missing_ranges = list(
                zip(missing_dates, [
                    tcal.add_trading_days(missing_date, 1)
                    for missing_date in missing_dates
                ]))
            # zip(missing_dates, [next_working_day(missing_date, cal) for missing_date in missing_dates]))
            # Merge ranges that (nearly) touch, walking backwards so pops are safe.
            for datei in range(len(missing_ranges) - 2, -1, -1):
                c1, c2 = missing_ranges[datei]
                n1, n2 = missing_ranges[datei + 1]
                if c2 + datetime.timedelta(
                        days=(0 if period == "5m" else 50)) >= n1:
                    missing_ranges.pop(datei + 1)
                    missing_ranges[datei] = (c1, n2)
            # Patch it up
            logger.LogEvent(
                "INFO", f"Collecting missing dates for {ticker} {period}")
            for missing_start, missing_end in missing_ranges:
                dataframe_patch = getDataframe(ticker, missing_start,
                                               missing_end, period,
                                               instant=False)
                if not dataframe_patch.empty:
                    # Splice the patch between everything before/after the gap.
                    before_dl = dataframe_full[:datetime.datetime(
                        year=missing_start.year,
                        month=missing_start.month,
                        day=missing_start.day,
                        tzinfo=mytz)]
                    after_dl = dataframe_full[datetime.datetime(
                        year=missing_end.year,
                        month=missing_end.month,
                        day=missing_end.day,
                        tzinfo=mytz):]
                    dataframe_full = pd.concat(
                        [before_dl, dataframe_patch, after_dl])
                else:
                    logger.LogEvent(
                        "WARN",
                        f"Cannot find data for ({ticker}, {period}) between {missing_start}=>{missing_end} to patch data"
                    )
        else:
            logger.LogEvent("INFO",
                            f"No missing dates for {ticker} {period}")
        # Recount the stored dates to see whether anything was added.
        dataframe_dates = pd.Series(
            dataframe_full.index).transform(lambda x: x.date())
        dataframe_full_dates = sorted(
            [x for x in set(dataframe_dates) if x >= start_date])
        postfix_dates = len(dataframe_full_dates)
        if prefix_dates < postfix_dates:
            # Over-write what we have saved
            dataframe_full.index.name = "Datetime"
            dataframe_full.to_csv(csv_path,
                                  index_label=dataframe_full.index.name)
    except Exception as e:
        logger.LogEvent(
            "ERROR",
            f"Error fixing missing dates for ({ticker}, {period}): {e}, {type(e)}"
        )
        return False, -1, -1

    # PT2: What is the content like ??
    try:
        missing_days_times = {}
        prefix_rows = len(dataframe_full)
        # Default for the Leftovers section when PT2 has nothing to do
        # (daily bars, or no intraday gaps found).
        fixed_rows = 0
        if period != "1d":
            # setup
            missing_cutoff = datetime.time(14, 00)
            if period == "5m":
                t_range = pd.Series(
                    pd.date_range("10:00", "15:55",
                                  freq="5min")).transform(lambda x: x.time())
            else:
                raise ValueError(f"Period {period} not supported")
            # fill the missing_days_times dict
            for df_date in dataframe_full_dates:
                # List of times for the day
                df_dt = datetime.datetime(year=df_date.year,
                                          month=df_date.month,
                                          day=df_date.day,
                                          tzinfo=mytz)
                t_dataframe = pd.Series(
                    dataframe_full[df_dt:df_dt + daydelta].index).transform(
                        lambda x: x.time())
                # Are all of these times in the expected time range?
                missing_times = t_range[~t_range.isin(t_dataframe)]
                if tcal.is_partial_trading_day(df_date):
                    # Short sessions legitimately stop early.
                    missing_times = [
                        x for x in missing_times if x < missing_cutoff
                    ]
                if len(missing_times) > 0:
                    missing_days_times[df_date] = missing_times
            # If there is any data missing, try and fix
            missing_times_dates = list(missing_days_times.keys())
            if len(missing_times_dates) > 0:
                # Combine missing dates to ranges
                missing_ranges = list(
                    zip(missing_times_dates, [
                        tcal.add_trading_days(missing_date, 1)
                        for missing_date in missing_times_dates
                    ]))
                for datei in range(len(missing_ranges) - 2, -1, -1):
                    c1, c2 = missing_ranges[datei]
                    n1, n2 = missing_ranges[datei + 1]
                    # Give this one a bit of room, there are more missing
                    if c2 + datetime.timedelta(days=2) >= n1:
                        missing_ranges.pop(datei + 1)
                        missing_ranges[datei] = (c1, n2)
                logger.LogEvent(
                    "INFO", f"Collecting missing times for {ticker} {period}")
                for missing_start, missing_end in missing_ranges:
                    dataframe_patch = getDataframe(ticker, missing_start,
                                                   missing_end, period,
                                                   instant=False)
                    patch_dates = set(
                        pd.Series(dataframe_patch.index).transform(
                            lambda x: x.date()))
                    for patch_date in sorted(list(patch_dates)):
                        # Check if the data wasn't added when grouping ranges
                        if patch_date in missing_days_times.keys():
                            missing_dtimes = pd.Series([
                                datetime.datetime.combine(patch_date, mdt)
                                for mdt in missing_days_times[patch_date]
                            ]).transform(lambda x: x.tz_localize(mytz))
                            times_found = missing_dtimes[missing_dtimes.isin(
                                dataframe_patch.index)]
                            for found_time in times_found:
                                # patcher = dataframe_patch.loc[found_time]
                                # Insert the single recovered row in index order.
                                before_dl = dataframe_full[
                                    dataframe_full.index < found_time]
                                patcher = pd.DataFrame(
                                    [dataframe_patch.loc[found_time].values],
                                    columns=[
                                        xx for xx in dataframe_full.columns
                                        if not xx == "Datetime"
                                    ],
                                    index=pd.DatetimeIndex([found_time]))
                                after_dl = dataframe_full[
                                    dataframe_full.index > found_time]
                                dataframe_full = pd.concat(
                                    [before_dl, patcher, after_dl])
                # Check that some changes were actually made...
                fixed_rows = len(dataframe_full) - prefix_rows
                if fixed_rows > 0:
                    logger.LogEvent(
                        "INFO",
                        f"Patched {fixed_rows} rows successfully for {ticker} {period}"
                    )
                    if not dataframe_full.index.is_monotonic_increasing:
                        # Repair ordering before persisting; was index_sort(),
                        # which does not exist on DataFrame.
                        dataframe_full = dataframe_full.sort_index()
                        logger.LogEvent("ERROR",
                                        f"Index not sorted properly {ticker}")
                    dataframe_full.index.name = "Datetime"
                    dataframe_full.to_csv(csv_path)
                else:
                    logger.LogEvent(
                        "WARN",
                        f"No missing time patched for {ticker} {period}")
    except Exception as e:
        logger.LogEvent(
            "ERROR",
            f"Error fixing missing times for ({ticker}, {period}): {e}, {type(e)}"
        )
        return False, -1, -1

    # Leftovers
    fixed_dates = postfix_dates - prefix_dates
    outstanding_dates = len(missing_dates) - fixed_dates
    fixed_times = fixed_rows
    outstanding_times = sum([len(x) for x in missing_days_times.values()
                             ]) - fixed_times
    return True, outstanding_dates, outstanding_times
if __name__ == "__main__": """Runs scheduled data scrapes and repairs. Executed by batch script daily""" # TODO: Change ledger to reflect periods (sep 5m / 1d) print("DATA SCRAPER UTILITY") # Parse args testing = False for arg in sys.argv: if arg == "-test": print("-test detected") testing = True print("") # Setup logger = Logger.getInstance() datafactory = DataFactory.getInstance() # lEDGER DETAILS ledger_path = TICKERS_LEDGER tickers_ledger = pd.read_csv(ledger_path) dtf = "%d/%m/%Y" # 20/12/2020 def empty_col(val): if type(val) is not float: return True return not np.isnan(val) # Tickers scheduled for collection coll_dates = tickers_ledger["COLL_LAST"].transform( lambda x: datetime.datetime.strptime(x, dtf).date() if empty_col(x) else datetime.date(1900, 1, 1))
# Retired crossover entry/exit logic, kept for reference:
# # invol, inval = self.manager.account.getPosition(symbol)
# if lower_cross ^ upper_cross:
#     if lower_cross:
#         if not invol:
#             #if current_price < sma_long_plus[-1]:
#             goal = self.manager.account.cash // 2
#             minval = 1000
#             if goal > minval:
#                 self.openPosition(symbol, goal // asset_data.values[-1])
#     if upper_cross:
#         if invol:
#             self.closePosition(symbol, invol)

if __name__ == "__main__":
    # Ad-hoc smoke test: build a CrystalV3 strategy against the live
    # singletons with minimal moving-average parameters.
    DataFactory.repaired = True
    clock = TradingClock.getInstance()
    datafactory = DataFactory.getInstance()
    clock.sync_datetime = datetime.datetime.now()
    manager = Manager()
    strategy = CrystalV3(
        manager, "SA", [], {
            "ma_days": 1,
            "ma_long_days": 1,
            "positive_sigma": 0.25,
            "negative_sigma": 0
        })
    print('asdf')
def refresh(self, view=0):
    """Redraw the orders table, holdings table and account-value chart.

    Best-effort: rows whose prices cannot be fetched are silently skipped,
    and chart-axis errors are logged rather than raised.
    NOTE(review): `view` appears unused beyond the commented [:view+1]
    slices — presumably a planned pagination parameter; confirm.
    """
    # --- Orders treeview: rebuild from the account's order ledger. ---
    self.orderstv.delete(*self.orderstv.get_children())
    for i, order in enumerate(list(
            self.account.orderLedger.values())):  # [:view+1]
        symbol = order["contract"].symbol
        # Optional symbol filter typed into the orders entry box.
        if self.orders_entry.get() != "":
            if symbol != self.orders_entry.get().upper():
                continue
        self.orderstv.insert("", tk.END, i,
                             values=(f"{i + 1}", symbol,
                                     order["order"].action,
                                     order['time'].strftime('%d/%m %H:%M'),
                                     round(order["avgFillPrice"], 3),
                                     order["filled"]))
    # --- Holdings treeview: position, price, unrealised %, realised P&L. ---
    self.holdingstv.delete(*self.holdingstv.get_children())
    holdings = []
    for symbol in self.account.portfolio.keys():  # [:view+1]
        try:
            vol, avgprice = self.account.getPosition(symbol)
            price = DataFactory.getInstance().getLatestPrice(symbol)
            percent = 100 * (price - avgprice) / avgprice
            realised = round(self.account.getRealised(symbol), 2)
            holdings.append((symbol, vol, round(avgprice, 3),
                             round(price, 3), round(percent, 3),
                             round(realised, 3)))
        except Exception as e:
            # Deliberate best-effort: skip symbols we cannot price right now.
            #self.logger.warn(f"Failed to update portfolio side view {e}")
            pass
    # Sort by realised P&L then unrealised percent before display.
    for i, (symbol, vol, avgprice, price, percent, realised) in enumerate(
            sorted(holdings, key=lambda item: (item[-1], item[-2]))):
        self.holdingstv.insert(
            "", tk.END, i,
            values=(symbol, vol, avgprice, price,
                    f"{'+' if percent > 0 else ''}{percent}", realised))
    # --- Account-value chart: replot the full VALUE series. ---
    self.f.clf()
    times = self.account.VALUE.index
    values = self.account.VALUE.values
    self.a = self.f.add_subplot(111)
    self.a.grid(True)
    #self.a.xaxis.set_major_formatter(mdates.DateFormatter("%m-%d"))
    #self.a.xaxis.set_minor_formatter(mdates.DateFormatter("%m-%d"))
    self.a.plot(self.account.VALUE)
    # self.value.set_data(times, values)
    try:
        #https://matplotlib.org/stable/gallery/text_labels_and_annotations/date_index_formatter.html
        # Axis limits raise on an empty series; treated as non-fatal.
        self.a.set_xlim(times[0], times[-1])
        self.a.set_ylim(min(values) * 0.99, max(values) * 1.01)
        #self.f.autofmt_xdate()
        self.canvas.draw()
    except Exception as e:
        self.logger.warn(f"Failed to update sideview {e}")