def __init__(self, name, space=Space(), dynamic=True, members=None):
    # a shared mutable default would leak members across groups,
    # so the default member set is created per instance
    if name not in Group.groups:
        self.name = name
        self.space = space
        self.dynamic = dynamic
        self.members = members if members is not None else set()
        # add group to groups list
        Group.groups[name] = self
    else:
        logger.info("Group %s already exists", name)
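# A minimal usage sketch, not part of the original module: Group keeps a
# class-level registry (Group.groups) keyed by name, so constructing a group
# registers it and a duplicate name is only logged. The import paths assume
# the alphapy package layout (alphapy.group, alphapy.space); adjust to match
# your install.

from alphapy.group import Group
from alphapy.space import Space

tech = Group('tech', Space('stock', 'yahoo', '1d'))
tech.add(['AAPL', 'MSFT', 'NVDA'])       # populate the member set
Group('tech')                            # logs "Group tech already exists"
assert Group.groups['tech'] is tech      # the first registration is kept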
def __init__(self, group_name, tag, space=Space(), maxpos=10,
             posby='close', kopos=0, koby='-profit', restricted=False,
             weightby='quantity', startcap=100000, margin=0.5,
             mincash=0.2, fixedfrac=0.1, maxloss=0.1):
    # initialization
    self.group_name = group_name
    self.tag = tag
    self.space = space
    self.positions = {}
    self.startdate = None
    self.enddate = None
    self.npos = 0
    self.maxpos = maxpos
    self.posby = posby
    self.kopos = kopos
    self.koby = koby
    self.restricted = restricted
    self.weightby = weightby
    self.weights = []
    self.startcap = startcap
    self.cash = startcap
    self.margin = margin
    self.mincash = mincash
    self.fixedfrac = fixedfrac
    self.maxloss = maxloss
    self.value = startcap
    self.netprofit = 0.0
    self.netreturn = 0.0
    self.totalprofit = 0.0
    self.totalreturn = 0.0
    # add portfolio to portfolios list
    pn = portfolio_name(group_name, tag)
    Portfolio.portfolios[pn] = self
def __new__(cls, group_name, tag, space=Space(), maxpos=10,
            posby='close', kopos=0, koby='-profit', restricted=False,
            weightby='quantity', startcap=100000, margin=0.5,
            mincash=0.2, fixedfrac=0.1, maxloss=0.1):
    # create portfolio name
    pn = portfolio_name(group_name, tag)
    if pn not in Portfolio.portfolios:
        return super(Portfolio, cls).__new__(cls)
    else:
        logger.info("Portfolio %s already exists", pn)
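# A companion sketch under the same import assumptions: __new__ guards the
# Portfolio.portfolios registry, so a (group_name, tag) pair is allocated at
# most once; a duplicate request yields None, which callers test for.

from alphapy.portfolio import Portfolio, portfolio_name
from alphapy.space import Space

p = Portfolio('tech', 'breakout', Space('stock', 'yahoo', '1d'),
              startcap=250000, fixedfrac=0.05)
pn = portfolio_name('tech', 'breakout')       # the registry key for this pair
assert Portfolio.portfolios[pn] is p
assert Portfolio('tech', 'breakout') is None  # duplicate is rejected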
def get_market_config(): r"""Read the configuration file for MarketFlow. Parameters ---------- None : None Returns ------- specs : dict The parameters for controlling MarketFlow. """ logger.info("MarketFlow Configuration") # Read the configuration file full_path = SSEP.join([PSEP, 'config', 'market.yml']) with open(full_path, 'r') as ymlfile: cfg = yaml.load(ymlfile, Loader=yaml.FullLoader) # Store configuration parameters in dictionary specs = {} # Section: market [this section must be first] specs['create_model'] = cfg['market']['create_model'] fractal = cfg['market']['data_fractal'] try: _ = pd.to_timedelta(fractal) except: logger.info("data_fractal [%s] is an invalid pandas offset", fractal) specs['data_fractal'] = fractal specs['data_history'] = cfg['market']['data_history'] specs['forecast_period'] = cfg['market']['forecast_period'] fractal = cfg['market']['fractal'] try: test_interval = pd.to_timedelta(fractal) except: logger.info("fractal [%s] is an invalid pandas offset", fractal) specs['fractal'] = fractal specs['lag_period'] = cfg['market']['lag_period'] specs['leaders'] = cfg['market']['leaders'] specs['predict_history'] = cfg['market']['predict_history'] specs['schema'] = cfg['market']['schema'] specs['subschema'] = cfg['market']['subschema'] specs['api_key_name'] = cfg['market']['api_key_name'] specs['api_key'] = cfg['market']['api_key'] specs['subject'] = cfg['market']['subject'] specs['target_group'] = cfg['market']['target_group'] # Set API Key environment variable if specs['api_key']: os.environ[specs['api_key_name']] = specs['api_key'] # Create the subject/schema/fractal namespace sspecs = [specs['subject'], specs['schema'], specs['fractal']] space = Space(*sspecs) # Section: features try: logger.info("Getting Features") specs['features'] = cfg['features'] except: logger.info("No Features Found") specs['features'] = {} # Section: groups try: logger.info("Defining Groups") for g, m in list(cfg['groups'].items()): Group(g, space) Group.groups[g].add(m) except: logger.info("No Groups Found") # Section: aliases try: logger.info("Defining Aliases") for k, v in list(cfg['aliases'].items()): Alias(k, v) except: logger.info("No Aliases Found") # Section: system try: logger.info("Getting System Parameters") specs['system'] = cfg['system'] except: logger.info("No System Parameters Found") specs['system'] = {} # Section: variables logger.info("Defining AlphaPy Variables [phigh, plow]") Variable('phigh', 'probability >= 0.7') Variable('plow', 'probability <= 0.3') try: logger.info("Defining User Variables") for k, v in list(cfg['variables'].items()): Variable(k, v) except: logger.info("No Variables Found") # Section: functions try: logger.info("Getting Variable Functions") specs['functions'] = cfg['functions'] except: logger.info("No Variable Functions Found") specs['functions'] = {} # Log the stock parameters logger.info('MARKET PARAMETERS:') logger.info('api_key = %s', specs['api_key']) logger.info('api_key_name = %s', specs['api_key_name']) logger.info('create_model = %r', specs['create_model']) logger.info('data_fractal = %s', specs['data_fractal']) logger.info('data_history = %d', specs['data_history']) logger.info('features = %s', specs['features']) logger.info('forecast_period = %d', specs['forecast_period']) logger.info('fractal = %s', specs['fractal']) logger.info('lag_period = %d', specs['lag_period']) logger.info('leaders = %s', specs['leaders']) logger.info('predict_history = %s', specs['predict_history']) logger.info('schema = %s', specs['schema']) logger.info('subject 
= %s', specs['subject']) logger.info('subschema = %s', specs['subschema']) logger.info('system = %s', specs['system']) logger.info('target_group = %s', specs['target_group']) # Market Specifications return specs
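# For reference, a sketch of the 'market' section this function expects in
# config/market.yml, expressed as an inline string so the shape can be
# checked with yaml.safe_load. The keys mirror the cfg['market'] lookups
# above; every value here is illustrative only.

import yaml

market_yml = """
market:
  create_model    : True
  data_fractal    : 1min
  data_history    : 500
  forecast_period : 1
  fractal         : 1D
  lag_period      : 1
  leaders         : []
  predict_history : 100
  schema          : prices
  subschema       : yahoo
  api_key_name    : API_KEY
  api_key         : ''
  subject         : stock
  target_group    : tech
"""

cfg = yaml.safe_load(market_yml)
assert cfg['market']['fractal'] == '1D'       # a valid pandas offset alias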
def get_market_config(): r"""Read the configuration file for MarketFlow. Parameters ---------- None : None Returns ------- specs : dict The parameters for controlling MarketFlow. """ logger.info("MarketFlow Configuration") # Read the configuration file full_path = SSEP.join([PSEP, 'config', 'market.yml']) with open(full_path, 'r') as ymlfile: cfg = yaml.load(ymlfile) # Store configuration parameters in dictionary specs = {} # Section: market [this section must be first] specs['forecast_period'] = cfg['market']['forecast_period'] specs['fractal'] = cfg['market']['fractal'] specs['leaders'] = cfg['market']['leaders'] specs['data_history'] = cfg['market']['data_history'] specs['predict_history'] = cfg['market']['predict_history'] specs['schema'] = cfg['market']['schema'] specs['target_group'] = cfg['market']['target_group'] # Create the subject/schema/fractal namespace sspecs = ['stock', specs['schema'], specs['fractal']] space = Space(*sspecs) # Section: features try: logger.info("Getting Features") specs['features'] = cfg['features'] except: logger.info("No Features Found") specs['features'] = {} # Section: groups try: logger.info("Defining Groups") for g, m in cfg['groups'].items(): Group(g, space) Group.groups[g].add(m) except: logger.info("No Groups Found") # Section: aliases try: logger.info("Defining Aliases") for k, v in cfg['aliases'].items(): Alias(k, v) except: logger.info("No Aliases Found") # Section: system try: logger.info("Getting System Parameters") specs['system'] = cfg['system'] except: logger.info("No System Parameters Found") specs['system'] = {} # Section: variables try: logger.info("Defining Variables") for k, v in cfg['variables'].items(): Variable(k, v) except: logger.info("No Variables Found") # Section: functions try: logger.info("Getting Variable Functions") specs['functions'] = cfg['functions'] except: logger.info("No Variable Functions Found") specs['functions'] = {} # Log the stock parameters logger.info('MARKET PARAMETERS:') logger.info('features = %s', specs['features']) logger.info('forecast_period = %d', specs['forecast_period']) logger.info('fractal = %s', specs['fractal']) logger.info('leaders = %s', specs['leaders']) logger.info('data_history = %d', specs['data_history']) logger.info('predict_history = %s', specs['predict_history']) logger.info('schema = %s', specs['schema']) logger.info('system = %s', specs['system']) logger.info('target_group = %s', specs['target_group']) # Market Specifications return specs
def run_system(model, system, group, intraday=False, quantity=1):
    r"""Run a system for a given group, creating a trades frame.

    Parameters
    ----------
    model : alphapy.Model
        The model object with specifications.
    system : alphapy.System
        The system to run.
    group : alphapy.Group
        The group of symbols to trade.
    intraday : bool, optional
        If True, this is an intraday system.
    quantity : float, optional
        The amount to trade for each symbol, e.g., the number of shares.

    Returns
    -------
    tf : pandas.DataFrame
        All of the trades for this ``group``.

    """

    system_name = system.name
    logger.info("Generating Trades for System %s", system_name)

    # Unpack the model data.

    directory = model.specs['directory']
    extension = model.specs['extension']
    separator = model.specs['separator']

    # Extract the group information.

    gname = group.name
    gmembers = group.members
    gspace = group.space

    # Run the system for each member of the group

    gtlist = []
    for symbol in gmembers:
        # generate the trades for this member
        tlist = trade_system(model, system, gspace, intraday, symbol, quantity)
        if tlist:
            # add trades to global trade list
            for item in tlist:
                gtlist.append(item)
        else:
            logger.info("No trades for symbol %s", symbol)

    # Create group trades frame

    tf = None
    if gtlist:
        tspace = Space(system_name, "trades", group.space.fractal)
        gtlist = sorted(gtlist, key=lambda x: x[0])
        # DataFrame.from_items was removed in pandas 1.0; build the frame
        # positionally so duplicate trade dates are preserved
        tf = DataFrame([row for _, row in gtlist],
                       index=[idx for idx, _ in gtlist],
                       columns=Trade.states)
        tfname = frame_name(gname, tspace)
        system_dir = SSEP.join([directory, 'systems'])
        labels = ['date']
        if intraday:
            labels.append('time')
        write_frame(tf, system_dir, tfname, extension, separator,
                    index=True, index_label=labels)
        del tspace
    else:
        logger.info("No trades were found")

    # Return trades frame

    return tf
def run_system(model, system, group, quantity=1):
    r"""Run a system for a given group, creating a trades frame.

    Parameters
    ----------
    model : alphapy.Model
        The model object with specifications.
    system : alphapy.System or str
        The system to run, either a long/short system or a local one
        identified by function name, e.g., 'open_range_breakout'.
    group : alphapy.Group
        The group of symbols to test.
    quantity : float
        The amount to trade for each symbol, e.g., the number of shares.

    Returns
    -------
    tf : pandas.DataFrame
        All of the trades for this ``group``.

    """

    if isinstance(system, str):
        system_name = system
    else:
        system_name = system.name
    logger.info("Generating Trades for System %s", system_name)

    # Unpack the model data.

    directory = model.specs['directory']
    extension = model.specs['extension']
    separator = model.specs['separator']

    # Extract the group information.

    gname = group.name
    gmembers = group.members
    gspace = group.space

    # Run the system for each member of the group

    gtlist = []
    for symbol in gmembers:
        # generate the trades for this member
        if isinstance(system, str):
            try:
                tlist = globals()[system_name](symbol, gspace, quantity)
            except Exception:
                # default to no trades so the check below cannot
                # fail on an undefined name
                tlist = None
                logger.info("Could not execute system for %s", symbol)
        else:
            # call default long/short system
            tlist = long_short(system, symbol, gspace, quantity)
        if tlist:
            # add trades to global trade list
            for item in tlist:
                gtlist.append(item)
        else:
            logger.info("No trades for symbol %s", symbol)

    # Create group trades frame

    tf = None
    if gtlist:
        tspace = Space(system_name, "trades", group.space.fractal)
        gtlist = sorted(gtlist, key=lambda x: x[0])
        # DataFrame.from_items was removed in pandas 1.0; build the frame
        # positionally so duplicate trade dates are preserved
        tf = DataFrame([row for _, row in gtlist],
                       index=[idx for idx, _ in gtlist],
                       columns=Trade.states)
        tfname = frame_name(gname, tspace)
        system_dir = SSEP.join([directory, 'systems'])
        write_frame(tf, system_dir, tfname, extension, separator, index=True)
        del tspace
    else:
        logger.info("No trades were found")

    # Return trades frame

    return tf
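# A sketch of driving the variant above. Per its docstring, a local system
# can be passed by function name and is resolved through globals(), or a
# long/short System object can be passed directly. Assumes `model` is an
# already-configured alphapy.Model and the 'tech' group was defined earlier.

group = Group.groups['tech']
tf = run_system(model, 'open_range_breakout', group, quantity=100)
if tf is not None:
    print(tf.head())     # one row per trade, indexed by trade date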
def main(args=None): r"""The main program for SportFlow. Notes ----- (1) Initialize logging. (2) Parse the command line arguments. (3) Get the game configuration. (4) Get the model configuration. (5) Generate game frames for each season. (6) Create statistics for each team. (7) Merge the team frames into the final model frame. (8) Run the AlphaPy pipeline. Raises ------ ValueError Training date must be before prediction date. """ # Logging logging.basicConfig(format="[%(asctime)s] %(levelname)s\t%(message)s", filename="sport_flow.log", filemode='a', level=logging.DEBUG, datefmt='%m/%d/%y %H:%M:%S') formatter = logging.Formatter("[%(asctime)s] %(levelname)s\t%(message)s", datefmt='%m/%d/%y %H:%M:%S') console = logging.StreamHandler() console.setFormatter(formatter) console.setLevel(logging.INFO) logging.getLogger().addHandler(console) logger = logging.getLogger(__name__) # Start the pipeline logger.info('*'*80) logger.info("SportFlow Start") logger.info('*'*80) # Argument Parsing parser = argparse.ArgumentParser(description="SportFlow Parser") parser.add_argument('--pdate', dest='predict_date', help="prediction date is in the format: YYYY-MM-DD", required=False, type=valid_date) parser.add_argument('--tdate', dest='train_date', help="training date is in the format: YYYY-MM-DD", required=False, type=valid_date) parser.add_mutually_exclusive_group(required=False) parser.add_argument('--predict', dest='predict_mode', action='store_true') parser.add_argument('--train', dest='predict_mode', action='store_false') parser.set_defaults(predict_mode=False) args = parser.parse_args() # Set train and predict dates if args.train_date: train_date = args.train_date else: train_date = pd.datetime(1900, 1, 1).strftime("%Y-%m-%d") if args.predict_date: predict_date = args.predict_date else: predict_date = datetime.date.today().strftime("%Y-%m-%d") # Verify that the dates are in sequence. if train_date >= predict_date: raise ValueError("Training date must be before prediction date") else: logger.info("Training Date: %s", train_date) logger.info("Prediction Date: %s", predict_date) # Read game configuration file sport_specs = get_sport_config() # Section: game league = sport_specs['league'] points_max = sport_specs['points_max'] points_min = sport_specs['points_min'] random_scoring = sport_specs['random_scoring'] seasons = sport_specs['seasons'] window = sport_specs['rolling_window'] # Read model configuration file specs = get_model_config() # Add command line arguments to model specifications specs['predict_mode'] = args.predict_mode specs['predict_date'] = args.predict_date specs['train_date'] = args.train_date # Unpack model arguments directory = specs['directory'] target = specs['target'] # Create directories if necessary output_dirs = ['config', 'data', 'input', 'model', 'output', 'plots'] for od in output_dirs: output_dir = SSEP.join([directory, od]) if not os.path.exists(output_dir): logger.info("Creating directory %s", output_dir) os.makedirs(output_dir) # Create the game scores space space = Space('game', 'scores', '1g') # # Derived Variables # series = space.schema team1_prefix = 'home' team2_prefix = 'away' home_team = PSEP.join([team1_prefix, 'team']) away_team = PSEP.join([team2_prefix, 'team']) # # Read in the game frame. This is the feature generation phase. 
# logger.info("Reading Game Data") data_dir = SSEP.join([directory, 'data']) file_base = USEP.join([league, space.subject, space.schema, space.fractal]) df = read_frame(data_dir, file_base, specs['extension'], specs['separator']) logger.info("Total Game Records: %d", df.shape[0]) # # Locate any rows with null values # null_rows = df.isnull().any(axis=1) null_indices = [i for i, val in enumerate(null_rows.tolist()) if val == True] for i in null_indices: logger.info("Null Record: %d on Date: %s", i, df.date[i]) # # Run the game pipeline on a seasonal loop # if not seasons: # run model on all seasons seasons = df['season'].unique().tolist() # # Initialize the final frame # ff = pd.DataFrame() # # Iterate through each season of the game frame # for season in seasons: # Generate a frame for each season gf = df[df['season'] == season] gf = gf.reset_index() # Generate derived variables for the game frame total_games = gf.shape[0] if random_scoring: gf['home.score'] = np.random.randint(points_min, points_max, total_games) gf['away.score'] = np.random.randint(points_min, points_max, total_games) gf['total_points'] = gf['home.score'] + gf['away.score'] gf = add_features(gf, game_dict, gf.shape[0]) for index, row in gf.iterrows(): gf['point_margin_game'].at[index] = get_point_margin(row, 'home.score', 'away.score') gf['won_on_points'].at[index] = True if gf['point_margin_game'].at[index] > 0 else False gf['lost_on_points'].at[index] = True if gf['point_margin_game'].at[index] < 0 else False gf['cover_margin_game'].at[index] = gf['point_margin_game'].at[index] + row['line'] gf['won_on_spread'].at[index] = True if gf['cover_margin_game'].at[index] > 0 else False gf['lost_on_spread'].at[index] = True if gf['cover_margin_game'].at[index] <= 0 else False gf['overunder_margin'].at[index] = gf['total_points'].at[index] - row['over_under'] gf['over'].at[index] = True if gf['overunder_margin'].at[index] > 0 else False gf['under'].at[index] = True if gf['overunder_margin'].at[index] < 0 else False # Generate each team frame team_frames = {} teams = gf.groupby([home_team]) for team, data in teams: team_frame = USEP.join([league, team.lower(), series, str(season)]) logger.info("Generating team frame: %s", team_frame) tf = get_team_frame(gf, team, home_team, away_team) tf = tf.reset_index() tf = generate_team_frame(team, tf, home_team, away_team, window) team_frames[team_frame] = tf # Create the model frame, initializing the home and away frames mdict = {k:v for (k,v) in list(sports_dict.items()) if v != bool} team1_frame = pd.DataFrame() team1_frame = add_features(team1_frame, mdict, gf.shape[0], prefix=team1_prefix) team2_frame = pd.DataFrame() team2_frame = add_features(team2_frame, mdict, gf.shape[0], prefix=team2_prefix) frames = [gf, team1_frame, team2_frame] mf = pd.concat(frames, axis=1) # Loop through each team frame, inserting data into the model frame row # get index+1 [if valid] # determine if team is home or away to get prefix # try: np.where((gf[home_team] == 'PHI') & (gf['date'] == '09/07/14'))[0][0] # Assign team frame fields to respective model frame fields: set gf.at(pos, field) for team, data in teams: team_frame = USEP.join([league, team.lower(), series, str(season)]) logger.info("Merging team frame %s into model frame", team_frame) tf = team_frames[team_frame] for index in range(0, tf.shape[0]-1): gindex = index + 1 model_row = tf.iloc[gindex] key_date = model_row['date'] at_home = False if team == model_row[home_team]: at_home = True key_team = model_row[home_team] elif team == 
model_row[away_team]: key_team = model_row[away_team] else: raise KeyError("Team %s not found in Team Frame" % team) try: if at_home: mpos = np.where((mf[home_team] == key_team) & (mf['date'] == key_date))[0][0] else: mpos = np.where((mf[away_team] == key_team) & (mf['date'] == key_date))[0][0] except: raise IndexError("Team/Date Key not found in Model Frame") # print team, gindex, mpos # insert team data into model row mf = insert_model_data(mf, mpos, mdict, tf, index, team1_prefix if at_home else team2_prefix) # Compute delta data 'home' - 'away' mf = generate_delta_data(mf, mdict, team1_prefix, team2_prefix) # Append this to final frame frames = [ff, mf] ff = pd.concat(frames) # Write out dataframes input_dir = SSEP.join([directory, 'input']) if args.predict_mode: new_predict_frame = ff.loc[ff.date >= predict_date] if len(new_predict_frame) <= 1: raise ValueError("Prediction frame has length 1 or less") # rewrite with all the features to the train and test files logger.info("Saving prediction frame") write_frame(new_predict_frame, input_dir, datasets[Partition.predict], specs['extension'], specs['separator']) else: # split data into training and test data new_train_frame = ff.loc[(ff.date >= train_date) & (ff.date < predict_date)] if len(new_train_frame) <= 1: raise ValueError("Training frame has length 1 or less") new_test_frame = ff.loc[ff.date >= predict_date] if len(new_test_frame) <= 1: raise ValueError("Testing frame has length 1 or less") # rewrite with all the features to the train and test files logger.info("Saving training frame") write_frame(new_train_frame, input_dir, datasets[Partition.train], specs['extension'], specs['separator']) logger.info("Saving testing frame") write_frame(new_test_frame, input_dir, datasets[Partition.test], specs['extension'], specs['separator']) # Create the model from specs logger.info("Running Model") model = Model(specs) # Run the pipeline model = main_pipeline(model) # Complete the pipeline logger.info('*'*80) logger.info("SportFlow End") logger.info('*'*80)
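# The merge loop above locates a model-frame row by a (team, date) key using
# np.where over a boolean conjunction, raising IndexError when the key is
# absent. The pattern in isolation, with toy data:

import numpy as np
import pandas as pd

mf = pd.DataFrame({'home.team': ['PHI', 'DAL', 'PHI'],
                   'date': ['09/07/14', '09/07/14', '09/14/14']})
# first positional index where both conditions hold; [0][0] raises
# IndexError if no row matches
mpos = np.where((mf['home.team'] == 'PHI') & (mf['date'] == '09/07/14'))[0][0]
assert mpos == 0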
def gen_portfolio(model, system, group, tframe,
                  startcap=100000, posby='close'):
    r"""Create a portfolio from a trades frame.

    Parameters
    ----------
    model : alphapy.Model
        The model with specifications.
    system : str
        Name of the system.
    group : alphapy.Group
        The group of instruments in the portfolio.
    tframe : pandas.DataFrame
        The input trade list from running the system.
    startcap : float
        Starting capital.
    posby : str
        The position sizing column in the price dataframe.

    Returns
    -------
    p : alphapy.Portfolio
        The generated portfolio.

    Raises
    ------
    MemoryError
        Could not allocate Portfolio.

    Notes
    -----
    This function also generates the files required for analysis
    by the *pyfolio* package:

    * Returns File
    * Positions File
    * Transactions File

    """

    logger.info("Creating Portfolio for System %s", system)

    # Unpack the model data.

    directory = model.specs['directory']
    extension = model.specs['extension']
    separator = model.specs['separator']

    # Create the portfolio.

    gname = group.name
    gspace = group.space
    gmembers = group.members
    ff = 1.0 / len(gmembers)

    p = Portfolio(gname, system, gspace,
                  startcap=startcap, posby=posby,
                  restricted=False, fixedfrac=ff)
    if not p:
        raise MemoryError("Could not allocate Portfolio")

    # Build pyfolio data from the trades frame.

    start = tframe.index[0]
    end = tframe.index[-1]
    trange = np.unique(tframe.index.map(lambda x: x.date().strftime('%Y-%m-%d'))).tolist()
    drange = date_range(start, end).map(lambda x: x.date().strftime('%Y-%m-%d'))

    # Initialize return, position, and transaction data.

    rs = []
    pcols = list(gmembers)
    pcols.extend(['cash'])
    pf = DataFrame(index=drange, columns=pcols).fillna(0.0)
    ts = []

    # Iterate through the date range, updating the portfolio.

    for d in drange:
        # process today's trades (.ix was removed from pandas; use .loc)
        if d in trange:
            trades = tframe.loc[d]
            if isinstance(trades, Series):
                trades = DataFrame(trades).transpose()
            for tdate, row in trades.iterrows():
                tsize = exec_trade(p, row['name'], row['order'],
                                   row['quantity'], row['price'], tdate)
                if tsize != 0:
                    ts.append((d, [tsize, row['price'], row['name']]))
                else:
                    logger.info("Trade could not be executed for %s", row['name'])
        # iterate through current positions, writing scalars with .at
        # so the assignments always reach the underlying frame
        positions = p.positions
        for key in positions:
            pos = positions[key]
            value = pos.value if pos.quantity > 0 else -pos.value
            pf.at[d, pos.name] = value
        pf.at[d, 'cash'] = p.cash
        # update the portfolio returns
        p = valuate_portfolio(p, d)
        rs.append((d, [p.netreturn]))

    # Create systems directory path

    system_dir = SSEP.join([directory, 'systems'])

    # Create and record the returns frame for this system.

    logger.info("Recording Returns Frame")
    rspace = Space(system, 'returns', gspace.fractal)
    # DataFrame.from_items was removed in pandas 1.0; build the frame
    # positionally so duplicate index keys are preserved
    rf = DataFrame([v for _, v in rs], index=[d for d, _ in rs],
                   columns=['return'])
    rfname = frame_name(gname, rspace)
    write_frame(rf, system_dir, rfname, extension, separator,
                index=True, index_label='date')
    del rspace

    # Record the positions frame for this system.

    logger.info("Recording Positions Frame")
    pspace = Space(system, 'positions', gspace.fractal)
    pfname = frame_name(gname, pspace)
    write_frame(pf, system_dir, pfname, extension, separator,
                index=True, index_label='date')
    del pspace

    # Create and record the transactions frame for this system.

    logger.info("Recording Transactions Frame")
    tspace = Space(system, 'transactions', gspace.fractal)
    tf = DataFrame([v for _, v in ts], index=[d for d, _ in ts],
                   columns=['amount', 'price', 'symbol'])
    tfname = frame_name(gname, tspace)
    write_frame(tf, system_dir, tfname, extension, separator,
                index=True, index_label='date')
    del tspace

    # Return the portfolio.

    return p
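# The returns and transactions accumulators above collect (index, row) pairs
# and build a DataFrame once at the end. Because several transactions can
# share a date, the frame is built positionally rather than through a dict,
# which would collapse duplicate keys. The pattern in isolation:

import pandas as pd

ts = [('2014-09-07', [100, 101.5, 'AAPL']),
      ('2014-09-07', [-50, 99.0, 'MSFT'])]    # duplicate index keys survive
tf = pd.DataFrame([row for _, row in ts],
                  index=[d for d, _ in ts],
                  columns=['amount', 'price', 'symbol'])
print(tf)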
def get_market_data(model, group, lookback_period, data_fractal,
                    intraday_data=False):
    r"""Get data from an external feed.

    Parameters
    ----------
    model : alphapy.Model
        The model object describing the data.
    group : alphapy.Group
        The group of symbols.
    lookback_period : int
        The number of periods of data to retrieve.
    data_fractal : str
        Pandas offset alias.
    intraday_data : bool
        If True, then get intraday data.

    Returns
    -------
    n_periods : int
        The maximum number of periods actually retrieved.

    """

    # Unpack model specifications

    directory = model.specs['directory']
    extension = model.specs['extension']
    separator = model.specs['separator']

    # Unpack group elements

    gspace = group.space
    schema = gspace.schema
    fractal = gspace.fractal

    # Determine the feed source

    if intraday_data:
        # intraday data (date and time)
        logger.info("Getting Intraday Data [%s] from %s", data_fractal, schema)
        index_column = 'datetime'
    else:
        # daily data or higher (date only)
        logger.info("Getting Daily Data [%s] from %s", data_fractal, schema)
        index_column = 'date'

    # Get the data from the relevant feed

    data_dir = SSEP.join([directory, 'data'])
    pandas_data = any(substring in schema for substring in PD_WEB_DATA_FEEDS)
    n_periods = 0
    resample_data = fractal != data_fractal
    to_date = pd.to_datetime('today')
    from_date = to_date - pd.to_timedelta(lookback_period, unit='d')

    for item in group.members:
        logger.info("Getting %s data for last %d days", item, lookback_period)
        # reset the frame for each symbol so a failed fetch cannot
        # silently reuse the previous symbol's data
        df = None
        # Locate the data source
        if schema == 'data':
            # local intraday or daily
            dspace = Space(gspace.subject, gspace.schema, data_fractal)
            fname = frame_name(item.lower(), dspace)
            df = read_frame(data_dir, fname, extension, separator)
        elif schema == 'google' and intraday_data:
            # intraday only
            df = get_google_data(item, lookback_period, data_fractal)
        elif pandas_data:
            # daily only
            df = get_pandas_data(schema, item, lookback_period)
        else:
            logger.error("Unsupported Data Source: %s", schema)
        # Now that we have content, standardize the data
        if df is not None and not df.empty:
            logger.info("%d data points from %s to %s", len(df), from_date, to_date)
            # convert data to canonical form
            df = convert_data(df, index_column, intraday_data)
            # resample data and drop any incomplete rows
            if resample_data:
                df = df.resample(fractal).agg({'open'   : 'first',
                                               'high'   : 'max',
                                               'low'    : 'min',
                                               'close'  : 'last',
                                               'volume' : 'sum'})
                df.dropna(axis=0, how='any', inplace=True)
                logger.info("Rows after Resampling at %s: %d", fractal, len(df))
            # add intraday columns if necessary
            if intraday_data:
                df = enhance_intraday_data(df)
            # allocate global Frame
            newf = Frame(item.lower(), gspace, df)
            if newf is None:
                logger.error("Could not allocate Frame for: %s", item)
            # calculate maximum number of periods
            df_len = len(df)
            if df_len > n_periods:
                n_periods = df_len
        else:
            logger.info("No DataFrame for %s", item)

    # The number of periods actually retrieved

    return n_periods
def get_market_data(model, market_specs, group, lookback_period,
                    intraday_data=False):
    r"""Get data from an external feed.

    Parameters
    ----------
    model : alphapy.Model
        The model object describing the data.
    market_specs : dict
        The specifications for controlling the MarketFlow pipeline.
    group : alphapy.Group
        The group of symbols.
    lookback_period : int
        The number of periods of data to retrieve.
    intraday_data : bool
        If True, then get intraday data.

    Returns
    -------
    n_periods : int
        The maximum number of periods actually retrieved.

    """

    # Unpack market specifications

    data_fractal = market_specs['data_fractal']
    subschema = market_specs['subschema']

    # Unpack model specifications

    directory = model.specs['directory']
    extension = model.specs['extension']
    separator = model.specs['separator']

    # Unpack group elements

    gspace = group.space
    schema = gspace.schema
    fractal = gspace.fractal

    # Determine the feed source

    if intraday_data:
        # intraday data (date and time)
        logger.info("%s Intraday Data [%s] for %d periods",
                    schema, data_fractal, lookback_period)
        index_column = 'datetime'
    else:
        # daily data or higher (date only)
        logger.info("%s Daily Data [%s] for %d periods",
                    schema, data_fractal, lookback_period)
        index_column = 'date'

    # Get the data from the relevant feed

    data_dir = SSEP.join([directory, 'data'])
    n_periods = 0
    resample_data = fractal != data_fractal

    # Date Arithmetic

    to_date = pd.to_datetime('today')
    from_date = to_date - pd.to_timedelta(lookback_period, unit='d')
    to_date = to_date.strftime('%Y-%m-%d')
    from_date = from_date.strftime('%Y-%m-%d')

    # Get the data from the specified data feed

    for symbol in group.members:
        logger.info("Getting %s data from %s to %s",
                    symbol.upper(), from_date, to_date)
        # reset the frame for each symbol so a failed fetch cannot
        # silently reuse the previous symbol's data
        df = pd.DataFrame()
        # Locate the data source
        if schema == 'data':
            # local intraday or daily
            dspace = Space(gspace.subject, gspace.schema, data_fractal)
            fname = frame_name(symbol.lower(), dspace)
            df = read_frame(data_dir, fname, extension, separator)
        elif schema in data_dispatch_table:
            df = data_dispatch_table[schema](schema, subschema, symbol,
                                             intraday_data, data_fractal,
                                             from_date, to_date,
                                             lookback_period)
        else:
            logger.error("Unsupported Data Source: %s", schema)
        # Now that we have content, standardize the data
        if not df.empty:
            logger.info("Rows: %d [%s]", len(df), data_fractal)
            # convert data to canonical form
            df = convert_data(df, index_column, intraday_data)
            # resample data and drop any incomplete rows
            if resample_data:
                df = df.resample(fractal).agg({'open'   : 'first',
                                               'high'   : 'max',
                                               'low'    : 'min',
                                               'close'  : 'last',
                                               'volume' : 'sum'})
                df.dropna(axis=0, how='any', inplace=True)
                logger.info("Rows after Resampling at %s: %d", fractal, len(df))
            # add intraday columns if necessary
            if intraday_data:
                df = enhance_intraday_data(df)
            # allocate global Frame
            newf = Frame(symbol.lower(), gspace, df)
            if newf is None:
                logger.error("Could not allocate Frame for: %s", symbol.upper())
            # calculate maximum number of periods
            df_len = len(df)
            if df_len > n_periods:
                n_periods = df_len
        else:
            logger.info("No DataFrame for %s", symbol.upper())

    # The number of periods actually retrieved

    return n_periods
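# The OHLCV resampling step above in isolation: aggregate fine-grained bars
# up to a coarser fractal with first/max/min/last/sum, then drop incomplete
# rows, exactly as get_market_data does when fractal != data_fractal.

import pandas as pd

idx = pd.date_range('2020-01-06 09:30', periods=6, freq='1min')
bars = pd.DataFrame({'open':   [10, 11, 12, 13, 14, 15],
                     'high':   [11, 12, 13, 14, 15, 16],
                     'low':    [9, 10, 11, 12, 13, 14],
                     'close':  [11, 12, 13, 14, 15, 16],
                     'volume': [100] * 6}, index=idx)
bars_5m = bars.resample('5min').agg({'open': 'first', 'high': 'max',
                                     'low': 'min', 'close': 'last',
                                     'volume': 'sum'})
bars_5m.dropna(axis=0, how='any', inplace=True)
print(bars_5m)       # two 5-minute bars: 09:30 and 09:35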