def load_feature_map(model, directory):
    r"""Load the feature map from storage.

    By default, the most recent feature map is loaded into memory.
    If the feature map cannot be located or loaded, the error is
    logged and the model is returned unchanged.

    Parameters
    ----------
    model : alphapy.Model
        The model object to contain the feature map.
    directory : str
        Full directory specification of the feature map's location.

    Returns
    -------
    model : alphapy.Model
        The model object containing the feature map.

    """

    # Build the search location before the ``try`` so that it is always
    # defined when the error message is formatted.
    search_dir = SSEP.join([directory, 'model'])

    # Locate the feature map and load it
    try:
        file_name = most_recent_file(search_dir, 'feature_map_*.pkl')
        logger.info("Loading feature map from %s", file_name)
        # NOTE: joblib.load unpickles the file; only load trusted files.
        model.feature_map = joblib.load(file_name)
    except Exception:
        # Best-effort load: record the failure (with traceback) and fall
        # through so the caller still receives the model.
        logger.exception("Could not find feature map in %s", search_dir)

    # Return the model with the feature map
    return model
def load_predictor(directory):
    r"""Load the model predictor from storage.

    By default, the most recent model is loaded into memory.

    Parameters
    ----------
    directory : str
        Full directory specification of the predictor's location.

    Returns
    -------
    predictor : function or None
        The scoring function, or ``None`` if no predictor could be
        located or loaded (the failure is logged).

    """

    # NOTE(review): ``load_predictor`` is defined a second time further
    # down in this file; the later definition replaces this one at import.

    # Build the search location before the ``try`` so that it is always
    # defined when the error message is formatted.
    search_dir = SSEP.join([directory, 'model'])

    # Default result so the return statement is valid on failure.
    predictor = None

    # Locate the model Pickle file
    try:
        file_name = most_recent_file(search_dir, 'model_*.pkl')
        logger.info("Loading model predictor from %s", file_name)
        # NOTE: joblib.load unpickles the file; only load trusted files.
        predictor = joblib.load(file_name)
    except Exception:
        # Best-effort load: record the failure together with its traceback.
        logger.exception("Could not find model predictor in %s", search_dir)

    # Return the model predictor
    return predictor
def load_predictor(directory):
    r"""Load the model predictor from storage.

    By default, the most recent model is loaded into memory. Pickle
    (``pkl``) files are loaded with ``joblib.load`` and HDF5 (``h5``)
    files with ``load_model``.

    Parameters
    ----------
    directory : str
        Full directory specification of the predictor's location.

    Returns
    -------
    predictor : function or None
        The scoring function, or ``None`` if the most recent model file
        has an unsupported extension (the failure is logged).

    """

    # Locate the model Pickle or HD5 file
    search_dir = SSEP.join([directory, 'model'])
    file_name = most_recent_file(search_dir, 'model_*.*')

    # The text after the last separator selects the loader.
    file_ext = file_name.split(PSEP)[-1]
    if file_ext not in ('pkl', 'h5'):
        logger.error("Could not find model predictor in %s", search_dir)
        return None

    # Load the model from the file
    logger.info("Loading model predictor from %s", file_name)
    if file_ext == 'pkl':
        # NOTE: joblib.load unpickles the file; only load trusted files.
        return joblib.load(file_name)
    return load_model(file_name)
def save_predictions(model, tag, partition):
    r"""Save the predictions to disk.

    Three frames are written to the model's ``output`` directory, each
    suffixed with the current date stamp: ``predictions``,
    ``probabilities`` (classification only), and ``rankings`` (the
    input frame joined with the predictions and sorted in descending
    order).

    Parameters
    ----------
    model : alphapy.Model
        The model object to save.
    tag : str
        A unique identifier for the output files, e.g., a date stamp.
    partition : alphapy.Partition
        Reference to the dataset.

    Returns
    -------
    preds : numpy array
        The prediction vector.
    probas : numpy array or None
        The probability vector; ``None`` unless the model type is
        classification.

    """

    # Extract model parameters.
    directory = model.specs['directory']
    extension = model.specs['extension']
    model_type = model.specs['model_type']
    separator = model.specs['separator']

    # Get date stamp to record file creation
    timestamp = get_datestamp()

    # Specify input and output directories
    input_dir = SSEP.join([directory, 'input'])
    output_dir = SSEP.join([directory, 'output'])

    # Read the prediction frame (most recent input file for the partition)
    file_spec = ''.join([datasets[partition], '*'])
    file_name = most_recent_file(input_dir, file_spec)
    # Strip the directory and the extension to get the bare frame name.
    file_name = file_name.split(SSEP)[-1].split(PSEP)[0]
    pf = read_frame(input_dir, file_name, extension, separator)

    # Cull records before the prediction date. Only a missing key means
    # "no prediction date"; any other error should surface.
    try:
        predict_date = model.specs['predict_date']
    except KeyError:
        found_pdate = False
    else:
        found_pdate = True

    if found_pdate:
        pd_indices = pf[pf.date >= predict_date].index.tolist()
        pf = pf.iloc[pd_indices]
    else:
        pd_indices = pf.index.tolist()

    # Save predictions for all projects
    logger.info("Saving Predictions")
    output_file = USEP.join(['predictions', timestamp])
    preds = model.preds[(tag, partition)].squeeze()
    if found_pdate:
        preds = np.take(preds, pd_indices)
    pred_series = pd.Series(preds, index=pd_indices)
    df_pred = pd.DataFrame(pred_series, columns=['prediction'])
    write_frame(df_pred, output_dir, output_file, extension, separator)

    # Save probabilities for classification projects
    probas = None
    if model_type == ModelType.classification:
        logger.info("Saving Probabilities")
        output_file = USEP.join(['probabilities', timestamp])
        probas = model.probas[(tag, partition)].squeeze()
        if found_pdate:
            probas = np.take(probas, pd_indices)
        prob_series = pd.Series(probas, index=pd_indices)
        df_prob = pd.DataFrame(prob_series, columns=['probability'])
        write_frame(df_prob, output_dir, output_file, extension, separator)

    # Save ranked predictions
    logger.info("Saving Ranked Predictions")
    pf['prediction'] = pred_series
    if model_type == ModelType.classification:
        pf['probability'] = prob_series
        pf.sort_values('probability', ascending=False, inplace=True)
    else:
        pf.sort_values('prediction', ascending=False, inplace=True)
    output_file = USEP.join(['rankings', timestamp])
    write_frame(pf, output_dir, output_file, extension, separator)

    # Return predictions and any probabilities
    return preds, probas
def trade_system(model, system, space, intraday, name, quantity):
    r"""Generate the trades for one symbol under a long/short system.

    Parameters
    ----------
    model : alphapy.Model
        The model object with specifications.
    system : alphapy.System
        The long/short system to run.
    space : alphapy.Space
        Namespace of instrument prices.
    intraday : bool
        If True, then run an intraday system.
    name : str
        The symbol to trade.
    quantity : float
        The amount of the ``name`` to trade, e.g., number of shares

    Returns
    -------
    tradelist : list
        List of trade entries and exits, each a tuple of the bar's
        index value and ``[name, order, quantity, close]``.

    Other Parameters
    ----------------
    Frame.frames : dict
        All of the data frames containing price data.

    """

    # Model settings used to locate and parse the probabilities file.
    specs = model.specs
    directory = specs['directory']
    extension = specs['extension']
    separator = specs['separator']

    # System settings: entry/exit signal names, holding period, scaling.
    long_entry = system.longentry
    short_entry = system.shortentry
    long_exit = system.longexit
    short_exit = system.shortexit
    hold_limit = system.holdperiod
    scale_in = system.scale

    # The system is model-driven when any active signal mentions one of
    # the probability-based variables.
    candidates = (long_entry, short_entry, long_exit, short_exit)
    signals = [sig for sig in candidates if sig is not None]
    model_tags = ['phigh', 'plow']
    tagged = [any(tag in sig for tag in model_tags) for sig in signals]
    use_model = any(tagged)

    # Price frame for the symbol
    prices = Frame.frames[frame_name(name, space)].df

    # A model-driven system gets the latest probabilities appended as a
    # column of the price frame.
    if use_model:
        output_dir = SSEP.join([directory, 'output'])
        latest_path = most_recent_file(output_dir, 'probabilities*')
        base_name = latest_path.split(SSEP)[-1].split('.')[0]
        probs = read_frame(output_dir, base_name, extension, separator)
        # keep only the trailing price rows that have a probability
        n_probs = probs.shape[0]
        prices = prices[-n_probs:]
        probs.index = prices.index
        probs.columns = ['probability']
        prices = pd.concat([prices, probs], axis=1)

    # Evaluate each active signal against the price frame
    for sig in signals:
        vexec(prices, sig)

    # Trading state: direction flags, bars held, and signed position size
    in_long = False
    in_short = False
    bars_held = 0
    position = 0
    trades = []

    # Walk the bars in order and emit trades
    for dt, row in prices.iterrows():
        close = row['close']
        if intraday:
            # bar_number is read but not used further in this function
            bar_number = row['bar_number']
            end_of_day = row['end_of_day']

        # Signal values for this bar (None when a signal is not configured)
        enter_long = row[long_entry] if long_entry else None
        enter_short = row[short_entry] if short_entry else None
        exit_long = row[long_exit] if long_exit else None
        exit_short = row[short_exit] if short_exit else None

        # Entries: a long entry takes precedence over a short entry
        if enter_long:
            if position < 0:
                # cover the active short first
                trades.append((dt, [name, Orders.sx, -position, close]))
                in_short = False
                bars_held = 0
                position = 0
            if position == 0 or scale_in:
                # go long (again)
                trades.append((dt, [name, Orders.le, quantity, close]))
                in_long = True
                position = position + quantity
        elif enter_short:
            if position > 0:
                # sell the active long first
                trades.append((dt, [name, Orders.lx, -position, close]))
                in_long = False
                bars_held = 0
                position = 0
            if position == 0 or scale_in:
                # go short (again)
                trades.append((dt, [name, Orders.se, -quantity, close]))
                in_short = True
                position = position - quantity

        # Signal-based exits only apply after at least one bar held
        if in_long and bars_held > 0 and exit_long:
            trades.append((dt, [name, Orders.lx, -position, close]))
            in_long = False
            bars_held = 0
            position = 0
        if in_short and bars_held > 0 and exit_short:
            trades.append((dt, [name, Orders.sx, -position, close]))
            in_short = False
            bars_held = 0
            position = 0

        # Holding-period exit, when a holding period was given
        if hold_limit and bars_held >= hold_limit:
            if in_long:
                trades.append((dt, [name, Orders.lh, -position, close]))
                in_long = False
            if in_short:
                trades.append((dt, [name, Orders.sh, -position, close]))
                in_short = False
            bars_held = 0
            position = 0

        # While a position is open, count the bar and flatten at the end
        # of the day for intraday systems.
        if in_long or in_short:
            bars_held += 1
            if intraday and end_of_day:
                if in_long:
                    trades.append((dt, [name, Orders.lx, -position, close]))
                    in_long = False
                if in_short:
                    trades.append((dt, [name, Orders.sx, -position, close]))
                    in_short = False
                bars_held = 0
                position = 0

    return trades