Example #1
 def __init__(self,
              name,
              space=Space(),
              dynamic=True,
              members=None):
     # use a fresh set per instance to avoid sharing a mutable default argument
     members = members if members is not None else set()
     if name not in Group.groups:
         self.name = name
         self.space = space
         self.dynamic = dynamic
         self.members = members
         # add group to groups list
         Group.groups[name] = self
     else:
         logger.info("Group %s already exists", name)
Example #2
 def __init__(self,
              group_name,
              tag,
              space=Space(),
              maxpos=10,
              posby='close',
              kopos=0,
              koby='-profit',
              restricted=False,
              weightby='quantity',
              startcap=100000,
              margin=0.5,
              mincash=0.2,
              fixedfrac=0.1,
              maxloss=0.1):
     # initialization
     self.group_name = group_name
     self.tag = tag
     self.space = space
     self.positions = {}
     self.startdate = None
     self.enddate = None
     self.npos = 0
     self.maxpos = maxpos
     self.posby = posby
     self.kopos = kopos
     self.koby = koby
     self.restricted = restricted
     self.weightby = weightby
     self.weights = []
     self.startcap = startcap
     self.cash = startcap
     self.margin = margin
     self.mincash = mincash
     self.fixedfrac = fixedfrac
     self.maxloss = maxloss
     self.value = startcap
     self.netprofit = 0.0
     self.netreturn = 0.0
     self.totalprofit = 0.0
     self.totalreturn = 0.0
     # add portfolio to portfolios list
     pn = portfolio_name(group_name, tag)
     Portfolio.portfolios[pn] = self
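
A hedged construction sketch for the initializer above (not AlphaPy source; the group name, tag, and space arguments are hypothetical, and the import paths are assumptions): the instance is registered in Portfolio.portfolios under the key produced by portfolio_name(group_name, tag), and both cash and value start at startcap.

from alphapy.portfolio import Portfolio, portfolio_name   # assumed import path
from alphapy.space import Space                            # assumed import path

p = Portfolio('tech', 'breakout',
              space=Space('stock', 'prices', '1d'),
              startcap=50000)
pn = portfolio_name('tech', 'breakout')
print(Portfolio.portfolios[pn] is p)   # True
print(p.cash, p.value)                 # 50000 50000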
Example #3
 def __new__(cls,
             group_name,
             tag,
             space=Space(),
             maxpos=10,
             posby='close',
             kopos=0,
             koby='-profit',
             restricted=False,
             weightby='quantity',
             startcap=100000,
             margin=0.5,
             mincash=0.2,
             fixedfrac=0.1,
             maxloss=0.1):
     # create portfolio name
     pn = portfolio_name(group_name, tag)
     if pn not in Portfolio.portfolios:
         return super(Portfolio, cls).__new__(cls)
     else:
         logger.info("Portfolio %s already exists", pn)
Example #4
def get_market_config():
    r"""Read the configuration file for MarketFlow.

    Parameters
    ----------
    None : None

    Returns
    -------
    specs : dict
        The parameters for controlling MarketFlow.

    """

    logger.info("MarketFlow Configuration")

    # Read the configuration file

    full_path = SSEP.join([PSEP, 'config', 'market.yml'])
    with open(full_path, 'r') as ymlfile:
        cfg = yaml.load(ymlfile, Loader=yaml.FullLoader)

    # Store configuration parameters in dictionary

    specs = {}

    # Section: market [this section must be first]

    specs['create_model'] = cfg['market']['create_model']
    fractal = cfg['market']['data_fractal']
    try:
        _ = pd.to_timedelta(fractal)
    except ValueError:
        logger.info("data_fractal [%s] is an invalid pandas offset", fractal)
    specs['data_fractal'] = fractal
    specs['data_history'] = cfg['market']['data_history']
    specs['forecast_period'] = cfg['market']['forecast_period']
    fractal = cfg['market']['fractal']
    try:
        _ = pd.to_timedelta(fractal)
    except ValueError:
        logger.info("fractal [%s] is an invalid pandas offset", fractal)
    specs['fractal'] = fractal
    specs['lag_period'] = cfg['market']['lag_period']
    specs['leaders'] = cfg['market']['leaders']
    specs['predict_history'] = cfg['market']['predict_history']
    specs['schema'] = cfg['market']['schema']
    specs['subschema'] = cfg['market']['subschema']
    specs['api_key_name'] = cfg['market']['api_key_name']
    specs['api_key'] = cfg['market']['api_key']
    specs['subject'] = cfg['market']['subject']
    specs['target_group'] = cfg['market']['target_group']

    # Set API Key environment variable
    if specs['api_key']:
        os.environ[specs['api_key_name']] = specs['api_key']

    # Create the subject/schema/fractal namespace

    sspecs = [specs['subject'], specs['schema'], specs['fractal']]
    space = Space(*sspecs)

    # Section: features

    try:
        logger.info("Getting Features")
        specs['features'] = cfg['features']
    except:
        logger.info("No Features Found")
        specs['features'] = {}

    # Section: groups

    try:
        logger.info("Defining Groups")
        for g, m in list(cfg['groups'].items()):
            Group(g, space)
            Group.groups[g].add(m)
    except:
        logger.info("No Groups Found")

    # Section: aliases

    try:
        logger.info("Defining Aliases")
        for k, v in list(cfg['aliases'].items()):
            Alias(k, v)
    except:
        logger.info("No Aliases Found")

    # Section: system

    try:
        logger.info("Getting System Parameters")
        specs['system'] = cfg['system']
    except:
        logger.info("No System Parameters Found")
        specs['system'] = {}

    # Section: variables

    logger.info("Defining AlphaPy Variables [phigh, plow]")

    Variable('phigh', 'probability >= 0.7')
    Variable('plow', 'probability <= 0.3')

    try:
        logger.info("Defining User Variables")
        for k, v in list(cfg['variables'].items()):
            Variable(k, v)
    except:
        logger.info("No Variables Found")

    # Section: functions

    try:
        logger.info("Getting Variable Functions")
        specs['functions'] = cfg['functions']
    except:
        logger.info("No Variable Functions Found")
        specs['functions'] = {}

    # Log the stock parameters

    logger.info('MARKET PARAMETERS:')
    logger.info('api_key         = %s', specs['api_key'])
    logger.info('api_key_name    = %s', specs['api_key_name'])
    logger.info('create_model    = %r', specs['create_model'])
    logger.info('data_fractal    = %s', specs['data_fractal'])
    logger.info('data_history    = %d', specs['data_history'])
    logger.info('features        = %s', specs['features'])
    logger.info('forecast_period = %d', specs['forecast_period'])
    logger.info('fractal         = %s', specs['fractal'])
    logger.info('lag_period      = %d', specs['lag_period'])
    logger.info('leaders         = %s', specs['leaders'])
    logger.info('predict_history = %s', specs['predict_history'])
    logger.info('schema          = %s', specs['schema'])
    logger.info('subject         = %s', specs['subject'])
    logger.info('subschema       = %s', specs['subschema'])
    logger.info('system          = %s', specs['system'])
    logger.info('target_group    = %s', specs['target_group'])

    # Market Specifications
    return specs
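
Both fractal checks above lean on pandas parsing the string as a time delta; a standalone hedged sketch of that validation (pd.to_timedelta raises ValueError for strings it cannot interpret):

import pandas as pd

def is_valid_fractal(fractal):
    """Return True if pandas can parse the string as a time offset."""
    try:
        pd.to_timedelta(fractal)
        return True
    except ValueError:
        return False

print(is_valid_fractal('1D'), is_valid_fractal('15min'), is_valid_fractal('bogus'))
# True True False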
Example #5
def get_market_config():
    r"""Read the configuration file for MarketFlow.

    Parameters
    ----------
    None : None

    Returns
    -------
    specs : dict
        The parameters for controlling MarketFlow.

    """

    logger.info("MarketFlow Configuration")

    # Read the configuration file

    full_path = SSEP.join([PSEP, 'config', 'market.yml'])
    with open(full_path, 'r') as ymlfile:
        cfg = yaml.load(ymlfile, Loader=yaml.FullLoader)

    # Store configuration parameters in dictionary

    specs = {}

    # Section: market [this section must be first]

    specs['forecast_period'] = cfg['market']['forecast_period']
    specs['fractal'] = cfg['market']['fractal']
    specs['leaders'] = cfg['market']['leaders']
    specs['data_history'] = cfg['market']['data_history']
    specs['predict_history'] = cfg['market']['predict_history']
    specs['schema'] = cfg['market']['schema']
    specs['target_group'] = cfg['market']['target_group']

    # Create the subject/schema/fractal namespace

    sspecs = ['stock', specs['schema'], specs['fractal']]
    space = Space(*sspecs)

    # Section: features

    try:
        logger.info("Getting Features")
        specs['features'] = cfg['features']
    except:
        logger.info("No Features Found")
        specs['features'] = {}

    # Section: groups

    try:
        logger.info("Defining Groups")
        for g, m in cfg['groups'].items():
            Group(g, space)
            Group.groups[g].add(m)
    except:
        logger.info("No Groups Found")

    # Section: aliases

    try:
        logger.info("Defining Aliases")
        for k, v in cfg['aliases'].items():
            Alias(k, v)
    except:
        logger.info("No Aliases Found")

    # Section: system

    try:
        logger.info("Getting System Parameters")
        specs['system'] = cfg['system']
    except:
        logger.info("No System Parameters Found")
        specs['system'] = {}

    # Section: variables

    try:
        logger.info("Defining Variables")
        for k, v in cfg['variables'].items():
            Variable(k, v)
    except:
        logger.info("No Variables Found")

    # Section: functions

    try:
        logger.info("Getting Variable Functions")
        specs['functions'] = cfg['functions']
    except:
        logger.info("No Variable Functions Found")
        specs['functions'] = {}

    # Log the stock parameters

    logger.info('MARKET PARAMETERS:')
    logger.info('features        = %s', specs['features'])
    logger.info('forecast_period = %d', specs['forecast_period'])
    logger.info('fractal         = %s', specs['fractal'])
    logger.info('leaders         = %s', specs['leaders'])
    logger.info('data_history    = %d', specs['data_history'])
    logger.info('predict_history = %s', specs['predict_history'])
    logger.info('schema          = %s', specs['schema'])
    logger.info('system          = %s', specs['system'])
    logger.info('target_group    = %s', specs['target_group'])

    # Market Specifications
    return specs
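
Each optional section above is wrapped in a bare try/except that supplies a default when the key is missing, but it also swallows unrelated errors. A hedged alternative sketch with the same defaults, assuming cfg is the dictionary returned by the YAML load:

# missing-key defaults without a bare except (sketch, not AlphaPy source)
specs['features']  = cfg.get('features', {})
specs['system']    = cfg.get('system', {})
specs['functions'] = cfg.get('functions', {})

for g, m in cfg.get('groups', {}).items():
    Group(g, space)
    Group.groups[g].add(m)

for k, v in cfg.get('aliases', {}).items():
    Alias(k, v)

for k, v in cfg.get('variables', {}).items():
    Variable(k, v)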
Example #6
def run_system(model, system, group, intraday=False, quantity=1):
    r"""Run a system for a given group, creating a trades frame.

    Parameters
    ----------
    model : alphapy.Model
        The model object with specifications.
    system : alphapy.System
        The system to run.
    group : alphapy.Group
        The group of symbols to trade.
    intraday : bool, optional
        If true, this is an intraday system.
    quantity : float, optional
        The amount to trade for each symbol, e.g., number of shares

    Returns
    -------
    tf : pandas.DataFrame
        All of the trades for this ``group``.

    """

    system_name = system.name
    logger.info("Generating Trades for System %s", system_name)

    # Unpack the model data.

    directory = model.specs['directory']
    extension = model.specs['extension']
    separator = model.specs['separator']

    # Extract the group information.

    gname = group.name
    gmembers = group.members
    gspace = group.space

    # Run the system for each member of the group

    gtlist = []
    for symbol in gmembers:
        # generate the trades for this member
        tlist = trade_system(model, system, gspace, intraday, symbol, quantity)
        if tlist:
            # add trades to global trade list
            for item in tlist:
                gtlist.append(item)
        else:
            logger.info("No trades for symbol %s", symbol)

    # Create group trades frame

    tf = None
    if gtlist:
        tspace = Space(system_name, "trades", group.space.fractal)
        gtlist = sorted(gtlist, key=lambda x: x[0])
        tf = DataFrame.from_items(gtlist, orient='index', columns=Trade.states)
        tfname = frame_name(gname, tspace)
        system_dir = SSEP.join([directory, 'systems'])
        labels = ['date']
        if intraday:
            labels.append('time')
        write_frame(tf,
                    system_dir,
                    tfname,
                    extension,
                    separator,
                    index=True,
                    index_label=labels)
        del tspace
    else:
        logger.info("No trades were found")

    # Return trades frame
    return tf
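
One portability note on the trades frame above: DataFrame.from_items was deprecated in pandas 0.23 and removed in 1.0. A hedged equivalent for current pandas, assuming each gtlist entry is a (label, row-values) pair as the from_items call implies, and keeping duplicate date labels intact:

import pandas as pd

# equivalent of DataFrame.from_items(gtlist, orient='index', columns=Trade.states)
tf = pd.DataFrame([row for _, row in gtlist],
                  index=[label for label, _ in gtlist],
                  columns=Trade.states)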
Example #7
def run_system(model, system, group, quantity=1):
    r"""Run a system for a given group, creating a trades frame.

    Parameters
    ----------
    model : alphapy.Model
        The model object with specifications.
    system : alphapy.System or str
        The system to run, either a long/short system or a local one
        identified by function name, e.g., 'open_range_breakout'.
    group : alphapy.Group
        The group of symbols to test.
    quantity : float
        The amount to trade for each symbol, e.g., number of shares

    Returns
    -------
    tf : pandas.DataFrame
        All of the trades for this ``group``.

    """

    if isinstance(system, str):
        system_name = system
    else:
        system_name = system.name

    logger.info("Generating Trades for System %s", system_name)

    # Unpack the model data.

    directory = model.specs['directory']
    extension = model.specs['extension']
    separator = model.specs['separator']

    # Extract the group information.

    gname = group.name
    gmembers = group.members
    gspace = group.space

    # Run the system for each member of the group

    gtlist = []
    for symbol in gmembers:
        # generate the trades for this member
        if isinstance(system, str):
            try:
                tlist = globals()[system_name](symbol, gspace, quantity)
            except:
                # keep tlist defined so the check below does not raise NameError
                tlist = []
                logger.info("Could not execute system for %s", symbol)
        else:
            # call default long/short system
            tlist = long_short(system, symbol, gspace, quantity)
        if tlist:
            # add trades to global trade list
            for item in tlist:
                gtlist.append(item)
        else:
            logger.info("No trades for symbol %s", symbol)

    # Create group trades frame

    tf = None
    if gtlist:
        tspace = Space(system_name, "trades", group.space.fractal)
        gtlist = sorted(gtlist, key=lambda x: x[0])
        tf = DataFrame.from_items(gtlist, orient='index', columns=Trade.states)
        tfname = frame_name(gname, tspace)
        system_dir = SSEP.join([directory, 'systems'])
        write_frame(tf, system_dir, tfname, extension, separator, index=True)
        del tspace
    else:
        logger.info("No trades were found")

    # Return trades frame
    return tf
Example #8
def main(args=None):
    r"""The main program for SportFlow.

    Notes
    -----
    (1) Initialize logging.
    (2) Parse the command line arguments.
    (3) Get the game configuration.
    (4) Get the model configuration.
    (5) Generate game frames for each season.
    (6) Create statistics for each team.
    (7) Merge the team frames into the final model frame.
    (8) Run the AlphaPy pipeline.

    Raises
    ------
    ValueError
        Training date must be before prediction date.

    """

    # Logging

    logging.basicConfig(format="[%(asctime)s] %(levelname)s\t%(message)s",
                        filename="sport_flow.log", filemode='a', level=logging.DEBUG,
                        datefmt='%m/%d/%y %H:%M:%S')
    formatter = logging.Formatter("[%(asctime)s] %(levelname)s\t%(message)s",
                                  datefmt='%m/%d/%y %H:%M:%S')
    console = logging.StreamHandler()
    console.setFormatter(formatter)
    console.setLevel(logging.INFO)
    logging.getLogger().addHandler(console)

    logger = logging.getLogger(__name__)

    # Start the pipeline

    logger.info('*'*80)
    logger.info("SportFlow Start")
    logger.info('*'*80)

    # Argument Parsing

    parser = argparse.ArgumentParser(description="SportFlow Parser")
    parser.add_argument('--pdate', dest='predict_date',
                        help="prediction date is in the format: YYYY-MM-DD",
                        required=False, type=valid_date)
    parser.add_argument('--tdate', dest='train_date',
                        help="training date is in the format: YYYY-MM-DD",
                        required=False, type=valid_date)
    mode_group = parser.add_mutually_exclusive_group(required=False)
    mode_group.add_argument('--predict', dest='predict_mode', action='store_true')
    mode_group.add_argument('--train', dest='predict_mode', action='store_false')
    parser.set_defaults(predict_mode=False)
    args = parser.parse_args()

    # Set train and predict dates

    if args.train_date:
        train_date = args.train_date
    else:
        train_date = datetime.datetime(1900, 1, 1).strftime("%Y-%m-%d")

    if args.predict_date:
        predict_date = args.predict_date
    else:
        predict_date = datetime.date.today().strftime("%Y-%m-%d")

    # Verify that the dates are in sequence.

    if train_date >= predict_date:
        raise ValueError("Training date must be before prediction date")
    else:
        logger.info("Training Date: %s", train_date)
        logger.info("Prediction Date: %s", predict_date)

    # Read game configuration file

    sport_specs = get_sport_config()

    # Section: game

    league = sport_specs['league']
    points_max = sport_specs['points_max']
    points_min = sport_specs['points_min']
    random_scoring = sport_specs['random_scoring']
    seasons = sport_specs['seasons']
    window = sport_specs['rolling_window']   

    # Read model configuration file

    specs = get_model_config()

    # Add command line arguments to model specifications

    specs['predict_mode'] = args.predict_mode
    specs['predict_date'] = args.predict_date
    specs['train_date'] = args.train_date

    # Unpack model arguments

    directory = specs['directory']
    target = specs['target']

    # Create directories if necessary

    output_dirs = ['config', 'data', 'input', 'model', 'output', 'plots']
    for od in output_dirs:
        output_dir = SSEP.join([directory, od])
        if not os.path.exists(output_dir):
            logger.info("Creating directory %s", output_dir)
            os.makedirs(output_dir)

    # Create the game scores space
    space = Space('game', 'scores', '1g')

    #
    # Derived Variables
    #

    series = space.schema
    team1_prefix = 'home'
    team2_prefix = 'away'
    home_team = PSEP.join([team1_prefix, 'team'])
    away_team = PSEP.join([team2_prefix, 'team'])

    #
    # Read in the game frame. This is the feature generation phase.
    #

    logger.info("Reading Game Data")

    data_dir = SSEP.join([directory, 'data'])
    file_base = USEP.join([league, space.subject, space.schema, space.fractal])
    df = read_frame(data_dir, file_base, specs['extension'], specs['separator'])
    logger.info("Total Game Records: %d", df.shape[0])

    #
    # Locate any rows with null values
    #

    null_rows = df.isnull().any(axis=1)
    null_indices = [i for i, val in enumerate(null_rows.tolist()) if val]
    for i in null_indices:
        logger.info("Null Record: %d on Date: %s", i, df.date[i])

    #
    # Run the game pipeline on a seasonal loop
    #

    if not seasons:
        # run model on all seasons
        seasons = df['season'].unique().tolist()

    #
    # Initialize the final frame
    #

    ff = pd.DataFrame()

    #
    # Iterate through each season of the game frame
    #

    for season in seasons:

        # Generate a frame for each season

        gf = df[df['season'] == season]
        gf = gf.reset_index()

        # Generate derived variables for the game frame

        total_games = gf.shape[0]
        if random_scoring:
            gf['home.score'] = np.random.randint(points_min, points_max, total_games)
            gf['away.score'] = np.random.randint(points_min, points_max, total_games)
        gf['total_points'] = gf['home.score'] + gf['away.score']

        gf = add_features(gf, game_dict, gf.shape[0])
        for index, row in gf.iterrows():
            # assign through frame-level .at to avoid chained assignment on a column copy
            gf.at[index, 'point_margin_game'] = get_point_margin(row, 'home.score', 'away.score')
            gf.at[index, 'won_on_points'] = gf.at[index, 'point_margin_game'] > 0
            gf.at[index, 'lost_on_points'] = gf.at[index, 'point_margin_game'] < 0
            gf.at[index, 'cover_margin_game'] = gf.at[index, 'point_margin_game'] + row['line']
            gf.at[index, 'won_on_spread'] = gf.at[index, 'cover_margin_game'] > 0
            gf.at[index, 'lost_on_spread'] = gf.at[index, 'cover_margin_game'] <= 0
            gf.at[index, 'overunder_margin'] = gf.at[index, 'total_points'] - row['over_under']
            gf.at[index, 'over'] = gf.at[index, 'overunder_margin'] > 0
            gf.at[index, 'under'] = gf.at[index, 'overunder_margin'] < 0

        # Generate each team frame

        team_frames = {}
        teams = gf.groupby([home_team])
        for team, data in teams:
            team_frame = USEP.join([league, team.lower(), series, str(season)])
            logger.info("Generating team frame: %s", team_frame)
            tf = get_team_frame(gf, team, home_team, away_team)
            tf = tf.reset_index()
            tf = generate_team_frame(team, tf, home_team, away_team, window)
            team_frames[team_frame] = tf

        # Create the model frame, initializing the home and away frames

        mdict = {k:v for (k,v) in list(sports_dict.items()) if v != bool}
        team1_frame = pd.DataFrame()
        team1_frame = add_features(team1_frame, mdict, gf.shape[0], prefix=team1_prefix)
        team2_frame = pd.DataFrame()
        team2_frame = add_features(team2_frame, mdict, gf.shape[0], prefix=team2_prefix)
        frames = [gf, team1_frame, team2_frame]
        mf = pd.concat(frames, axis=1)

        # Loop through each team frame, inserting data into the model frame row
        #     get index+1 [if valid]
        #     determine if team is home or away to get prefix
        #     try: np.where((gf[home_team] == 'PHI') & (gf['date'] == '09/07/14'))[0][0]
        #     Assign team frame fields to respective model frame fields: set gf.at(pos, field)

        for team, data in teams:
            team_frame = USEP.join([league, team.lower(), series, str(season)])
            logger.info("Merging team frame %s into model frame", team_frame)
            tf = team_frames[team_frame]
            for index in range(0, tf.shape[0]-1):
                gindex = index + 1
                model_row = tf.iloc[gindex]
                key_date = model_row['date']
                at_home = False
                if team == model_row[home_team]:
                    at_home = True
                    key_team = model_row[home_team]
                elif team == model_row[away_team]:
                    key_team = model_row[away_team]
                else:
                    raise KeyError("Team %s not found in Team Frame" % team)            
                try:
                    if at_home:
                        mpos = np.where((mf[home_team] == key_team) & (mf['date'] == key_date))[0][0]
                    else:
                        mpos = np.where((mf[away_team] == key_team) & (mf['date'] == key_date))[0][0]
                except IndexError:
                    raise IndexError("Team/Date Key not found in Model Frame")
                # print team, gindex, mpos
                # insert team data into model row
                mf = insert_model_data(mf, mpos, mdict, tf, index, team1_prefix if at_home else team2_prefix)

        # Compute delta data 'home' - 'away'
        mf = generate_delta_data(mf, mdict, team1_prefix, team2_prefix)

        # Append this to final frame
        frames = [ff, mf]
        ff = pd.concat(frames)

    # Write out dataframes

    input_dir = SSEP.join([directory, 'input'])
    if args.predict_mode:
        new_predict_frame = ff.loc[ff.date >= predict_date]
        if len(new_predict_frame) <= 1:
            raise ValueError("Prediction frame has length 1 or less")
        # rewrite with all the features to the train and test files
        logger.info("Saving prediction frame")
        write_frame(new_predict_frame, input_dir, datasets[Partition.predict],
                    specs['extension'], specs['separator'])
    else:
        # split data into training and test data
        new_train_frame = ff.loc[(ff.date >= train_date) & (ff.date < predict_date)]
        if len(new_train_frame) <= 1:
            raise ValueError("Training frame has length 1 or less")
        new_test_frame = ff.loc[ff.date >= predict_date]
        if len(new_test_frame) <= 1:
            raise ValueError("Testing frame has length 1 or less")
        # rewrite with all the features to the train and test files
        logger.info("Saving training frame")
        write_frame(new_train_frame, input_dir, datasets[Partition.train],
                    specs['extension'], specs['separator'])
        logger.info("Saving testing frame")
        write_frame(new_test_frame, input_dir, datasets[Partition.test],
                    specs['extension'], specs['separator'])

    # Create the model from specs

    logger.info("Running Model")
    model = Model(specs)

    # Run the pipeline
    model = main_pipeline(model)

    # Complete the pipeline

    logger.info('*'*80)
    logger.info("SportFlow End")
    logger.info('*'*80)
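
One detail in main() above deserves a note: train_date and predict_date are compared as strings, which is only safe because both are normalized to the ISO "%Y-%m-%d" format (assuming valid_date also returns dates in that format), where lexicographic order matches chronological order. A minimal illustration:

# ISO "%Y-%m-%d" strings sort chronologically, so plain string comparison works
assert "1900-01-01" < "2024-06-30" < "2024-07-01"

# the same comparison on "%m/%d/%Y" strings would be wrong:
# "06/30/2024" < "07/01/2023" is True as strings, yet 06/30/2024 is the later date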
Example #9
def gen_portfolio(model,
                  system,
                  group,
                  tframe,
                  startcap=100000,
                  posby='close'):
    r"""Create a portfolio from a trades frame.

    Parameters
    ----------
    model : alphapy.Model
        The model with specifications.
    system : str
        Name of the system.
    group : alphapy.Group
        The group of instruments in the portfolio.
    tframe : pandas.DataFrame
        The input trade list from running the system.
    startcap : float
        Starting capital.
    posby : str
        The position sizing column in the price dataframe.

    Returns
    -------
    p : alphapy.Portfolio
        The generated portfolio.

    Raises
    ------
    MemoryError
        Could not allocate Portfolio.

    Notes
    -----

    This function also generates the files required for analysis
    by the *pyfolio* package:

    * Returns File
    * Positions File
    * Transactions File

    """

    logger.info("Creating Portfolio for System %s", system)

    # Unpack the model data.

    directory = model.specs['directory']
    extension = model.specs['extension']
    separator = model.specs['separator']

    # Create the portfolio.

    gname = group.name
    gspace = group.space
    gmembers = group.members
    ff = 1.0 / len(gmembers)

    p = Portfolio(gname,
                  system,
                  gspace,
                  startcap=startcap,
                  posby=posby,
                  restricted=False,
                  fixedfrac=ff)
    if not p:
        raise MemoryError("Could not allocate Portfolio")

    # Build pyfolio data from the trades frame.

    start = tframe.index[0]
    end = tframe.index[-1]
    trange = np.unique(
        tframe.index.map(lambda x: x.date().strftime('%Y-%m-%d'))).tolist()
    drange = date_range(start,
                        end).map(lambda x: x.date().strftime('%Y-%m-%d'))

    # Initialize return, position, and transaction data.

    rs = []
    pcols = list(gmembers)
    pcols.extend(['cash'])
    pf = DataFrame(index=drange, columns=pcols).fillna(0.0)
    ts = []

    # Iterate through the date range, updating the portfolio.
    for d in drange:
        # process today's trades
        if d in trange:
            trades = tframe.loc[d]
            if isinstance(trades, Series):
                trades = DataFrame(trades).transpose()
            for t in trades.iterrows():
                tdate = t[0]
                row = t[1]
                tsize = exec_trade(p, row['name'], row['order'],
                                   row['quantity'], row['price'], tdate)
                if tsize != 0:
                    ts.append((d, [tsize, row['price'], row['name']]))
                else:
                    logger.info("Trade could not be executed for %s",
                                row['name'])
        # iterate through current positions
        positions = p.positions
        pfrow = pf.loc[d]
        for key in positions:
            pos = positions[key]
            if pos.quantity > 0:
                value = pos.value
            else:
                value = -pos.value
            pfrow[pos.name] = value
        pfrow['cash'] = p.cash
        # update the portfolio returns
        p = valuate_portfolio(p, d)
        rs.append((d, [p.netreturn]))

    # Create systems directory path

    system_dir = SSEP.join([directory, 'systems'])

    # Create and record the returns frame for this system.

    logger.info("Recording Returns Frame")
    rspace = Space(system, 'returns', gspace.fractal)
    rf = DataFrame.from_items(rs, orient='index', columns=['return'])
    rfname = frame_name(gname, rspace)
    write_frame(rf,
                system_dir,
                rfname,
                extension,
                separator,
                index=True,
                index_label='date')
    del rspace

    # Record the positions frame for this system.

    logger.info("Recording Positions Frame")
    pspace = Space(system, 'positions', gspace.fractal)
    pfname = frame_name(gname, pspace)
    write_frame(pf,
                system_dir,
                pfname,
                extension,
                separator,
                index=True,
                index_label='date')
    del pspace

    # Create and record the transactions frame for this system.

    logger.info("Recording Transactions Frame")
    tspace = Space(system, 'transactions', gspace.fractal)
    tf = DataFrame.from_items(ts,
                              orient='index',
                              columns=['amount', 'price', 'symbol'])
    tfname = frame_name(gname, tspace)
    write_frame(tf,
                system_dir,
                tfname,
                extension,
                separator,
                index=True,
                index_label='date')
    del tspace

    # Return the portfolio.
    return p
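
Since the notes above say the returns, positions, and transactions frames target the pyfolio package, here is a hedged sketch of feeding them back in, assuming pyfolio is installed and that write_frame produced CSV files with a date index; the file names below are hypothetical placeholders for whatever frame_name() generated:

import pandas as pd
import pyfolio

# hypothetical file names; substitute the actual frame_name() output
returns = pd.read_csv('systems/tech_system_returns_1d.csv',
                      index_col='date', parse_dates=True)['return']
positions = pd.read_csv('systems/tech_system_positions_1d.csv',
                        index_col='date', parse_dates=True)
transactions = pd.read_csv('systems/tech_system_transactions_1d.csv',
                           index_col='date', parse_dates=True)

# pyfolio expects a returns Series plus optional positions/transactions frames
pyfolio.create_full_tear_sheet(returns,
                               positions=positions,
                               transactions=transactions)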
Example #10
def get_market_data(model, group, lookback_period,
                    data_fractal, intraday_data=False):
    r"""Get data from an external feed.

    Parameters
    ----------
    model : alphapy.Model
        The model object describing the data.
    group : alphapy.Group
        The group of symbols.
    lookback_period : int
        The number of periods of data to retrieve.
    data_fractal : str
        Pandas offset alias.
    intraday_data : bool
        If True, then get intraday data.

    Returns
    -------
    n_periods : int
        The maximum number of periods actually retrieved.

    """

    # Unpack model specifications

    directory = model.specs['directory']
    extension = model.specs['extension']
    separator = model.specs['separator']

    # Unpack group elements

    gspace = group.space
    schema = gspace.schema
    fractal = gspace.fractal

    # Determine the feed source

    if intraday_data:
        # intraday data (date and time)
        logger.info("Getting Intraday Data [%s] from %s", data_fractal, schema)
        index_column = 'datetime'
    else:
        # daily data or higher (date only)
        logger.info("Getting Daily Data [%s] from %s", data_fractal, schema)
        index_column = 'date'

    # Get the data from the relevant feed

    data_dir = SSEP.join([directory, 'data'])
    pandas_data = any(substring in schema for substring in PD_WEB_DATA_FEEDS)
    n_periods = 0
    resample_data = fractal != data_fractal
    df = None
    to_date = pd.to_datetime('today')
    from_date = to_date - pd.to_timedelta(lookback_period, unit='d')

    for item in group.members:
        logger.info("Getting %s data for last %d days", item, lookback_period)
        # Locate the data source
        if schema == 'data':
            # local intraday or daily
            dspace = Space(gspace.subject, gspace.schema, data_fractal)
            fname = frame_name(item.lower(), dspace)
            df = read_frame(data_dir, fname, extension, separator)
        elif schema == 'google' and intraday_data:
            # intraday only
            df = get_google_data(item, lookback_period, data_fractal)
        elif pandas_data:
            # daily only
            df = get_pandas_data(schema, item, lookback_period)
        else:
            logger.error("Unsupported Data Source: %s", schema)
        # Now that we have content, standardize the data
        if df is not None and not df.empty:
            logger.info("%d data points from %s to %s", len(df), from_date, to_date)
            # convert data to canonical form
            df = convert_data(df, index_column, intraday_data)
            # resample data and forward fill any NA values
            if resample_data:
                df = df.resample(fractal).agg({'open'   : 'first',
                                               'high'   : 'max',
                                               'low'    : 'min',
                                               'close'  : 'last',
                                               'volume' : 'sum'})
                df.dropna(axis=0, how='any', inplace=True)
                logger.info("Rows after Resampling at %s: %d",
                            fractal, len(df))
            # add intraday columns if necessary
            if intraday_data:
                df = enhance_intraday_data(df)
            # allocate global Frame
            newf = Frame(item.lower(), gspace, df)
            if newf is None:
                logger.error("Could not allocate Frame for: %s", item)
            # calculate maximum number of periods
            df_len = len(df)
            if df_len > n_periods:
                n_periods = df_len
        else:
            logger.info("No DataFrame for %s", item)

    # The number of periods actually retrieved
    return n_periods
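
The resampling step above aggregates OHLCV bars up to the coarser fractal; a standalone hedged sketch with illustrative data shows how three 1-minute bars collapse into a single hourly bar:

import pandas as pd

idx = pd.date_range('2024-01-02 09:30', periods=3, freq='1min')
bars = pd.DataFrame({'open':   [10.0, 10.2, 10.1],
                     'high':   [10.3, 10.4, 10.2],
                     'low':    [ 9.9, 10.1, 10.0],
                     'close':  [10.2, 10.1, 10.15],
                     'volume': [500,  300,  200]}, index=idx)

hourly = bars.resample('1h').agg({'open': 'first', 'high': 'max',
                                  'low': 'min', 'close': 'last',
                                  'volume': 'sum'}).dropna(how='any')
print(hourly)
# one 09:00 bar: open=10.0, high=10.4, low=9.9, close=10.15, volume=1000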
Example #11
File: data.py  Project: xr3i444/AlphaPy
def get_market_data(model,
                    market_specs,
                    group,
                    lookback_period,
                    intraday_data=False):
    r"""Get data from an external feed.

    Parameters
    ----------
    model : alphapy.Model
        The model object describing the data.
    market_specs : dict
        The specifications for controlling the MarketFlow pipeline.
    group : alphapy.Group
        The group of symbols.
    lookback_period : int
        The number of periods of data to retrieve.
    intraday_data : bool
        If True, then get intraday data.

    Returns
    -------
    n_periods : int
        The maximum number of periods actually retrieved.

    """

    # Unpack market specifications

    data_fractal = market_specs['data_fractal']
    subschema = market_specs['subschema']

    # Unpack model specifications

    directory = model.specs['directory']
    extension = model.specs['extension']
    separator = model.specs['separator']

    # Unpack group elements

    gspace = group.space
    schema = gspace.schema
    fractal = gspace.fractal

    # Determine the feed source

    if intraday_data:
        # intraday data (date and time)
        logger.info("%s Intraday Data [%s] for %d periods", schema,
                    data_fractal, lookback_period)
        index_column = 'datetime'
    else:
        # daily data or higher (date only)
        logger.info("%s Daily Data [%s] for %d periods", schema, data_fractal,
                    lookback_period)
        index_column = 'date'

    # Get the data from the relevant feed

    data_dir = SSEP.join([directory, 'data'])
    n_periods = 0
    resample_data = fractal != data_fractal

    # Date Arithmetic

    to_date = pd.to_datetime('today')
    from_date = to_date - pd.to_timedelta(lookback_period, unit='d')
    to_date = to_date.strftime('%Y-%m-%d')
    from_date = from_date.strftime('%Y-%m-%d')

    # Get the data from the specified data feed

    df = pd.DataFrame()
    for symbol in group.members:
        logger.info("Getting %s data from %s to %s", symbol.upper(), from_date,
                    to_date)
        # Locate the data source
        if schema == 'data':
            # local intraday or daily
            dspace = Space(gspace.subject, gspace.schema, data_fractal)
            fname = frame_name(symbol.lower(), dspace)
            df = read_frame(data_dir, fname, extension, separator)
        elif schema in data_dispatch_table:
            df = data_dispatch_table[schema](schema, subschema, symbol,
                                             intraday_data, data_fractal,
                                             from_date, to_date,
                                             lookback_period)
        else:
            logger.error("Unsupported Data Source: %s", schema)
        # Now that we have content, standardize the data
        if not df.empty:
            logger.info("Rows: %d [%s]", len(df), data_fractal)
            # convert data to canonical form
            df = convert_data(df, index_column, intraday_data)
            # resample data and forward fill any NA values
            if resample_data:
                df = df.resample(fractal).agg({
                    'open': 'first',
                    'high': 'max',
                    'low': 'min',
                    'close': 'last',
                    'volume': 'sum'
                })
                df.dropna(axis=0, how='any', inplace=True)
                logger.info("Rows after Resampling at %s: %d", fractal,
                            len(df))
            # add intraday columns if necessary
            if intraday_data:
                df = enhance_intraday_data(df)
            # allocate global Frame
            newf = Frame(symbol.lower(), gspace, df)
            if newf is None:
                logger.error("Could not allocate Frame for: %s",
                             symbol.upper())
            # calculate maximum number of periods
            df_len = len(df)
            if df_len > n_periods:
                n_periods = df_len
        else:
            logger.info("No DataFrame for %s", symbol.upper())

    # The number of periods actually retrieved
    return n_periods
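
data_dispatch_table above maps a schema string to the feed-specific retrieval function, so supporting a new feed means registering one more entry rather than adding another elif. A minimal hedged sketch of the pattern (the feed names and functions below are hypothetical, not AlphaPy's):

def get_csv_data(schema, subschema, symbol, intraday, fractal, from_date, to_date, lookback):
    """Hypothetical local-file feed."""
    ...

def get_vendor_data(schema, subschema, symbol, intraday, fractal, from_date, to_date, lookback):
    """Hypothetical vendor API feed."""
    ...

data_dispatch_table = {'csv': get_csv_data,
                       'vendor': get_vendor_data}

schema = 'vendor'
if schema in data_dispatch_table:
    df = data_dispatch_table[schema](schema, 'eod', 'AAPL', False, '1d',
                                     '2024-01-01', '2024-06-30', 180)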