Example #1
    def set_up_data(self, data_gross=None, data_costs=None, weight_matrix=None):
        if weight_matrix is not None:
            setattr(self, "data", weight_matrix.ffill())
            return None
        
        log=self.log
        frequency=self.frequency
        equalise_gross = self.equalise_gross
        cost_multiplier = self.cost_multiplier
        annualisation = self.annualisation
        period_target_SR = self.period_target_SR

        data_gross = [data_item.cumsum().resample(frequency).last().diff() for
                      data_item in data_gross]

        data_costs = [data_item.cumsum().resample(frequency).last().diff() for
                      data_item in data_costs]

        data_gross = df_from_list(data_gross)
        data_costs = df_from_list(data_costs)

        if equalise_gross:
            log.terse("Setting all gross returns to be identical - optimisation driven only by costs")
        if cost_multiplier != 1.0:
            log.terse("Using cost multiplier on optimisation of %.2f" % cost_multiplier)

        data = work_out_net(data_gross, data_costs, annualisation=annualisation,
                            equalise_gross=equalise_gross, cost_multiplier=cost_multiplier,
                            period_target_SR=period_target_SR)
        setattr(self, "data", data)
Example #2
    def set_up_data(self, data_gross=None, data_costs=None,
                    weight_matrix=None):
        """

        Optimise weights over some returns data

        :param data_gross: Returns data for gross returns
        :type data_gross: pd.DataFrame or list if pooling

        :param data_costs: Returns data for costs
        :type data_costs: pd.DataFrame or list if pooling

        :param weight_matrix: a weight matrix, used when weights are fixed (e.g. equal weights) and returns data aren't needed
        :type weight_matrix: pd.DataFrame or list if pooling

        """
        if weight_matrix is not None:
            setattr(self, "data", weight_matrix.ffill())
            return None

        log = self.log
        frequency = self.frequency
        equalise_gross = self.equalise_gross
        cost_multiplier = self.cost_multiplier
        annualisation = self.annualisation
        period_target_SR = self.period_target_SR

        # resample, indexing before and differencing after (returns, remember)
        data_gross = [data_item.cumsum().resample(frequency).last().diff() for
                      data_item in data_gross]

        data_costs = [data_item.cumsum().resample(frequency).last().diff() for
                      data_item in data_costs]

        # stack de-pool pooled data
        data_gross = df_from_list(data_gross)
        data_costs = df_from_list(data_costs)

        # net gross and costs
        if equalise_gross:
            log.terse(
                "Setting all gross returns to be identical - optimisation driven only by costs")
        if cost_multiplier != 1.0:
            log.terse(
                "Using cost multiplier on optimisation of %.2f" %
                cost_multiplier)

        data = work_out_net(data_gross, data_costs, annualisation=annualisation,
                            equalise_gross=equalise_gross, cost_multiplier=cost_multiplier,
                            period_target_SR=period_target_SR)

        setattr(self, "data", data)
Example #3
    def __init__(self, data, frequency="W",
                 date_method="expanding", rollyears=20, 
                 dict_group=dict(), boring_offdiag=0.99, cleaning=True, **kwargs):
        cleaning=str2Bool(cleaning)
        group_dict=group_dict_from_natural(dict_group)
        data=df_from_list(data)    
        column_names=list(data.columns)
        data = data.resample(frequency).last()
        fit_dates = generate_fitting_dates(data, date_method=date_method, rollyears=rollyears)
        size=len(column_names)
        corr_with_no_data=boring_corr_matrix(size, offdiag=boring_offdiag)        
        corr_list=[]        
        print("Correlation estimate")
        for fit_period in fit_dates:
            print("Estimating from %s to %s" % (fit_period.period_start, fit_period.period_end))
            if fit_period.no_data:
                corr_with_nan=boring_corr_matrix(size, offdiag=np.nan, diag=np.nan)
                corrmat=corr_with_nan                
            else:                
                data_for_estimate=data[fit_period.fit_start:fit_period.fit_end]  
                corrmat=correlation_single_period(data_for_estimate, **kwargs)

            if cleaning:
                current_period_data=data[fit_period.fit_start:fit_period.fit_end] 
                must_haves=must_have_item(current_period_data)
                corrmat=clean_correlation(corrmat, corr_with_no_data, must_haves) 

            corr_list.append(corrmat)
        
        setattr(self, "corr_list", corr_list)
        setattr(self, "columns", column_names)
        setattr(self, "fit_dates", fit_dates)
Example #4
    def __init__(self, data, log=logtoscreen("optimiser"), frequency="W", date_method="expanding", 
                 rollyears=20, 
                 dict_group=dict(), boring_offdiag=0.99, cleaning=True, **kwargs):
        cleaning=str2Bool(cleaning)
    
        ## grouping dictionary, convert to faster, algo friendly, form
        group_dict=group_dict_from_natural(dict_group)

        data=df_from_list(data)    
        column_names=list(data.columns)

        data = data.resample(frequency).last()
            
        ### Generate time periods
        fit_dates = generate_fitting_dates(data, date_method=date_method, rollyears=rollyears)

        size=len(column_names)
        corr_with_no_data=boring_corr_matrix(size, offdiag=boring_offdiag)
        
        ## create a list of correlation matrices
        corr_list=[]
        
        log.terse("Correlation estimate")
        
        ## Now for each time period, estimate correlation
        for fit_period in fit_dates:
            log.msg("Estimating from %s to %s" % (fit_period.period_start, fit_period.period_end))
            
            if fit_period.no_data:
                ## no data to fit with
                corr_with_nan=boring_corr_matrix(size, offdiag=np.nan, diag=np.nan)
                corrmat=corr_with_nan
                
            else:
                
                data_for_estimate=data[fit_period.fit_start:fit_period.fit_end] 
                
                corrmat=correlation_single_period(data_for_estimate, 
                                                     **kwargs)

            if cleaning:
                current_period_data=data[fit_period.fit_start:fit_period.fit_end] 
                must_haves=must_have_item(current_period_data)

                # means we can use earlier correlations with sensible values
                corrmat=clean_correlation(corrmat, corr_with_no_data, must_haves) 

            corr_list.append(corrmat)
        
        setattr(self, "corr_list", corr_list)
        setattr(self, "columns", column_names)
        setattr(self, "fit_dates", fit_dates)
Example #5
    def __init__(self,
                 data,
                 log=logtoscreen("optimiser"),
                 frequency="W",
                 date_method="expanding",
                 rollyears=20,
                 dict_group=dict(),
                 boring_offdiag=0.99,
                 cleaning=True,
                 **kwargs):
        """

        We generate a correlation from either a pd.DataFrame, or a list of them if we're pooling

        It's important that forward filling, or index / ffill / diff, has been done before we begin

        :param data: Data to get correlations from
        :type data: pd.DataFrame or list if pooling

        :param frequency: Downsampling frequency. Must be "D", "W" or bigger
        :type frequency: str

        :param date_method: Method to pass to generate_fitting_dates
        :type date_method: str

        :param roll_years: If date_method is "rolling", number of years in window
        :type roll_years: int

        :param dict_group: dictionary of groupings; used to replace missing values
        :type dict_group: dict

        :param boring_offdiag: Value used in creating 'boring' matrix, for when no data
        :type boring_offdiag: float

        :param **kwargs: passed to correlation_single_period

        :returns: CorrelationList
        """

        cleaning = str2Bool(cleaning)

        # grouping dictionary, convert to faster, algo friendly, form
        group_dict = group_dict_from_natural(dict_group)

        data = df_from_list(data)
        column_names = list(data.columns)

        data = data.resample(frequency).last()

        # Generate time periods
        fit_dates = generate_fitting_dates(data,
                                           date_method=date_method,
                                           rollyears=rollyears)

        size = len(column_names)
        corr_with_no_data = boring_corr_matrix(size, offdiag=boring_offdiag)

        # create a list of correlation matrices
        corr_list = []

        progress = progressBar(len(fit_dates), "Estimating correlations")
        # Now for each time period, estimate correlation
        for fit_period in fit_dates:
            progress.iterate()
            if fit_period.no_data:
                # no data to fit with
                corr_with_nan = boring_corr_matrix(size,
                                                   offdiag=np.nan,
                                                   diag=np.nan)
                corrmat = corr_with_nan

            else:

                data_for_estimate = data[fit_period.fit_start:fit_period.
                                         fit_end]

                corrmat = correlation_single_period(data_for_estimate,
                                                    **kwargs)

            if cleaning:
                current_period_data = data[fit_period.fit_start:fit_period.
                                           fit_end]
                must_haves = must_have_item(current_period_data)

                # means we can use earlier correlations with sensible values
                corrmat = clean_correlation(corrmat, corr_with_no_data,
                                            must_haves)

            corr_list.append(corrmat)

        setattr(self, "corr_list", corr_list)
        setattr(self, "columns", column_names)
        setattr(self, "fit_dates", fit_dates)
Example #6
    def __init__(self,
                 data,
                 frequency="W",
                 date_method="expanding",
                 rollyears=20,
                 **kwargs):
        """

        We generate a correlation from either a pd.DataFrame, or a list of them if we're pooling

        It's important that forward filling, or index / ffill / diff, has been done before we begin

        :param data: simData to get correlations from
        :type data: pd.DataFrame or list if pooling

        :param frequency: Downsampling frequency. Must be "D", "W" or bigger
        :type frequency: str

        :param date_method: Method to pass to generate_fitting_dates
        :type date_method: str

        :param roll_years: If date_method is "rolling", number of years in window
        :type roll_years: int

        :param **kwargs: passed to correlationSinglePeriod

        :returns: CorrelationList
        """

        if isinstance(data, list):

            # turn the list of data into a single dataframe. This will have a unique time series, which we manage
            #   through adding a small offset of a few microseconds

            length_of_data = len(data)
            data_resampled = [
                data_item.resample(frequency).last() for data_item in data
            ]
            data_as_df = df_from_list(data_resampled)

        else:
            length_of_data = 1
            data_as_df = data.resample(frequency).last()

        column_names = list(data_as_df.columns)

        # Generate time periods
        fit_dates = generate_fitting_dates(data_as_df,
                                           date_method=date_method,
                                           rollyears=rollyears)

        # create a single period correlation estimator
        correlation_estimator_for_one_period = correlationSinglePeriod(
            data_as_df, length_of_data=length_of_data, **kwargs)

        # create a list of correlation matrices
        corr_list = []

        progress = progressBar(len(fit_dates), "Estimating correlations")
        # Now for each time period, estimate correlation
        for fit_period in fit_dates:

            progress.iterate()
            corrmat = correlation_estimator_for_one_period.calculate(
                fit_period)
            corr_list.append(corrmat)

        setattr(self, "corr_list", corr_list)
        setattr(self, "columns", column_names)
        setattr(self, "fit_dates", fit_dates)
Example #7
    def set_up_data(self, data, frequency = "W", equalise_gross = False,
                    cost_multiplier = 1.0, annualisation = BUSINESS_DAYS_IN_YEAR,
                    ann_target_SR = TARGET_ANN_SR,
                    use_pooled_costs = False, pool_gross_returns = False,
                    identifier=None):
        """

        Optimise weights over some returns data

        :param data: pooled returns data containing gross returns and costs, keyed by code
        :type data: dict-like of grouped P&L data

        """

        log = self.log

        # have to decode these
        # returns two lists of pd.DataFrames
        # The weighting function requires two lists of pd.DataFrames,
        # one gross, one for costs

        if identifier is None and len(data.keys()) > 1:
            log.warning(
                "No identifier passed to optimisation code with pooled data - using an arbitrary code - results may be weird")
            identifier = list(data.keys())[0]

        (data_gross, data_costs) = decompose_group_pandl(
            data, identifier, pool_costs=use_pooled_costs,
            pool_gross = pool_gross_returns)

        # resample, indexing before and differencing after (returns, remember)
        data_gross = [
            data_item.cumsum().resample(frequency).last().diff()
            for data_item in data_gross
        ]

        data_costs = [
            data_item.cumsum().resample(frequency).last().diff()
            for data_item in data_costs
        ]

        # for diagnostic purposes
        # FIXME: HACK TO GET THIS REFACTOR WORKING
        self.unmultiplied_costs = df_from_list(data_costs)

        # net gross and costs
        # first some warnings
        if equalise_gross:
            log.terse(
                "Setting all gross returns to be identical - optimisation driven only by costs"
            )

        if cost_multiplier != 1.0:
            log.terse("Using cost multiplier on optimisation of %.2f" %
                      cost_multiplier)

        # Will be needed if we equalise_gross returns
        period_target_SR = ann_target_SR / (annualisation ** .5)

        # now work out the net
        net_return_data = work_out_net(
            data_gross,
            data_costs,
            equalise_gross=equalise_gross,
            cost_multiplier=cost_multiplier,
            period_target_SR=period_target_SR)

        # FIXME: I STILL HAVE CONCERNS THAT THIS PREMATURE, SO DIVE INTO OPTIMISATION CODE AT NEXT REFACTOR
        net_return_data = df_from_list(net_return_data)

        setattr(self, "data", net_return_data)
        setattr(self, "period_target_SR", period_target_SR)
Example #8
    def __init__(self, data_gross, data_costs, log=logtoscreen("optimiser"), frequency="W", date_method="expanding", 
                         rollyears=20, fit_method="bootstrap", cleaning=True, equalise_gross=False,
                         cost_multiplier=1.0, apply_cost_weight=True, ceiling_cost_SR=0.13,
                         ann_target_SR=TARGET_ANN_SR,
                         **passed_params):
        
        """
    
        Optimise weights over some returns data
        
        :param data_gross: Returns data for gross returns
        :type data_gross: pd.DataFrame or list if pooling

        :param data_costs: Returns data for costs
        :type data_costs: pd.DataFrame or list if pooling
    
        :param frequency: Downsampling frequency. Must be "D", "W" or bigger
        :type frequency: str
    
        :param date_method: Method to pass to generate_fitting_dates 
        :type date_method: str
    
        :param roll_years: If date_method is "rolling", number of years in window
        :type roll_years: int
    
        :param fit_method: Method used for fitting, one of 'bootstrap', 'shrinkage', 'one_period'
        :type fit_method: str
    
        :param equalise_gross: Should we equalise expected gross returns so that only costs affect weightings?
        :type equalise_gross: bool

        :param cost_multiplier: Multiply costs by this number
        :type cost_multiplier: float

        :param apply_cost_weight: Should we adjust our weightings to reflect costs?
        :type apply_cost_weight: bool

        :param ceiling_cost_SR: What is the maximum SR cost beyond which I don't allocate to an asset. Set to 999 to avoid using.
        :type ceiling_cost_SR: float
    
        :param *_estimate_params: dicts of **kwargs to pass to moments estimation, and optimisation functions
        
        :returns: pd.DataFrame of weights
        """

        ## Because interaction of parameters is complex, display warnings         
        display_warnings(log, cost_multiplier, equalise_gross, apply_cost_weight, **passed_params)
        
        cleaning=str2Bool(cleaning)
        optimise_params=copy(passed_params)

        ## annualisation
        ann_dict=dict(D=BUSINESS_DAYS_IN_YEAR, W=WEEKS_IN_YEAR, M=MONTHS_IN_YEAR, Y=1.0)
        annualisation=ann_dict.get(frequency, 1.0)

        period_target_SR=ann_target_SR/(annualisation**.5)
        ceiling_cost_SR_period=ceiling_cost_SR/(annualisation**.5)
        
        ## A moments estimator works out the mean, vol, correlation
        ## Also stores annualisation factor and target SR (used for shrinkage and equalising)
        moments_estimator=momentsEstimator(optimise_params, annualisation,  ann_target_SR)

        ## The optimiser instance will do the optimisation once we have the appropriate data
        optimiser=optimiserWithParams(optimise_params, moments_estimator)
    
    
        ## resample, indexing before and differencing after (returns, remember)
        data_gross = [data_item.cumsum().resample(frequency).last().diff() for
                      data_item in data_gross]

        data_costs = [data_item.cumsum().resample(frequency).last().diff() for
                      data_item in data_costs]

        ## stack de-pool pooled data    
        data_gross=df_from_list(data_gross)    
        data_costs=df_from_list(data_costs)    
        
        ## net gross and costs
        if equalise_gross:
            log.terse("Setting all gross returns to be identical - optimisation driven only by costs")
        if cost_multiplier!=1.0:
            log.terse("Using cost multiplier on optimisation of %.2f" % cost_multiplier)
        
        
        data = work_out_net(data_gross, data_costs, annualisation=annualisation,
                            equalise_gross=equalise_gross, cost_multiplier=cost_multiplier,
                            ceiling_cost_ann_SR=ceiling_cost_SR, 
                            period_target_SR=period_target_SR)
            
        fit_dates = generate_fitting_dates(data, date_method=date_method, rollyears=rollyears)
        setattr(self, "fit_dates", fit_dates)
    
        ## Now for each time period, estimate weights
        ## create a list of weight vectors
        weight_list=[]
        
        ## create a class object for each period
        opt_results=[]
        
        log.terse("Optimising...")
        
        for fit_period in fit_dates:
            log.msg("Optimising for data from %s to %s" % (str(fit_period.period_start), str(fit_period.period_end)))
            ## Do the optimisation for one period, using a particular optimiser instance
            results_this_period=optSinglePeriod(self, data, fit_period, optimiser, cleaning)

            opt_results.append(results_this_period)

            weights=results_this_period.weights
            
            ## We adjust dates slightly to ensure no overlaps
            dindex=[fit_period.period_start+datetime.timedelta(days=1), 
                    fit_period.period_end-datetime.timedelta(days=1)]
            
            ## create a double row to delineate start and end of test period
            weight_row=pd.DataFrame([weights]*2, index=dindex, columns=data.columns)
            weight_list.append(weight_row)

        ## Stack everything up    
        raw_weight_df=pd.concat(weight_list, axis=0)

        if apply_cost_weight:
            log.terse("Applying cost weighting to optimisation results")
            weight_df = apply_cost_weighting(raw_weight_df, data_gross, data_costs, annualisation)
        else:
            weight_df =raw_weight_df 
        
        setattr(self, "results", opt_results)
        setattr(self, "weights", weight_df)
        setattr(self, "raw_weights", raw_weight_df)
Example #9
    def set_up_data(self, data, frequency = "W", equalise_gross = False,
                    cost_multiplier = 1.0, annualisation = BUSINESS_DAYS_IN_YEAR,
                    ann_target_SR = TARGET_ANN_SR,
                    use_pooled_costs = False, pool_gross_returns = False,
                    identifier=None):
        """

        Optimise weights over some returns data

        :param data: pooled returns data containing gross returns and costs, keyed by code
        :type data: dict-like of grouped P&L data

        """

        log = self.log

        # have to decode these
        # returns two lists of pd.DataFrames
        # The weighting function requires two lists of pd.DataFrames,
        # one gross, one for costs

        if identifier is None and len(data.keys()) > 1:
            log.warning(
                "No identifier passed to optimisation code with pooled data - using an arbitrary code - results may be weird")
            identifier = list(data.keys())[0]

        (data_gross, data_costs) = decompose_group_pandl(
            data, identifier, pool_costs=use_pooled_costs,
            pool_gross = pool_gross_returns)

        # resample, indexing before and differencing after (returns, remember)
        data_gross = [
            data_item.cumsum().resample(frequency).last().diff()
            for data_item in data_gross
        ]

        data_costs = [
            data_item.cumsum().resample(frequency).last().diff()
            for data_item in data_costs
        ]

        # for diagnostic purposes
        # FIXME: HACK TO GET THIS REFACTOR WORKING
        self.unmultiplied_costs = df_from_list(data_costs)

        # net gross and costs
        # first some warnings
        if equalise_gross:
            log.terse(
                "Setting all gross returns to be identical - optimisation driven only by costs"
            )

        if cost_multiplier != 1.0:
            log.terse("Using cost multiplier on optimisation of %.2f" %
                      cost_multiplier)

        # Will be needed if we equalise_gross returns
        period_target_SR = ann_target_SR / (annualisation ** .5)

        # now work out the net
        net_return_data = work_out_net(
            data_gross,
            data_costs,
            equalise_gross=equalise_gross,
            cost_multiplier=cost_multiplier,
            period_target_SR=period_target_SR)

        # FIXME: I STILL HAVE CONCERNS THAT THIS PREMATURE, SO DIVE INTO OPTIMISATION CODE AT NEXT REFACTOR
        net_return_data = df_from_list(net_return_data)

        setattr(self, "data", net_return_data)
        setattr(self, "period_target_SR", period_target_SR)
Example #10
    def __init__(self, data, log=logtoscreen("optimiser"), frequency="W", date_method="expanding", rollyears=20, 
                 dict_group=dict(), boring_offdiag=0.99, cleaning=True, **kwargs):
        """
    
        We generate a correlation from either a pd.DataFrame, or a list of them if we're pooling

        It's important that forward filling, or index / ffill / diff, has been done before we begin
                
        :param data: Data to get correlations from
        :type data: pd.DataFrame or list if pooling
    
        :param frequency: Downsampling frequency. Must be "D", "W" or bigger
        :type frequency: str

        :param date_method: Method to pass to generate_fitting_dates 
        :type date_method: str
    
        :param roll_years: If date_method is "rolling", number of years in window
        :type roll_years: int
    
        :param dict_group: dictionary of groupings; used to replace missing values
        :type dict_group: dict
    
        :param boring_offdiag: Value used in creating 'boring' matrix, for when no data
        :type boring_offdiag: float 
    
        :param **kwargs: passed to correlation_single_period
        
        :returns: CorrelationList
        """

        cleaning=str2Bool(cleaning)
    
        ## grouping dictionary, convert to faster, algo friendly, form
        group_dict=group_dict_from_natural(dict_group)

        data=df_from_list(data)    
        column_names=list(data.columns)

        data = data.resample(frequency).last()
            
        ### Generate time periods
        fit_dates = generate_fitting_dates(data, date_method=date_method, rollyears=rollyears)

        size=len(column_names)
        corr_with_no_data=boring_corr_matrix(size, offdiag=boring_offdiag)
        
        ## create a list of correlation matrices
        corr_list=[]
        
        log.terse("Correlation estimate")
        
        ## Now for each time period, estimate correlation
        for fit_period in fit_dates:
            log.msg("Fitting from %s to %s" % (fit_period.period_start, fit_period.period_end))
            
            if fit_period.no_data:
                ## no data to fit with
                corr_with_nan=boring_corr_matrix(size, offdiag=np.nan, diag=np.nan)
                corrmat=corr_with_nan
                
            else:
                
                data_for_estimate=data[fit_period.fit_start:fit_period.fit_end] 
                
                corrmat=correlation_single_period(data_for_estimate, 
                                                     **kwargs)

            if cleaning:
                # means we can use earlier correlations with sensible values
                corrmat=clean_correlation(corrmat, corr_with_no_data, boring_offdiag) 

            corr_list.append(corrmat)
        
        setattr(self, "corr_list", corr_list)
        setattr(self, "columns", column_names)
        setattr(self, "fit_dates", fit_dates)
Example #11
    def __init__(self, data, log=logtoscreen("optimiser"), frequency="W", date_method="expanding", 
                         rollyears=20, fit_method="bootstrap", cleaning=True,
                         **passed_params):
        
        """
    
        Optimise weights over some returns data
        
        :param data: Returns data
        :type data: pd.DataFrame or list if pooling
    
        :param frequency: Downsampling frequency. Must be "D", "W" or bigger
        :type frequency: str
    
        :param date_method: Method to pass to generate_fitting_dates 
        :type date_method: str
    
        :param roll_years: If date_method is "rolling", number of years in window
        :type roll_years: int
    
        :param fit_method: Method used for fitting, one of 'bootstrap', 'shrinkage', 'one_period'
        :type fit_method: str
    
        :param cleaning: Should we clean correlations so can use incomplete data?
        :type cleaning: bool
    
        :param *_estimate_params: dicts of **kwargs to pass to moments estimation, and optimisation functions
        
        :returns: pd.DataFrame of weights
        """
        cleaning=str2Bool(cleaning)
        optimise_params=copy(passed_params)
        
        ## A moments estimator works out the mean, vol, correlation
        moments_estimator=momentsEstimator(optimise_params)

        ## The optimiser instance will do the optimisation once we have the appropriate data
        optimiser=optimiserWithParams(optimise_params, moments_estimator)
    
        ## annualisation
        ann_dict=dict(D=BUSINESS_DAYS_IN_YEAR, W=WEEKS_IN_YEAR, M=MONTHS_IN_YEAR, Y=1.0)
        annualisation=ann_dict.get(frequency, 1.0)
    
        ## de-pool pooled data    
        data=df_from_list(data)    
            
        ## resample, indexing before and differencing after (returns, remember)
        data = data.cumsum().resample(frequency).last().diff()
    
        ## account for change in frequency
        data=data*annualisation
        
        fit_dates = generate_fitting_dates(data, date_method=date_method, rollyears=rollyears)
        setattr(self, "fit_dates", fit_dates)
    
        ## Now for each time period, estimate weights
        ## create a list of weight vectors
        weight_list=[]
        
        ## create a class object for each period
        opt_results=[]
        
        log.terse("Optimising...")
        
        for fit_period in fit_dates:
            log.msg("Optimising for data from %s to %s" % (str(fit_period.period_start), str(fit_period.period_end)))
            ## Do the optimisation for one period, using a particular optimiser instance
            results_this_period=optSinglePeriod(self, data, fit_period, optimiser, cleaning)

            opt_results.append(results_this_period)

            weights=results_this_period.weights
            
            ## We adjust dates slightly to ensure no overlaps
            dindex=[fit_period.period_start+datetime.timedelta(days=1), 
                    fit_period.period_end-datetime.timedelta(days=1)]
            
            ## create a double row to delineate start and end of test period
            weight_row=pd.DataFrame([weights]*2, index=dindex, columns=data.columns)
            weight_list.append(weight_row)

        ## Stack everything up    
        weight_df=pd.concat(weight_list, axis=0)
        
        setattr(self, "results", opt_results)
        setattr(self, "weights", weight_df)
Example #12
    def __init__(self,
                 data_gross,
                 data_costs,
                 log=logtoscreen("optimiser"),
                 frequency="W",
                 date_method="expanding",
                 rollyears=20,
                 fit_method="bootstrap",
                 cleaning=True,
                 equalise_gross=False,
                 cost_multiplier=1.0,
                 apply_cost_weight=True,
                 ceiling_cost_SR=0.13,
                 ann_target_SR=TARGET_ANN_SR,
                 **passed_params):
        """
    
        Optimise weights over some returns data
        
        :param data_gross: Returns data for gross returns
        :type data_gross: pd.DataFrame or list if pooling

        :param data_costs: Returns data for costs
        :type data_costs: pd.DataFrame or list if pooling
    
        :param frequency: Downsampling frequency. Must be "D", "W" or bigger
        :type frequency: str
    
        :param date_method: Method to pass to generate_fitting_dates 
        :type date_method: str
    
        :param roll_years: If date_method is "rolling", number of years in window
        :type roll_years: int
    
        :param fit_method: Method used for fitting, one of 'bootstrap', 'shrinkage', 'one_period'
        :type fit_method: str
    
        :param equalise_gross: Should we equalise expected gross returns so that only costs affect weightings?
        :type equalise_gross: bool

        :param cost_multiplier: Multiply costs by this number
        :type cost_multiplier: float

        :param apply_cost_weight: Should we adjust our weightings to reflect costs?
        :type apply_cost_weight: bool

        :param ceiling_cost_SR: What is the maximum SR cost beyond which I don't allocate to an asset. Set to 999 to avoid using.
        :type ceiling_cost_SR: float
    
        :param *_estimate_params: dicts of **kwargs to pass to moments estimation, and optimisation functions
        
        :returns: pd.DataFrame of weights
        """

        ## Because interaction of parameters is complex, display warnings
        display_warnings(log, cost_multiplier, equalise_gross,
                         apply_cost_weight, **passed_params)

        cleaning = str2Bool(cleaning)
        optimise_params = copy(passed_params)

        ## annualisation
        ann_dict = dict(D=BUSINESS_DAYS_IN_YEAR,
                        W=WEEKS_IN_YEAR,
                        M=MONTHS_IN_YEAR,
                        Y=1.0)
        annualisation = ann_dict.get(frequency, 1.0)

        period_target_SR = ann_target_SR / (annualisation**.5)
        ceiling_cost_SR_period = ceiling_cost_SR / (annualisation**.5)

        ## A moments estimator works out the mean, vol, correlation
        ## Also stores annualisation factor and target SR (used for shrinkage and equalising)
        moments_estimator = momentsEstimator(optimise_params, annualisation,
                                             ann_target_SR)

        ## The optimiser instance will do the optimisation once we have the appropriate data
        optimiser = optimiserWithParams(optimise_params, moments_estimator)

        ## resample, indexing before and differencing after (returns, remember)
        data_gross = [
            data_item.cumsum().resample(frequency).last().diff()
            for data_item in data_gross
        ]

        data_costs = [
            data_item.cumsum().resample(frequency).last().diff()
            for data_item in data_costs
        ]

        ## stack de-pool pooled data
        data_gross = df_from_list(data_gross)
        data_costs = df_from_list(data_costs)

        ## net gross and costs
        if equalise_gross:
            log.terse(
                "Setting all gross returns to be identical - optimisation driven only by costs"
            )
        if cost_multiplier != 1.0:
            log.terse("Using cost multiplier on optimisation of %.2f" %
                      cost_multiplier)

        data = work_out_net(data_gross,
                            data_costs,
                            annualisation=annualisation,
                            equalise_gross=equalise_gross,
                            cost_multiplier=cost_multiplier,
                            ceiling_cost_ann_SR=ceiling_cost_SR,
                            period_target_SR=period_target_SR)

        fit_dates = generate_fitting_dates(data,
                                           date_method=date_method,
                                           rollyears=rollyears)
        setattr(self, "fit_dates", fit_dates)

        ## Now for each time period, estimate weights
        ## create a list of weight vectors
        weight_list = []

        ## create a class object for each period
        opt_results = []

        log.terse("Optimising...")

        for fit_period in fit_dates:
            log.msg("Optimising for data from %s to %s" %
                    (str(fit_period.period_start), str(fit_period.period_end)))
            ## Do the optimisation for one period, using a particular optimiser instance
            results_this_period = optSinglePeriod(self, data, fit_period,
                                                  optimiser, cleaning)

            opt_results.append(results_this_period)

            weights = results_this_period.weights

            ## We adjust dates slightly to ensure no overlaps
            dindex = [
                fit_period.period_start + datetime.timedelta(days=1),
                fit_period.period_end - datetime.timedelta(days=1)
            ]

            ## create a double row to delineate start and end of test period
            weight_row = pd.DataFrame([weights] * 2,
                                      index=dindex,
                                      columns=data.columns)
            weight_list.append(weight_row)

        ## Stack everything up
        raw_weight_df = pd.concat(weight_list, axis=0)

        if apply_cost_weight:
            log.terse("Applying cost weighting to optimisation results")
            weight_df = apply_cost_weighting(raw_weight_df, data_gross,
                                             data_costs, annualisation)
        else:
            weight_df = raw_weight_df

        setattr(self, "results", opt_results)
        setattr(self, "weights", weight_df)
        setattr(self, "raw_weights", raw_weight_df)
Example #13
    def __init__(self,
                 data,
                 frequency="W",
                 date_method="expanding",
                 rollyears=20,
                 **kwargs):
        """

        We generate a correlation from either a pd.DataFrame, or a list of them if we're pooling

        It's important that forward filling, or index / ffill / diff, has been done before we begin

        :param data: simData to get correlations from
        :type data: pd.DataFrame or list if pooling

        :param frequency: Downsampling frequency. Must be "D", "W" or bigger
        :type frequency: str

        :param date_method: Method to pass to generate_fitting_dates
        :type date_method: str

        :param roll_years: If date_method is "rolling", number of years in window
        :type roll_years: int

        :param **kwargs: passed to correlationSinglePeriod

        :returns: CorrelationList
        """

        if isinstance(data, list):

            # turn the list of data into a single dataframe. This will have a unique time series, which we manage
            #   through adding a small offset of a few microseconds

            length_of_data = len(data)
            data_resampled = [
                data_item.resample(frequency).last() for data_item in data
            ]
            data_as_df = df_from_list(data_resampled)

        else:
            length_of_data = 1
            data_as_df = data.resample(frequency).last()

        column_names = list(data_as_df.columns)

        # Generate time periods
        fit_dates = generate_fitting_dates(
            data_as_df, date_method=date_method, rollyears=rollyears)

        # create a single period correlation estimator
        correlation_estimator_for_one_period = correlationSinglePeriod(
            data_as_df, length_of_data=length_of_data, **kwargs)

        # create a list of correlation matrices
        corr_list = []

        progress = progressBar(len(fit_dates), "Estimating correlations")
        # Now for each time period, estimate correlation
        for fit_period in fit_dates:

            progress.iterate()
            corrmat = correlation_estimator_for_one_period.calculate(
                fit_period)
            corr_list.append(corrmat)

        setattr(self, "corr_list", corr_list)
        setattr(self, "columns", column_names)
        setattr(self, "fit_dates", fit_dates)