def fit_a_filter_datewise(system, rule_name, instrument_code=None, return_period=5,
                          date_method="expanding", rollyears=999, buckets=3):
    """
    Fit a filter for a trading rule over a sequence of date windows.

    If instrument_code is None, fits across all instruments in the system.

    :param system: system object providing .rules, .log and .get_instrument_list()
    :param rule_name: name of the trading rule whose raw forecasts we fit against
    :type rule_name: str

    :param instrument_code: single instrument to fit, or None to pool all instruments
    :type instrument_code: str or None

    :param return_period: passed through to fit_a_filter
    :param date_method: method passed to generate_fitting_dates ("expanding", "rolling", ...)
    :param rollyears: window length in years if date_method is "rolling"
    :param buckets: passed through to fit_a_filter

    :returns: tuple of (fit_dates, list of per-period fit results); periods with
        no data contribute [None, None]
    """
    if instrument_code is None:
        instrument_list = system.get_instrument_list()
    else:
        instrument_list = [instrument_code]

    # Use a distinct loop name ('code') so we don't shadow the
    # instrument_code argument inside the comprehension
    forecasts = [system.rules.get_raw_forecast(code, rule_name)
                 for code in instrument_list]
    fit_dates = generate_fitting_dates(forecasts, date_method, rollyears)

    filter_data = []
    for fit_period in fit_dates:
        system.log.msg("Estimating fitting from %s to %s" %
                       (fit_period.period_start, fit_period.period_end))
        if fit_period.no_data:
            # nothing to fit with in this window
            data = [None, None]
        else:
            data = fit_a_filter(system, rule_name, instrument_list,
                                fit_period.fit_start, fit_period.fit_end,
                                return_period, buckets)
        filter_data.append(data)

    return (fit_dates, filter_data)
def optimise(self, ann_SR_costs=None):
    """
    Optimise weights over some returns data.

    :param ann_SR_costs: annualised SR costs, passed to apply_cost_weighting
        when self.apply_cost_weight is True

    Sets .fit_dates, .results, .weights and .raw_weights on self.
    """
    log = self.log
    date_method = self.date_method
    rollyears = self.rollyears
    optimiser = self.optimiser
    cleaning = self.cleaning
    apply_cost_weight = self.apply_cost_weight

    data = getattr(self, "data", None)
    if data is None:
        log.critical("You need to run .set_up_data() before .optimise()")

    fit_dates = generate_fitting_dates(data, date_method=date_method,
                                       rollyears=rollyears)
    setattr(self, "fit_dates", fit_dates)

    ## Now for each time period, estimate weights
    ## create a list of weight vectors
    weight_list = []

    ## create a class object for each period
    opt_results = []

    ## use the instance logger rather than print()+inspect debug output
    log.terse("Optimising...")

    for fit_period in fit_dates:
        log.msg("Optimising for data from %s to %s" %
                (str(fit_period.period_start), str(fit_period.period_end)))

        ## Do the optimisation for one period, using a particular optimiser instance
        results_this_period = optSinglePeriod(self, data, fit_period,
                                              optimiser, cleaning)
        opt_results.append(results_this_period)
        weights = results_this_period.weights

        ## We adjust dates slightly to ensure no overlaps
        dindex = [fit_period.period_start + datetime.timedelta(days=1),
                  fit_period.period_end - datetime.timedelta(days=1)]

        ## create a double row to delineate start and end of test period
        weight_row = pd.DataFrame([weights] * 2, index=dindex,
                                  columns=data.columns)
        weight_list.append(weight_row)

    ## Stack everything up
    raw_weight_df = pd.concat(weight_list, axis=0)

    if apply_cost_weight:
        log.terse("Applying cost weighting to optimisation results")
        weight_df = apply_cost_weighting(raw_weight_df, ann_SR_costs)
    else:
        weight_df = raw_weight_df

    setattr(self, "results", opt_results)
    setattr(self, "weights", weight_df)
    setattr(self, "raw_weights", raw_weight_df)
def optimise(self, ann_SR_costs=None):
    """
    Optimise weights over some returns data.

    :param ann_SR_costs: annualised SR costs, passed to apply_cost_weighting
        when self.apply_cost_weight is True

    Sets .fit_dates, .results, .weights and .raw_weights on self.
    """
    log = self.log
    date_method = self.date_method
    rollyears = self.rollyears
    optimiser = self.optimiser
    cleaning = self.cleaning
    apply_cost_weight = self.apply_cost_weight

    data = getattr(self, "data", None)
    if data is None:
        log.critical("You need to run .set_up_data() before .optimise()")

    fit_dates = generate_fitting_dates(data, date_method=date_method,
                                       rollyears=rollyears)
    setattr(self, "fit_dates", fit_dates)

    # Now for each time period, estimate weights
    # create a list of weight vectors
    weight_list = []

    # create a class object for each period
    opt_results = []

    # use the instance logger rather than print()+inspect debug output
    log.terse("Optimising...")

    for fit_period in fit_dates:
        log.msg("Optimising for data from %s to %s" %
                (str(fit_period.period_start), str(fit_period.period_end)))

        # Do the optimisation for one period, using a particular optimiser
        # instance
        results_this_period = optSinglePeriod(self, data, fit_period,
                                              optimiser, cleaning)
        opt_results.append(results_this_period)
        weights = results_this_period.weights

        # We adjust dates slightly to ensure no overlaps
        dindex = [
            fit_period.period_start + datetime.timedelta(days=1),
            fit_period.period_end - datetime.timedelta(days=1)
        ]

        # create a double row to delineate start and end of test period
        weight_row = pd.DataFrame([weights] * 2, index=dindex,
                                  columns=data.columns)
        weight_list.append(weight_row)

    # Stack everything up
    raw_weight_df = pd.concat(weight_list, axis=0)

    if apply_cost_weight:
        log.terse("Applying cost weighting to optimisation results")
        weight_df = apply_cost_weighting(raw_weight_df, ann_SR_costs)
    else:
        weight_df = raw_weight_df

    setattr(self, "results", opt_results)
    setattr(self, "weights", weight_df)
    setattr(self, "raw_weights", raw_weight_df)
def __init__(self, data, log=logtoscreen("optimiser"), frequency="W",
             date_method="expanding", rollyears=20, dict_group=dict(),
             boring_offdiag=0.99, cleaning=True, **kwargs):
    """
    We generate a correlation from either a pd.DataFrame, or a list of them
    if we're pooling.

    It's important that forward filling, or index / ffill / diff has been
    done before we begin.

    :param data: Data to get correlations from
    :type data: pd.DataFrame or list if pooling

    :param frequency: Downsampling frequency. Must be "D", "W" or bigger
    :type frequency: str

    :param date_method: Method to pass to generate_fitting_dates
    :type date_method: str

    :param rollyears: If date_method is "rolling", number of years in window
    :type rollyears: int

    :param dict_group: dictionary of groupings; used to replace missing values
    :type dict_group: dict

    :param boring_offdiag: Value used in creating 'boring' matrix, for when no data
    :type boring_offdiag: float

    :param **kwargs: passed to correlation_single_period

    :returns: CorrelationList
    """
    cleaning = str2Bool(cleaning)

    # grouping dictionary, convert to faster, algo friendly, form
    # NOTE(review): group_dict isn't referenced below — confirm it's needed
    group_dict = group_dict_from_natural(dict_group)

    data = df_from_list(data)
    column_names = list(data.columns)

    data = data.resample(frequency).last()

    # Generate time periods
    fit_dates = generate_fitting_dates(data, date_method=date_method,
                                       rollyears=rollyears)

    size = len(column_names)
    corr_with_no_data = boring_corr_matrix(size, offdiag=boring_offdiag)

    # create a list of correlation matrices
    corr_list = []

    progress = progressBar(len(fit_dates), "Estimating correlations")

    # Now for each time period, estimate correlation
    for fit_period in fit_dates:
        progress.iterate()

        # slice once; the same window is used both for estimation and cleaning
        period_data = data[fit_period.fit_start:fit_period.fit_end]

        if fit_period.no_data:
            # no data to fit with
            corrmat = boring_corr_matrix(size, offdiag=np.nan, diag=np.nan)
        else:
            corrmat = correlation_single_period(period_data, **kwargs)

        if cleaning:
            must_haves = must_have_item(period_data)

            # means we can use earlier correlations with sensible values
            corrmat = clean_correlation(corrmat, corr_with_no_data, must_haves)

        corr_list.append(corrmat)

    setattr(self, "corr_list", corr_list)
    setattr(self, "columns", column_names)
    setattr(self, "fit_dates", fit_dates)
def __init__(self, data, frequency="W", date_method="expanding", rollyears=20,
             **kwargs):
    """
    Build a list of correlation matrices, one per fitting period, from a
    pd.DataFrame — or a list of them if we're pooling.

    It's important that forward filling, or index / ffill / diff has been
    done before we begin.

    :param data: simData to get correlations from
    :type data: pd.DataFrame or list if pooling

    :param frequency: Downsampling frequency. Must be "D", "W" or bigger
    :type frequency: str

    :param date_method: Method to pass to generate_fitting_dates
    :type date_method: str

    :param rollyears: If date_method is "rolling", number of years in window
    :type rollyears: int

    :param **kwargs: passed to correlationSinglePeriod

    :returns: CorrelationList
    """
    if isinstance(data, list):
        # Pooling: resample each item, then stack into one frame. The stacked
        # frame gets a unique time series, managed by adding a small offset
        # of a few microseconds.
        length_of_data = len(data)
        resampled_items = [item.resample(frequency).last() for item in data]
        data_as_df = df_from_list(resampled_items)
    else:
        length_of_data = 1
        data_as_df = data.resample(frequency).last()

    column_names = list(data_as_df.columns)

    # Work out the sequence of fitting windows
    fit_dates = generate_fitting_dates(data_as_df, date_method=date_method,
                                       rollyears=rollyears)

    # A single estimator instance is reused for every period
    single_period_estimator = correlationSinglePeriod(
        data_as_df, length_of_data=length_of_data, **kwargs)

    progress = progressBar(len(fit_dates), "Estimating correlations")

    # Estimate one correlation matrix per fitting period
    corr_list = []
    for fit_period in fit_dates:
        progress.iterate()
        corr_list.append(single_period_estimator.calculate(fit_period))

    setattr(self, "corr_list", corr_list)
    setattr(self, "columns", column_names)
    setattr(self, "fit_dates", fit_dates)
def optimise(self):
    """
    Optimise weights over the returns data prepared by .set_up_data().

    Sets .fit_dates, .results, .weights and .raw_weights on self.
    """
    log = self.log

    data = getattr(self, "data", None)
    if data is None:
        log.critical("You need to run .set_up_data() before .optimise()")

    fit_dates = generate_fitting_dates(
        data, date_method=self.date_method, rollyears=self.rollyears)
    setattr(self, "fit_dates", fit_dates)

    one_day = datetime.timedelta(days=1)

    # Per-period optimisation results and the weight rows they produce
    opt_results = []
    weight_list = []

    progress = progressBar(len(fit_dates), "Optimising")

    for fit_period in fit_dates:
        # One optimisation per period, via the configured optimiser instance
        period_result = optSinglePeriod(self, data, fit_period,
                                        self.optimiser, self.cleaning)
        opt_results.append(period_result)

        # Nudge the dates inwards so consecutive periods never overlap;
        # the double row marks the start and end of the test period
        row_dates = [fit_period.period_start + one_day,
                     fit_period.period_end - one_day]
        weight_row = pd.DataFrame([period_result.weights] * 2,
                                  index=row_dates, columns=data.columns)
        weight_list.append(weight_row)

        progress.iterate()

    # Stack everything up
    raw_weight_df = pd.concat(weight_list, axis=0)

    if self.apply_cost_weight:
        log.terse("Applying cost weighting to optimisation results")
        # ann_SR_costs must be calculated before a cost multiplier is applied
        ann_SR_costs = self.calculate_ann_SR_costs()
        weight_df = apply_cost_weighting(raw_weight_df, ann_SR_costs)
    else:
        weight_df = raw_weight_df

    setattr(self, "results", opt_results)
    setattr(self, "weights", weight_df)
    setattr(self, "raw_weights", raw_weight_df)
def __init__(self, data_gross, data_costs, log=logtoscreen("optimiser"),
             frequency="W", date_method="expanding", rollyears=20,
             fit_method="bootstrap", cleaning=True, equalise_gross=False,
             cost_multiplier=1.0, apply_cost_weight=True,
             ceiling_cost_SR=0.13, ann_target_SR=TARGET_ANN_SR,
             **passed_params):
    """
    Optimise weights over some returns data.

    :param data_gross: Returns data for gross returns
    :type data_gross: pd.DataFrame or list if pooling

    :param data_costs: Returns data for costs
    :type data_costs: pd.DataFrame or list if pooling

    :param frequency: Downsampling frequency. Must be "D", "W" or bigger
    :type frequency: str

    :param date_method: Method to pass to generate_fitting_dates
    :type date_method: str

    :param rollyears: If date_method is "rolling", number of years in window
    :type rollyears: int

    :param fit_method: Method used for fitting, one of 'bootstrap', 'shrinkage', 'one_period'
    :type fit_method: str

    :param equalise_gross: Should we equalise expected gross returns so that only costs affect weightings?
    :type equalise_gross: bool

    :param cost_multiplier: Multiply costs by this number
    :type cost_multiplier: float

    :param apply_cost_weight: Should we adjust our weightings to reflect costs?
    :type apply_cost_weight: bool

    :param ceiling_cost_SR: Maximum SR cost beyond which we don't allocate to an asset. Set to 999 to avoid using.
    :type ceiling_cost_SR: float

    :param *_estimate_params: dicts of **kwargs to pass to moments estimation, and optimisation functions

    :returns: pd.DataFrame of weights
    """
    ## Because interaction of parameters is complex, display warnings
    display_warnings(log, cost_multiplier, equalise_gross,
                     apply_cost_weight, **passed_params)

    cleaning = str2Bool(cleaning)
    optimise_params = copy(passed_params)

    ## annualisation factor for the chosen resampling frequency
    ann_dict = dict(D=BUSINESS_DAYS_IN_YEAR, W=WEEKS_IN_YEAR,
                    M=MONTHS_IN_YEAR, Y=1.0)
    annualisation = ann_dict.get(frequency, 1.0)

    period_target_SR = ann_target_SR / (annualisation ** .5)
    # NOTE(review): computed but not used below; the annual ceiling_cost_SR is
    # what gets passed to work_out_net — confirm this is intentional
    ceiling_cost_SR_period = ceiling_cost_SR / (annualisation ** .5)

    ## A moments estimator works out the mean, vol, correlation
    ## Also stores annualisation factor and target SR (used for shrinkage and equalising)
    moments_estimator = momentsEstimator(optimise_params, annualisation,
                                         ann_target_SR)

    ## The optimiser instance will do the optimisation once we have the appropriate data
    optimiser = optimiserWithParams(optimise_params, moments_estimator)

    ## resample, indexing before and differencing after (returns, remember)
    ## .resample(...).last() replaces the resample(..., how="last") form,
    ## whose 'how' keyword was removed from pandas
    data_gross = [data_item.cumsum().resample(frequency).last().diff()
                  for data_item in data_gross]
    data_costs = [data_item.cumsum().resample(frequency).last().diff()
                  for data_item in data_costs]

    ## stack de-pool pooled data
    data_gross = df_from_list(data_gross)
    data_costs = df_from_list(data_costs)

    ## net gross and costs
    if equalise_gross:
        log.terse("Setting all gross returns to be identical - optimisation driven only by costs")
    if cost_multiplier != 1.0:
        log.terse("Using cost multiplier on optimisation of %.2f" % cost_multiplier)

    data = work_out_net(data_gross, data_costs,
                        annualisation=annualisation,
                        equalise_gross=equalise_gross,
                        cost_multiplier=cost_multiplier,
                        ceiling_cost_ann_SR=ceiling_cost_SR,
                        period_target_SR=period_target_SR)

    fit_dates = generate_fitting_dates(data, date_method=date_method,
                                       rollyears=rollyears)
    setattr(self, "fit_dates", fit_dates)

    ## Now for each time period, estimate weights
    ## create a list of weight vectors
    weight_list = []

    ## create a class object for each period
    opt_results = []

    log.terse("Optimising...")

    for fit_period in fit_dates:
        log.msg("Optimising for data from %s to %s" %
                (str(fit_period.period_start), str(fit_period.period_end)))

        ## Do the optimisation for one period, using a particular optimiser instance
        results_this_period = optSinglePeriod(self, data, fit_period,
                                              optimiser, cleaning)
        opt_results.append(results_this_period)
        weights = results_this_period.weights

        ## We adjust dates slightly to ensure no overlaps
        dindex = [fit_period.period_start + datetime.timedelta(days=1),
                  fit_period.period_end - datetime.timedelta(days=1)]

        ## create a double row to delineate start and end of test period
        weight_row = pd.DataFrame([weights] * 2, index=dindex,
                                  columns=data.columns)
        weight_list.append(weight_row)

    ## Stack everything up
    raw_weight_df = pd.concat(weight_list, axis=0)

    if apply_cost_weight:
        log.terse("Applying cost weighting to optimisation results")
        weight_df = apply_cost_weighting(raw_weight_df, data_gross,
                                         data_costs, annualisation)
    else:
        weight_df = raw_weight_df

    setattr(self, "results", opt_results)
    setattr(self, "weights", weight_df)
    setattr(self, "raw_weights", raw_weight_df)
def __init__(self, data, log=logtoscreen("optimiser"), frequency="W",
             date_method="expanding", rollyears=20, dict_group=dict(),
             boring_offdiag=0.99, cleaning=True, **kwargs):
    """
    We generate a correlation from either a pd.DataFrame, or a list of them
    if we're pooling.

    It's important that forward filling, or index / ffill / diff has been
    done before we begin.

    :param data: Data to get correlations from
    :type data: pd.DataFrame or list if pooling

    :param frequency: Downsampling frequency. Must be "D", "W" or bigger
    :type frequency: str

    :param date_method: Method to pass to generate_fitting_dates
    :type date_method: str

    :param rollyears: If date_method is "rolling", number of years in window
    :type rollyears: int

    :param dict_group: dictionary of groupings; used to replace missing values
    :type dict_group: dict

    :param boring_offdiag: Value used in creating 'boring' matrix, for when no data
    :type boring_offdiag: float

    :param **kwargs: passed to correlation_single_period

    :returns: CorrelationList
    """
    cleaning = str2Bool(cleaning)

    ## grouping dictionary, convert to faster, algo friendly, form
    ## NOTE(review): group_dict isn't referenced below — confirm it's needed
    group_dict = group_dict_from_natural(dict_group)

    data = df_from_list(data)
    column_names = list(data.columns)

    ## .resample(...).last() replaces the resample(..., how="last") form,
    ## whose 'how' keyword was removed from pandas
    data = data.resample(frequency).last()

    ### Generate time periods
    fit_dates = generate_fitting_dates(data, date_method=date_method,
                                       rollyears=rollyears)

    size = len(column_names)
    corr_with_no_data = boring_corr_matrix(size, offdiag=boring_offdiag)

    ## create a list of correlation matrices
    corr_list = []

    log.terse("Correlation estimate")

    ## Now for each time period, estimate correlation
    for fit_period in fit_dates:
        log.msg("Fitting from %s to %s" % (fit_period.period_start,
                                           fit_period.period_end))

        if fit_period.no_data:
            ## no data to fit with
            corr_with_nan = boring_corr_matrix(size, offdiag=np.nan,
                                               diag=np.nan)
            corrmat = corr_with_nan
        else:
            data_for_estimate = data[fit_period.fit_start:fit_period.fit_end]
            corrmat = correlation_single_period(data_for_estimate, **kwargs)

        if cleaning:
            # means we can use earlier correlations with sensible values
            # NOTE(review): another version in this file passes must_haves as
            # the third argument — confirm against clean_correlation's signature
            corrmat = clean_correlation(corrmat, corr_with_no_data,
                                        boring_offdiag)

        corr_list.append(corrmat)

    setattr(self, "corr_list", corr_list)
    setattr(self, "columns", column_names)
    setattr(self, "fit_dates", fit_dates)
def __init__(self, data, log=logtoscreen("optimiser"), frequency="W",
             date_method="expanding", rollyears=20, fit_method="bootstrap",
             cleaning=True, **passed_params):
    """
    Optimise weights over some returns data.

    :param data: Returns data
    :type data: pd.DataFrame or list if pooling

    :param frequency: Downsampling frequency. Must be "D", "W" or bigger
    :type frequency: str

    :param date_method: Method to pass to generate_fitting_dates
    :type date_method: str

    :param rollyears: If date_method is "rolling", number of years in window
    :type rollyears: int

    :param fit_method: Method used for fitting, one of 'bootstrap', 'shrinkage', 'one_period'
    :type fit_method: str

    :param cleaning: Should we clean correlations so can use incomplete data?
    :type cleaning: bool

    :param *_estimate_params: dicts of **kwargs to pass to moments estimation, and optimisation functions

    :returns: pd.DataFrame of weights
    """
    cleaning = str2Bool(cleaning)
    optimise_params = copy(passed_params)

    ## A moments estimator works out the mean, vol, correlation
    moments_estimator = momentsEstimator(optimise_params)

    ## The optimiser instance will do the optimisation once we have the appropriate data
    optimiser = optimiserWithParams(optimise_params, moments_estimator)

    ## annualisation factor for the chosen resampling frequency
    ann_dict = dict(D=BUSINESS_DAYS_IN_YEAR, W=WEEKS_IN_YEAR,
                    M=MONTHS_IN_YEAR, Y=1.0)
    annualisation = ann_dict.get(frequency, 1.0)

    ## de-pool pooled data
    data = df_from_list(data)

    ## resample, indexing before and differencing after (returns, remember)
    ## .resample(...).last() replaces the resample(..., how="last") form,
    ## whose 'how' keyword was removed from pandas
    data = data.cumsum().resample(frequency).last().diff()

    ## account for change in frequency
    data = data * annualisation

    fit_dates = generate_fitting_dates(data, date_method=date_method,
                                       rollyears=rollyears)
    setattr(self, "fit_dates", fit_dates)

    ## Now for each time period, estimate weights
    ## create a list of weight vectors
    weight_list = []

    ## create a class object for each period
    opt_results = []

    log.terse("Optimising...")

    for fit_period in fit_dates:
        log.msg("Optimising for data from %s to %s" %
                (str(fit_period.period_start), str(fit_period.period_end)))

        ## Do the optimisation for one period, using a particular optimiser instance
        results_this_period = optSinglePeriod(self, data, fit_period,
                                              optimiser, cleaning)
        opt_results.append(results_this_period)
        weights = results_this_period.weights

        ## We adjust dates slightly to ensure no overlaps
        dindex = [fit_period.period_start + datetime.timedelta(days=1),
                  fit_period.period_end - datetime.timedelta(days=1)]

        ## create a double row to delineate start and end of test period
        weight_row = pd.DataFrame([weights] * 2, index=dindex,
                                  columns=data.columns)
        weight_list.append(weight_row)

    ## Stack everything up
    weight_df = pd.concat(weight_list, axis=0)

    setattr(self, "results", opt_results)
    setattr(self, "weights", weight_df)
def __init__(self, data_gross, data_costs, log=logtoscreen("optimiser"),
             frequency="W", date_method="expanding", rollyears=20,
             fit_method="bootstrap", cleaning=True, equalise_gross=False,
             cost_multiplier=1.0, apply_cost_weight=True,
             ceiling_cost_SR=0.13, ann_target_SR=TARGET_ANN_SR,
             **passed_params):
    """
    Optimise weights over some returns data.

    :param data_gross: Returns data for gross returns
    :type data_gross: pd.DataFrame or list if pooling

    :param data_costs: Returns data for costs
    :type data_costs: pd.DataFrame or list if pooling

    :param frequency: Downsampling frequency. Must be "D", "W" or bigger
    :type frequency: str

    :param date_method: Method to pass to generate_fitting_dates
    :type date_method: str

    :param rollyears: If date_method is "rolling", number of years in window
    :type rollyears: int

    :param fit_method: Method used for fitting, one of 'bootstrap', 'shrinkage', 'one_period'
    :type fit_method: str

    :param equalise_gross: Should we equalise expected gross returns so that only costs affect weightings?
    :type equalise_gross: bool

    :param cost_multiplier: Multiply costs by this number
    :type cost_multiplier: float

    :param apply_cost_weight: Should we adjust our weightings to reflect costs?
    :type apply_cost_weight: bool

    :param ceiling_cost_SR: Maximum SR cost beyond which we don't allocate to an asset. Set to 999 to avoid using.
    :type ceiling_cost_SR: float

    :param *_estimate_params: dicts of **kwargs to pass to moments estimation, and optimisation functions

    :returns: pd.DataFrame of weights
    """
    ## Because interaction of parameters is complex, display warnings
    display_warnings(log, cost_multiplier, equalise_gross,
                     apply_cost_weight, **passed_params)

    cleaning = str2Bool(cleaning)
    optimise_params = copy(passed_params)

    ## annualisation factor for the chosen resampling frequency
    ann_dict = dict(D=BUSINESS_DAYS_IN_YEAR, W=WEEKS_IN_YEAR,
                    M=MONTHS_IN_YEAR, Y=1.0)
    annualisation = ann_dict.get(frequency, 1.0)

    period_target_SR = ann_target_SR / (annualisation ** .5)
    # NOTE(review): computed but not used below; the annual ceiling_cost_SR is
    # what gets passed to work_out_net — confirm this is intentional
    ceiling_cost_SR_period = ceiling_cost_SR / (annualisation ** .5)

    ## A moments estimator works out the mean, vol, correlation
    ## Also stores annualisation factor and target SR (used for shrinkage and equalising)
    moments_estimator = momentsEstimator(optimise_params, annualisation,
                                         ann_target_SR)

    ## The optimiser instance will do the optimisation once we have the appropriate data
    optimiser = optimiserWithParams(optimise_params, moments_estimator)

    ## resample, indexing before and differencing after (returns, remember)
    ## .resample(...).last() replaces the resample(..., how="last") form,
    ## whose 'how' keyword was removed from pandas
    data_gross = [
        data_item.cumsum().resample(frequency).last().diff()
        for data_item in data_gross
    ]
    data_costs = [
        data_item.cumsum().resample(frequency).last().diff()
        for data_item in data_costs
    ]

    ## stack de-pool pooled data
    data_gross = df_from_list(data_gross)
    data_costs = df_from_list(data_costs)

    ## net gross and costs
    if equalise_gross:
        log.terse(
            "Setting all gross returns to be identical - optimisation driven only by costs"
        )
    if cost_multiplier != 1.0:
        log.terse("Using cost multiplier on optimisation of %.2f" %
                  cost_multiplier)

    data = work_out_net(data_gross, data_costs,
                        annualisation=annualisation,
                        equalise_gross=equalise_gross,
                        cost_multiplier=cost_multiplier,
                        ceiling_cost_ann_SR=ceiling_cost_SR,
                        period_target_SR=period_target_SR)

    fit_dates = generate_fitting_dates(data, date_method=date_method,
                                       rollyears=rollyears)
    setattr(self, "fit_dates", fit_dates)

    ## Now for each time period, estimate weights
    ## create a list of weight vectors
    weight_list = []

    ## create a class object for each period
    opt_results = []

    log.terse("Optimising...")

    for fit_period in fit_dates:
        log.msg("Optimising for data from %s to %s" %
                (str(fit_period.period_start), str(fit_period.period_end)))

        ## Do the optimisation for one period, using a particular optimiser instance
        results_this_period = optSinglePeriod(self, data, fit_period,
                                              optimiser, cleaning)
        opt_results.append(results_this_period)
        weights = results_this_period.weights

        ## We adjust dates slightly to ensure no overlaps
        dindex = [
            fit_period.period_start + datetime.timedelta(days=1),
            fit_period.period_end - datetime.timedelta(days=1)
        ]

        ## create a double row to delineate start and end of test period
        weight_row = pd.DataFrame([weights] * 2, index=dindex,
                                  columns=data.columns)
        weight_list.append(weight_row)

    ## Stack everything up
    raw_weight_df = pd.concat(weight_list, axis=0)

    if apply_cost_weight:
        log.terse("Applying cost weighting to optimisation results")
        weight_df = apply_cost_weighting(raw_weight_df, data_gross,
                                         data_costs, annualisation)
    else:
        weight_df = raw_weight_df

    setattr(self, "results", opt_results)
    setattr(self, "weights", weight_df)
    setattr(self, "raw_weights", raw_weight_df)
def __init__(self, data, frequency="W", date_method="expanding", rollyears=20,
             **kwargs):
    """
    We generate a correlation from either a pd.DataFrame, or a list of them
    if we're pooling.

    It's important that forward filling, or index / ffill / diff has been
    done before we begin.

    :param data: simData to get correlations from
    :type data: pd.DataFrame or list if pooling

    :param frequency: Downsampling frequency. Must be "D", "W" or bigger
    :type frequency: str

    :param date_method: Method to pass to generate_fitting_dates
    :type date_method: str

    :param rollyears: If date_method is "rolling", number of years in window
    :type rollyears: int

    :param **kwargs: passed to correlationSinglePeriod

    :returns: CorrelationList
    """
    # isinstance rather than type(...) is list: idiomatic, and also accepts
    # list subclasses
    if isinstance(data, list):
        # turn the list of data into a single dataframe. This will have a
        # unique time series, which we manage through adding a small offset
        # of a few microseconds
        length_of_data = len(data)
        data_resampled = [
            data_item.resample(frequency).last() for data_item in data
        ]
        data_as_df = df_from_list(data_resampled)
    else:
        length_of_data = 1
        data_as_df = data.resample(frequency).last()

    column_names = list(data_as_df.columns)

    # Generate time periods
    fit_dates = generate_fitting_dates(
        data_as_df, date_method=date_method, rollyears=rollyears)

    # create a single period correlation estimator
    correlation_estimator_for_one_period = correlationSinglePeriod(
        data_as_df, length_of_data=length_of_data, **kwargs)

    # create a list of correlation matrices
    corr_list = []

    progress = progressBar(len(fit_dates), "Estimating correlations")

    # Now for each time period, estimate correlation
    for fit_period in fit_dates:
        progress.iterate()
        corrmat = correlation_estimator_for_one_period.calculate(fit_period)
        corr_list.append(corrmat)

    setattr(self, "corr_list", corr_list)
    setattr(self, "columns", column_names)
    setattr(self, "fit_dates", fit_dates)