Пример #1
0
    def __init__(self, data, frequency="W",
                 date_method="expanding", rollyears=20,
                 dict_group=dict(), boring_offdiag=0.99, cleaning=True, **kwargs):
        """
        Estimate one correlation matrix per fitting period and store the results.

        :param data: Input handed to df_from_list; the result is used as a
            DataFrame-like object (its .columns and .resample are used)

        :param frequency: Rule passed to resample; the last value of each bin is kept
        :type frequency: str

        :param date_method: Forwarded to generate_fitting_dates
        :type date_method: str

        :param rollyears: Forwarded to generate_fitting_dates
        :type rollyears: int

        :param dict_group: Forwarded to group_dict_from_natural
        :type dict_group: dict

        :param boring_offdiag: Off-diagonal value of the fallback matrix built
            with boring_corr_matrix
        :type boring_offdiag: float

        :param cleaning: Converted with str2Bool; when true every matrix is
            passed through clean_correlation
        :type cleaning: bool or str

        :param **kwargs: Forwarded to correlation_single_period

        On completion the instance has three attributes: corr_list (one matrix
        per fitting period), columns (column names of the input) and fit_dates.
        """
        cleaning = str2Bool(cleaning)

        # The converted grouping is not used again in this method; the call is
        # kept so that whatever processing it applies to dict_group still runs.
        group_dict = group_dict_from_natural(dict_group)

        data = df_from_list(data)
        column_names = list(data.columns)

        # resample(rule, how="last") is no longer accepted by pandas; chaining
        # .last() on the resampler is the documented replacement.
        data = data.resample(frequency).last()

        fit_dates = generate_fitting_dates(data, date_method=date_method, rollyears=rollyears)

        size = len(column_names)
        # Fallback matrix handed to clean_correlation for every period
        corr_with_no_data = boring_corr_matrix(size, offdiag=boring_offdiag)

        corr_list = []
        print(__file__ + ":" + str(inspect.getframeinfo(inspect.currentframe())[:3][1]) + ":" +"Correlation estimate")
        for fit_period in fit_dates:
            print(__file__ + ":" + str(inspect.getframeinfo(inspect.currentframe())[:3][1]) + ":" +"Estimating from %s to %s" % (fit_period.period_start, fit_period.period_end))
            if fit_period.no_data:
                # Nothing to fit with: use an all-NaN matrix of the right size
                corrmat = boring_corr_matrix(size, offdiag=np.nan, diag=np.nan)
            else:
                data_for_estimate = data[fit_period.fit_start:fit_period.fit_end]
                corrmat = correlation_single_period(data_for_estimate, **kwargs)

            if cleaning:
                current_period_data = data[fit_period.fit_start:fit_period.fit_end]
                must_haves = must_have_item(current_period_data)
                corrmat = clean_correlation(corrmat, corr_with_no_data, must_haves)

            corr_list.append(corrmat)

        self.corr_list = corr_list
        self.columns = column_names
        self.fit_dates = fit_dates
Пример #2
0
    def __init__(self, data, log=logtoscreen("optimiser"), frequency="W", date_method="expanding",
                 rollyears=20,
                 dict_group=dict(), boring_offdiag=0.99, cleaning=True, **kwargs):
        """
        Estimate one correlation matrix per fitting period and store the results.

        :param data: Input handed to df_from_list; the result is used as a
            DataFrame-like object (its .columns and .resample are used)

        :param log: Accepted but not used in this method; progress is printed
            to stdout instead

        :param frequency: Rule passed to resample; the last value of each bin is kept
        :type frequency: str

        :param date_method: Forwarded to generate_fitting_dates
        :type date_method: str

        :param rollyears: Forwarded to generate_fitting_dates
        :type rollyears: int

        :param dict_group: Forwarded to group_dict_from_natural
        :type dict_group: dict

        :param boring_offdiag: Off-diagonal value of the fallback matrix built
            with boring_corr_matrix
        :type boring_offdiag: float

        :param cleaning: Converted with str2Bool; when true every matrix is
            passed through clean_correlation
        :type cleaning: bool or str

        :param **kwargs: Forwarded to correlation_single_period

        On completion the instance has three attributes: corr_list (one matrix
        per fitting period), columns (column names of the input) and fit_dates.
        """
        cleaning = str2Bool(cleaning)

        ## grouping dictionary, convert to faster, algo friendly, form
        ## (the converted form is not used again in this method)
        group_dict = group_dict_from_natural(dict_group)

        data = df_from_list(data)
        column_names = list(data.columns)

        ## resample(rule, how="last") is no longer accepted by pandas; chaining
        ## .last() on the resampler is the documented replacement
        data = data.resample(frequency).last()

        ### Generate time periods
        fit_dates = generate_fitting_dates(data, date_method=date_method, rollyears=rollyears)

        size = len(column_names)
        ## fallback matrix handed to clean_correlation for every period
        corr_with_no_data = boring_corr_matrix(size, offdiag=boring_offdiag)

        ## create a list of correlation matrices
        corr_list = []

        print(__file__ + ":" + str(inspect.getframeinfo(inspect.currentframe())[:3][1]) + ":" +"Correlation estimate")

        ## Now for each time period, estimate correlation
        for fit_period in fit_dates:
            print(__file__ + ":" + str(inspect.getframeinfo(inspect.currentframe())[:3][1]) + ":" +"Estimating from %s to %s" % (fit_period.period_start, fit_period.period_end))

            if fit_period.no_data:
                ## no data to fit with: all-NaN matrix of the right size
                corrmat = boring_corr_matrix(size, offdiag=np.nan, diag=np.nan)
            else:
                data_for_estimate = data[fit_period.fit_start:fit_period.fit_end]
                corrmat = correlation_single_period(data_for_estimate, **kwargs)

            if cleaning:
                current_period_data = data[fit_period.fit_start:fit_period.fit_end]
                must_haves = must_have_item(current_period_data)

                # means we can use earlier correlations with sensible values
                corrmat = clean_correlation(corrmat, corr_with_no_data, must_haves)

            corr_list.append(corrmat)

        self.corr_list = corr_list
        self.columns = column_names
        self.fit_dates = fit_dates
Пример #3
0
    def __init__(self,
                 data,
                 log=logtoscreen("optimiser"),
                 frequency="W",
                 date_method="expanding",
                 rollyears=20,
                 dict_group=dict(),
                 boring_offdiag=0.99,
                 cleaning=True,
                 **kwargs):
        """
        Estimate a correlation matrix for each fitting period.

        Accepts a single pd.DataFrame, or a list of them when pooling. Any
        forward filling / indexing / differencing must already have been
        applied to the data before it is passed in.

        :param data: Data to get correlations from
        :type data: pd.DataFrame or list if pooling

        :param log: Accepted but not used in this method

        :param frequency: Downsampling frequency. Must be "D", "W" or bigger
        :type frequency: str

        :param date_method: Method to pass to generate_fitting_dates
        :type date_method: str

        :param rollyears: If date_method is "rolling", number of years in window
        :type rollyears: int

        :param dict_group: dictionary of groupings; used to replace missing values
        :type dict_group: dict

        :param boring_offdiag: Value used in creating 'boring' matrix, for when no data
        :type boring_offdiag: float

        :param cleaning: Converted with str2Bool; when true each estimate is
            passed through clean_correlation
        :type cleaning: bool or str

        :param **kwargs: passed to correlation_single_period

        The results are stored on the instance as corr_list, columns and
        fit_dates.
        """

        do_cleaning = str2Bool(cleaning)

        # Natural grouping converted to the algo friendly form; the converted
        # form is not referenced again in this method.
        group_dict = group_dict_from_natural(dict_group)

        frame = df_from_list(data)
        column_names = list(frame.columns)

        # Downsample, keeping the last observation in each bin
        frame = frame.resample(frequency).last()

        # Time periods to fit over
        fit_dates = generate_fitting_dates(
            frame, date_method=date_method, rollyears=rollyears)

        n_columns = len(column_names)
        fallback_corr = boring_corr_matrix(n_columns, offdiag=boring_offdiag)

        # One matrix is collected per fitting period
        corr_list = []

        progress = progressBar(len(fit_dates), "Estimating correlations")
        for period in fit_dates:
            progress.iterate()

            if period.no_data:
                # Nothing to fit with: all-NaN matrix of the right size
                estimate = boring_corr_matrix(
                    n_columns, offdiag=np.nan, diag=np.nan)
            else:
                window = frame[period.fit_start:period.fit_end]
                estimate = correlation_single_period(window, **kwargs)

            if do_cleaning:
                cleaning_window = frame[period.fit_start:period.fit_end]
                required = must_have_item(cleaning_window)

                # means we can use earlier correlations with sensible values
                estimate = clean_correlation(estimate, fallback_corr, required)

            corr_list.append(estimate)

        self.corr_list = corr_list
        self.columns = column_names
        self.fit_dates = fit_dates
Пример #4
0
    def __init__(self, data, log=logtoscreen("optimiser"), frequency="W", date_method="expanding", rollyears=20,
                 dict_group=dict(), boring_offdiag=0.99, cleaning=True, **kwargs):
        """

        We generate a correlation from either a pd.DataFrame, or a list of them if we're pooling

        Its important that forward filling, or index / ffill / diff has been done before we begin

        :param data: Data to get correlations from
        :type data: pd.DataFrame or list if pooling

        :param log: Logger; its terse() and msg() methods are called to report progress

        :param frequency: Downsampling frequency. Must be "D", "W" or bigger
        :type frequency: str

        :param date_method: Method to pass to generate_fitting_dates
        :type date_method: str

        :param rollyears: If date_method is "rolling", number of years in window
        :type rollyears: int

        :param dict_group: dictionary of groupings; used to replace missing values
        :type dict_group: dict

        :param boring_offdiag: Value used in creating 'boring' matrix, for when no data
        :type boring_offdiag: float

        :param cleaning: Converted with str2Bool; when true each estimate is
            passed through clean_correlation
        :type cleaning: bool or str

        :param **kwargs: passed to correlation_single_period

        The results are stored on the instance as corr_list, columns and fit_dates.
        """

        cleaning = str2Bool(cleaning)

        ## grouping dictionary, convert to faster, algo friendly, form
        ## (the converted form is not used again in this method)
        group_dict = group_dict_from_natural(dict_group)

        data = df_from_list(data)
        column_names = list(data.columns)

        ## resample(rule, how="last") is no longer accepted by pandas; chaining
        ## .last() on the resampler is the documented replacement
        data = data.resample(frequency).last()

        ### Generate time periods
        fit_dates = generate_fitting_dates(data, date_method=date_method, rollyears=rollyears)

        size = len(column_names)
        corr_with_no_data = boring_corr_matrix(size, offdiag=boring_offdiag)

        ## create a list of correlation matrices
        corr_list = []

        log.terse("Correlation estimate")

        ## Now for each time period, estimate correlation
        for fit_period in fit_dates:
            log.msg("Fitting from %s to %s" % (fit_period.period_start, fit_period.period_end))

            if fit_period.no_data:
                ## no data to fit with: all-NaN matrix of the right size
                corrmat = boring_corr_matrix(size, offdiag=np.nan, diag=np.nan)
            else:
                data_for_estimate = data[fit_period.fit_start:fit_period.fit_end]
                corrmat = correlation_single_period(data_for_estimate, **kwargs)

            if cleaning:
                # means we can use earlier correlations with sensible values
                # NOTE(review): the third argument here is boring_offdiag; confirm
                # this matches the clean_correlation signature in this code base.
                corrmat = clean_correlation(corrmat, corr_with_no_data, boring_offdiag)

            corr_list.append(corrmat)

        self.corr_list = corr_list
        self.columns = column_names
        self.fit_dates = fit_dates