def _get_trend(self, trend, tamconfig, data_sources): """Decision tree to select between trend choices. Arguments: trend: explicit trend to use, if any. Pass None to have tamconfig be used. tamconfig: the row from self.tamconfig to use data_sources: dict of dicts of datasources, as described in tam_ref_data_sources in the constructor If a trend was explictly specified, use it. If there is only one data source, use that source without any curve fitting. Otherwise, use the curve fit algorithm specified in the tamconfig. """ if trend: return trend if not interpolation.is_group_name(data_sources=data_sources, name=tamconfig['source_after_2014']): return 'single' else: return tamconfig['trend']
def _low_med_high(self, adoption_data, min_max_sd, adconfig, source, data_sources, region):
    """Return the selected data sources as Medium, and N stddev away as Low and High.

    Arguments:
      adoption_data: DataFrame whose columns are looked up by data source name.
      min_max_sd: DataFrame; its 'S.D' column supplies the standard deviation.
      adconfig: indexed by 'low_sd_mult' and 'high_sd_mult', the multipliers
        applied to the standard deviation for the Low and High columns.
      source: name handed to interpolation.matching_data_sources.
      data_sources: data source description handed to the interpolation helpers.
      region: region name, or None. When given, the lookup is keyed with
        'Region: <region>'.

    Returns:
      DataFrame indexed like adoption_data with columns 'Low', 'Medium', 'High'.
      All three columns are NaN when matching_data_sources returns None.
    """
    result = pd.DataFrame(index=adoption_data.index.copy(), columns=['Low', 'Medium', 'High'])
    region_key = f'Region: {region}' if region is not None else None
    matches = interpolation.matching_data_sources(
        data_sources=data_sources, name=source, groups_only=False, region_key=region_key)

    # Nothing matched: hand back an all-NaN frame.
    if matches is None:
        for band in ('Medium', 'Low', 'High'):
            result.loc[:, band] = np.nan
        return result

    # A single match is only averaged when it is itself a group name; any other
    # number of matches is always treated as a group.
    use_mean = len(matches) != 1 or interpolation.is_group_name(
        data_sources=data_sources, name=matches[0])

    if use_mean:
        # In Excel, the Mean computation is:
        # SUM($C46:$Q46)/COUNTIF($C46:$Q46,">0")
        #
        # The intent is to skip sources which are empty, but also means that
        # a source where the real data is 0.0 will not impact the Medium result.
        #
        # See this document for more information:
        # https://docs.google.com/document/d/19sq88J_PXY-y_EnqbSJDl0v9CdJArOdFLatNNUFhjEA/edit#heading=h.yvwwsbvutw2j
        #
        # We're matching the Excel behavior in the initial product. This decision can
        # be revisited later, when matching results from Excel is no longer required.
        # To revert, use: medium = adoption_data.loc[:, matches].mean(axis=1)
        selected = adoption_data.loc[:, matches]
        medium = selected.mask(selected == 0.0, np.nan).mean(axis=1)
    else:
        # if there is only a single source, Excel uses it directly without taking a Mean.
        medium = adoption_data.loc[:, matches[0]]

    result.loc[:, 'Medium'] = medium
    std_dev = min_max_sd.loc[:, 'S.D']
    result.loc[:, 'Low'] = medium - (std_dev * adconfig.loc['low_sd_mult'])
    result.loc[:, 'High'] = medium + (std_dev * adconfig.loc['high_sd_mult'])
    return result
def adoption_is_single_source(self):
    """Whether the source data selected is one source or multiple.

    Returns True when self.ac.soln_pds_adoption_prognostication_source is not a
    group name (per interpolation.is_group_name against self.data_sources),
    and False when it is.
    """
    sources = self.data_sources
    selected = self.ac.soln_pds_adoption_prognostication_source
    names_a_group = interpolation.is_group_name(data_sources=sources, name=selected)
    return not names_a_group