def testCalculateSlices(self):
    """Verify the column sets returned by _CalculateSlices.

    The bundle holds one non-rollup column (col2) and four rollup columns,
    three of which are chained by parent references (col4 -> col3 -> col5).
    The comparison ignores ordering, both across slices and within each
    slice.
    """
    bundle = data_source.DataSourceColumnBundle(columns=[
        data_source.DataSourceColumn(
            'col1', rollup=True, concept_extension='entity:entity'),
        data_source.DataSourceColumn('col2', rollup=False),
        data_source.DataSourceColumn('col3', rollup=True, parent_ref='col5'),
        data_source.DataSourceColumn('col4', rollup=True, parent_ref='col3'),
        data_source.DataSourceColumn('col5', rollup=True),
    ])

    def _Canonical(id_lists):
        # Sort the ids inside every slice, then sort the slices themselves,
        # so that two equivalent results always compare equal.
        return sorted(sorted(ids) for ids in id_lists)

    # Reduce each returned slice to the ids of the columns it contains.
    actual_ids = [
        [column.column_id for column in slice_columns]
        for slice_columns in data_source_to_dspl._CalculateSlices(bundle)
    ]
    expected_ids = [
        ['col1', 'col2', 'col3'],
        ['col1', 'col2', 'col4'],
        ['col1', 'col2', 'col5'],
        ['col1', 'col2'],
        ['col2', 'col3'],
        ['col2', 'col4'],
        ['col2', 'col5'],
        ['col2'],
    ]

    self.assertEqual(_Canonical(actual_ids), _Canonical(expected_ids))
def GetColumnBundle(self):
    """Return a six-column bundle used as a fixture.

    The bundle contains four dimension columns (col1, col2, col3, col6) and
    two metric columns (col4, col5). col2 names col6 as its parent; col1 and
    col6 are flagged for rollup; col3 is a date column with format 'yyyy'.
    """
    columns = [
        data_source.DataSourceColumn(
            'col1', data_type='string', slice_role='dimension',
            concept_extension='entity:entity', rollup=True),
        data_source.DataSourceColumn(
            'col2', data_type='string', concept_extension='geo:location',
            slice_role='dimension', parent_ref='col6'),
        data_source.DataSourceColumn(
            'col3', data_type='date', concept_ref='time:year',
            data_format='yyyy', slice_role='dimension'),
        data_source.DataSourceColumn(
            'col4', data_type='float', slice_role='metric'),
        data_source.DataSourceColumn(
            'col5', data_type='integer', slice_role='metric'),
        data_source.DataSourceColumn(
            'col6', data_type='string', slice_role='dimension', rollup=True),
    ]
    return data_source.DataSourceColumnBundle(columns=columns)
def _HeaderToColumn(header_string):
    """Parse the header string for a column.

    A header is either a bare column id or a column id followed by a
    bracketed, semicolon-separated list of key=value parameters, e.g.
    `year[type=date;format=yyyy;slice_role=dimension]`. Surrounding
    whitespace and all double-quote characters are removed before parsing.

    Recognized parameter keys and where their values are stored:
      type: 'date', 'float', 'integer' or 'string' -> column.data_type
      format -> column.data_format
      concept -> column.concept_ref
      extends -> column.concept_extension
      parent -> column.parent_ref
      slice_role: 'dimension' or 'metric', case-insensitive; stored
        lowercased -> column.slice_role
      rollup: 'true' or 'false', case-insensitive -> column.rollup (bool)
      total_val -> column.total_val
      dropif -> column.internal_parameters['dropif_val']
      zeroif -> column.internal_parameters['zeroif_val']
      aggregation: 'sum', 'max', 'min', 'avg' or 'count', validated
        case-insensitively but stored as written
        -> column.internal_parameters['aggregation']

    Args:
      header_string: The complete string for the column header

    Returns:
      A data_source.DataSourceColumn object populated based on the header
      data

    Raises:
      DataSourceError: If there are any errors in parsing, e.g. if the header
        is malformed, a parameter is not exactly one key=value pair, an
        unrecognized key is found, or a value is not allowed for its key.
    """
    # The column id must be at least one character long, and cannot contain
    # the characters '[', ']', ';', or whitespace. A raw string is used so
    # that the backslashes reach the regex engine untouched.
    parameters_match = re.match(
        r'^([^\]\[;\s]+)(?:\[(.*)\]){0,1}$',
        header_string.strip().replace('"', ''))

    if not parameters_match:
        raise data_source.DataSourceError(
            'Formatting error for header string: %s' % header_string)

    column_id = parameters_match.group(1)
    column = data_source.DataSourceColumn(column_id, internal_parameters={})

    # A missing or empty bracket section means there is nothing to parse.
    if parameters_match.group(2):
        # Parse the column parameters
        key_value_pairs = parameters_match.group(2).split(';')

        for key_value_pair in key_value_pairs:
            try:
                # Exactly one '=' is required; anything else fails to unpack.
                [key, value] = key_value_pair.split('=')
            except ValueError:
                raise data_source.DataSourceError(
                    'Formatting error for header string: %s' % header_string)

            # Map the key to the appropriate field of the DataSourceColumn
            # object
            if key == 'type':
                if value not in ['date', 'float', 'integer', 'string']:
                    raise data_source.DataSourceError(
                        'Unknown data type for column %s: %s' %
                        (column.column_id, value))

                column.data_type = value
            elif key == 'format':
                column.data_format = value
            elif key == 'concept':
                column.concept_ref = value
            elif key == 'extends':
                column.concept_extension = value
            elif key == 'parent':
                column.parent_ref = value
            elif key == 'slice_role':
                role_value = value.lower()

                if role_value not in ['dimension', 'metric']:
                    raise data_source.DataSourceError(
                        'Unrecognized slice_role in column %s: %s' %
                        (column.column_id, value))

                column.slice_role = role_value
            elif key == 'rollup':
                rollup_value = value.lower()

                if rollup_value == 'true':
                    column.rollup = True
                elif rollup_value == 'false':
                    column.rollup = False
                else:
                    raise data_source.DataSourceError(
                        'Unrecognized boolean value in column %s: %s' %
                        (column.column_id, value))
            elif key == 'total_val':
                column.total_val = value
            elif key == 'dropif':
                column.internal_parameters['dropif_val'] = value
            elif key == 'zeroif':
                column.internal_parameters['zeroif_val'] = value
            elif key == 'aggregation':
                # str.lower() rather than string.lower(): the module-level
                # function does not exist on Python 3.
                if value.lower() not in [
                        'sum', 'max', 'min', 'avg', 'count']:
                    raise data_source.DataSourceError(
                        'Unknown aggregation for column %s: %s' %
                        (column.column_id, value))

                # The value is stored as written, not lowercased.
                column.internal_parameters['aggregation'] = value
            else:
                raise data_source.DataSourceError(
                    'Unknown parameter for column %s: %s' %
                    (column.column_id, key))

    return column
def testAddColumn(self):
    """Check that a column passed to AddColumn can be fetched by its id."""
    new_column = data_source.DataSourceColumn(column_id='col4')
    self.column_bundle.AddColumn(new_column)

    fetched_column = self.column_bundle.GetColumnByID('col4')
    self.assertEqual(fetched_column.column_id, 'col4')
def setUp(self):
    """Store a bundle holding columns col1, col2 and col3 on the test case."""
    fixture_columns = [
        data_source.DataSourceColumn(column_id=fixture_id)
        for fixture_id in ('col1', 'col2', 'col3')
    ]
    self.column_bundle = data_source.DataSourceColumnBundle(fixture_columns)