示例#1
0
    def testCalculateSlices(self):
        """Check the column combinations produced by _CalculateSlices.

        The bundle contains one non-rollup column (col2) and four rollup
        columns; col4 names col3 as its parent and col3 names col5 as its
        parent. The comparison ignores ordering, both of the slices and of
        the column ids within each slice.
        """
        bundle = data_source.DataSourceColumnBundle(columns=[
            data_source.DataSourceColumn(
                'col1', rollup=True, concept_extension='entity:entity'),
            data_source.DataSourceColumn('col2', rollup=False),
            data_source.DataSourceColumn(
                'col3', rollup=True, parent_ref='col5'),
            data_source.DataSourceColumn(
                'col4', rollup=True, parent_ref='col3'),
            data_source.DataSourceColumn('col5', rollup=True),
        ])

        expected_id_sets = [
            ['col1', 'col2', 'col3'],
            ['col1', 'col2', 'col4'],
            ['col1', 'col2', 'col5'],
            ['col1', 'col2'],
            ['col2', 'col3'],
            ['col2', 'col4'],
            ['col2', 'col5'],
            ['col2'],
        ]

        # Reduce every returned slice to the ids of the columns it holds.
        actual_id_sets = [
            [column.column_id for column in column_set]
            for column_set in data_source_to_dspl._CalculateSlices(bundle)
        ]

        def _Normalized(id_sets):
            # Sort inside each slice and across slices so that the
            # comparison does not depend on ordering.
            return sorted(sorted(ids) for ids in id_sets)

        self.assertEqual(_Normalized(actual_id_sets),
                         _Normalized(expected_id_sets))
示例#2
0
    def GetColumnBundle(self):
        """Build the six-column bundle used as a test fixture.

        col1, col2, col3 and col6 are dimensions (col2 names col6 as its
        parent; col1 and col6 are rollup columns), while col4 and col5 are
        metrics.

        Returns:
          A DataSourceColumnBundle holding col1 through col6, in that order.
        """
        # Each entry is (column id, keyword arguments for DataSourceColumn).
        column_specs = [
            ('col1', dict(data_type='string',
                          slice_role='dimension',
                          concept_extension='entity:entity',
                          rollup=True)),
            ('col2', dict(data_type='string',
                          concept_extension='geo:location',
                          slice_role='dimension',
                          parent_ref='col6')),
            ('col3', dict(data_type='date',
                          concept_ref='time:year',
                          data_format='yyyy',
                          slice_role='dimension')),
            ('col4', dict(data_type='float', slice_role='metric')),
            ('col5', dict(data_type='integer', slice_role='metric')),
            ('col6', dict(data_type='string',
                          slice_role='dimension',
                          rollup=True)),
        ]

        return data_source.DataSourceColumnBundle(columns=[
            data_source.DataSourceColumn(column_id, **options)
            for column_id, options in column_specs
        ])
示例#3
0
def _HeaderToColumn(header_string):
    """Parse the header string for a column.

    A header is either a bare column id or a column id followed by a
    bracketed, semicolon-separated list of key=value parameters, e.g.
    col1[type=string;slice_role=dimension]. Surrounding whitespace and all
    double-quote characters are removed before parsing.

    Args:
      header_string: The complete string for the column header

    Returns:
      A DataSourceColumn object populated based on the header data

    Raises:
      DataSourceError: If there are any errors in parsing, e.g. if an
                       unrecognized key is found.
    """
    # The column id must be at least one character long, and cannot contain the
    # characters '[', ']', ';', or whitespace. The pattern is a raw string so
    # that the regex escapes are not interpreted as string escapes.
    parameters_match = re.match(r'^([^\]\[;\s]+)(?:\[(.*)\]){0,1}$',
                                header_string.strip().replace('"', ''))

    if not parameters_match:
        raise data_source.DataSourceError(
            'Formatting error for header string: %s' % header_string)

    column_id = parameters_match.group(1)
    column = data_source.DataSourceColumn(column_id, internal_parameters={})

    if parameters_match.group(2):
        # Parse the column parameters
        key_value_pairs = parameters_match.group(2).split(';')

        for key_value_pair in key_value_pairs:
            # Each parameter must contain exactly one '='; anything else
            # (no '=', or more than one) is a formatting error.
            try:
                [key, value] = key_value_pair.split('=')
            except ValueError:
                raise data_source.DataSourceError(
                    'Formatting error for header string: %s' % header_string)

            # Map the key to the appropriate field of the DataSourceColumn object
            if key == 'type':
                if value not in ['date', 'float', 'integer', 'string']:
                    raise data_source.DataSourceError(
                        'Unknown data type for column %s: %s' %
                        (column.column_id, value))

                column.data_type = value
            elif key == 'format':
                column.data_format = value
            elif key == 'concept':
                column.concept_ref = value
            elif key == 'extends':
                column.concept_extension = value
            elif key == 'parent':
                column.parent_ref = value
            elif key == 'slice_role':
                # The role is matched case-insensitively and stored lowercased.
                role_value = value.lower()

                if role_value not in ['dimension', 'metric']:
                    raise data_source.DataSourceError(
                        'Unrecognized slice_role in column %s: %s' %
                        (column.column_id, value))

                column.slice_role = role_value
            elif key == 'rollup':
                # Only 'true' / 'false' (in any letter case) are accepted.
                rollup_value = value.lower()

                if rollup_value == 'true':
                    column.rollup = True
                elif rollup_value == 'false':
                    column.rollup = False
                else:
                    raise data_source.DataSourceError(
                        'Unrecognized boolean value in column %s: %s' %
                        (column.column_id, value))
            elif key == 'total_val':
                column.total_val = value
            elif key == 'dropif':
                column.internal_parameters['dropif_val'] = value
            elif key == 'zeroif':
                column.internal_parameters['zeroif_val'] = value
            elif key == 'aggregation':
                # str.lower() is used instead of string.lower(), which does
                # not exist in Python 3. The name is validated
                # case-insensitively but stored exactly as written.
                if value.lower() not in [
                        'sum', 'max', 'min', 'avg', 'count'
                ]:
                    raise data_source.DataSourceError(
                        'Unknown aggregation for column %s: %s' %
                        (column.column_id, value))

                column.internal_parameters['aggregation'] = value
            else:
                raise data_source.DataSourceError(
                    'Unknown parameter for column %s: %s' %
                    (column.column_id, key))
    return column
示例#4
0
 def testAddColumn(self):
     """A column passed to AddColumn can be fetched back by its id."""
     new_column = data_source.DataSourceColumn(column_id='col4')
     self.column_bundle.AddColumn(new_column)

     fetched_column = self.column_bundle.GetColumnByID('col4')
     self.assertEqual(fetched_column.column_id, 'col4')
示例#5
0
 def setUp(self):
     """Create a bundle holding columns col1, col2 and col3."""
     initial_columns = [
         data_source.DataSourceColumn(column_id=column_id)
         for column_id in ('col1', 'col2', 'col3')
     ]
     self.column_bundle = data_source.DataSourceColumnBundle(initial_columns)