def test_get_catalog_various_schemas(self, mock_get_schemas, mock_execute):
    """
    Check ``get_catalog`` against mocked rows spanning several schemas.

    The mocked query result holds rows for the schemas ``foo``, ``FOO``,
    ``None``, ``quux`` and ``skip``. Only the ``foo``, ``FOO`` and ``quux``
    rows are expected in the returned catalog, and the returned exception
    list is expected to be empty.
    """
    # Result table handed back by the mocked query execution
    mock_execute.return_value = agate.Table(
        rows=[
            ('dbt', 'foo', 'bar'),
            ('dbt', 'FOO', 'baz'),
            ('dbt', None, 'bar'),
            ('dbt', 'quux', 'bar'),
            ('dbt', 'skip', 'bar'),
        ],
        column_names=['table_database', 'table_schema', 'table_name'],
    )

    # One mocked database entry mapped to the schemas reported for it
    database = mock.MagicMock(database='dbt')
    mock_get_schemas.return_value.items.return_value = [
        (database, {'foo', 'FOO', 'quux'}),
    ]

    manifest = mock.MagicMock()
    manifest.get_used_schemas.return_value = {
        ('dbt', 'foo'),
        ('dbt', 'quux'),
    }

    catalog, exceptions = self.adapter.get_catalog(manifest)

    expected_rows = {
        ('dbt', 'foo', 'bar'),
        ('dbt', 'FOO', 'baz'),
        ('dbt', 'quux', 'bar'),
    }
    self.assertEqual(set(map(tuple, catalog)), expected_rows)
    self.assertEqual(exceptions, [])
示例#2
0
def load_data(data):
    """
    Load the exonerations CSV into ``data['exonerations']``.

    :param data:
        A mutable mapping. The resulting :class:`agate.Table` is stored
        under its ``'exonerations'`` key.
    """
    text_type = agate.Text()
    number_type = agate.Number()
    boolean_type = agate.Boolean()

    # (column name, column type) pairs, in file order
    columns = (
        ('last_name', text_type),
        ('first_name', text_type),
        ('age', number_type),
        ('race', text_type),
        ('state', text_type),
        ('tags', text_type),
        ('crime', text_type),
        ('sentence', text_type),
        ('convicted', number_type),
        ('exonerated', number_type),
        ('dna', boolean_type),
        ('dna_essential', text_type),
        ('mistaken_witness', boolean_type),
        ('false_confession', boolean_type),
        ('perjury', boolean_type),
        ('false_evidence', boolean_type),
        ('official_misconduct', boolean_type),
        ('inadequate_defense', boolean_type),
    )

    with open('examples/realdata/exonerations-20150828.csv') as f:
        # Create a csv reader
        reader = csv.reader(f)

        # Skip the header through the reader rather than the raw file, so
        # the header is always consumed as one complete CSV record (a quoted
        # header field may span more than one physical line).
        next(reader)

        # Create the table
        data['exonerations'] = agate.Table(reader, columns)
示例#3
0
    def setUp(self):
        """
        Load ``examples/heights.csv`` into ``self.table`` and delete any
        existing ``TEST_FILENAME`` file.
        """
        text_type = agate.Text()
        number_type = agate.Number()

        # (column name, column type) pairs, in file order
        columns = (
            ('gender', text_type),
            ('month', number_type),
            ('median', number_type),
            ('stdev', number_type),
            ('1st', number_type),
            ('3rd', number_type),
            ('5th', number_type),
            ('15th', number_type),
            ('25th', number_type),
            ('50th', number_type),
            ('75th', number_type),
            ('85th', number_type),
            ('95th', number_type),
            ('97th', number_type),
            ('99th', number_type),
        )

        with open('examples/heights.csv') as f:
            # Create a csv reader
            reader = csv.reader(f)

            # Skip the header through the reader rather than the raw file,
            # so the header is always consumed as one complete CSV record.
            next(reader)

            # Create the table
            self.table = agate.Table(reader, columns)

        # Start each test without a stale output file
        if os.path.exists(TEST_FILENAME):
            os.remove(TEST_FILENAME)
示例#4
0
def sql_query(self, query, table_name='agate'):
    """
    Convert this agate table into an intermediate, in-memory sqlite table,
    run a query against it, and then return the results as a new agate table.

    Multiple queries may be separated with semicolons. Empty and
    whitespace-only fragments (for example after a trailing semicolon) are
    ignored. The returned table is built from the result of the last query.

    :param query:
        One SQL query, or multiple queries to be run consecutively separated
        with semicolons.
    :param table_name:
        The name to use for the table in the queries, defaults to ``agate``.
    :raises ValueError:
        If ``query`` contains no non-blank SQL statement.
    """
    _, connection = get_engine_and_connection()

    # Drop blank fragments up front: executing a whitespace-only statement
    # would replace the real result with one that returns no rows.
    queries = [q for q in query.split(';') if q.strip()]

    if not queries:
        raise ValueError('No SQL query was provided.')

    self.to_sql(connection, table_name)

    # Execute the specified SQL queries; only the last result is kept
    rows = None

    for q in queries:
        rows = connection.execute(q)

    table = agate.Table(list(rows), column_names=rows._metadata.keys)

    return table
示例#5
0
def write_weighted_means_csv():
    """
    Write one row of weighted mean scores per county type to
    ``data/output/weighted_means.csv``.

    For every (age, income) combination the summed ``weighted_score`` column
    of a county-type table is divided by that table's summed ``Population``.
    """
    # Every (age, income) combination, in the order used for the columns
    combos = [(age, income) for age in ages for income in incomes]

    column_names = ['county_type'] + [
        'weighted_mean_{0}yo_{1}k'.format(age, income)
        for age, income in combos
    ]
    column_types = [text_type] + [number_type] * len(combos)

    sources = (
        (rural_weighted, 'rural'),
        (small_towns_weighted, 'small_towns'),
        (metro_weighted, 'metro'),
    )

    rows = []
    for weighted_table, label in sources:
        total_population = weighted_table.aggregate(agate.Sum('Population'))

        row = [label]
        row.extend(
            weighted_table.aggregate(
                agate.Sum('weighted_score_{0}yo_{1}k'.format(age, income))
            ) / total_population
            for age, income in combos
        )
        rows.append(row)

    output = agate.Table(rows, column_names, column_types)
    output.to_csv('data/output/weighted_means.csv')
示例#6
0
    def get_columns_in_relation(self, relation):
        """
        Describe the columns of ``relation`` from ``pragma table_info``.

        Fields 1 and 2 of each pragma row are used as the column name and
        data type (``'TEXT'`` when the type is falsy). The length, precision
        and scale columns are always ``None``. The resulting table is passed
        to the ``sql_convert_columns_in_relation`` macro, whose result is
        returned.
        """
        pragma = f"pragma {relation.schema}.table_info({relation.identifier})"
        _, results = self.connections.execute(pragma, fetch=True)

        table = agate.Table(
            [[row[1], row[2] or 'TEXT', None, None, None] for row in results],
            [
                'column_name',
                'data_type',
                'character_maximum_length',
                'numeric_precision',
                'numeric_scale',
            ],
        )

        return self.execute_macro(
            'sql_convert_columns_in_relation',
            kwargs={'table': table},
        )
示例#7
0
    def test_chunk_size(self):
        """
        Write 9999 single-number rows with ``chunk_size=100`` and check that
        reading them back yields the same row count and the same sum.
        """
        rows = [(n, ) for n in range(9999)]
        expected = sum(n for (n, ) in rows)

        engine = create_engine(self.connection_string)
        connection = engine.connect()

        try:
            written = agate.Table(rows, ['number'], [agate.Number()])
            written.to_sql(connection,
                           'test_chunk_size',
                           overwrite=True,
                           chunk_size=100)

            read_back = agate.Table.from_sql(connection, 'test_chunk_size')
            actual = sum(r[0] for r in read_back.rows)
            self.assertEqual(len(read_back.rows), len(rows))
            self.assertEqual(expected, actual)
        finally:
            # Release the database resources whether or not the test passed
            connection.close()
            engine.dispose()
示例#8
0
    def setUp(self):
        """
        Build the shared fixture: four rows over number, text, boolean, date
        and datetime columns, the matching agate table, and an in-memory
        sqlite connection string.
        """
        rows = (
            (1.123, 'a', True, '11/4/2015', '11/4/2015 12:22 PM'),
            (2, u'👍', False, '11/5/2015', '11/4/2015 12:45 PM'),
            (2, u'c', False, '11/5/2015', '11/4/2015 12:45 PM'),
            (None, 'b', None, None, None),
        )
        names = ['number', 'text', 'boolean', 'date', 'datetime']
        types = [
            agate.Number(),
            agate.Text(),
            agate.Boolean(),
            agate.Date(),
            agate.DateTime(),
        ]

        self.rows = rows
        self.column_names = names
        self.column_types = types
        self.table = agate.Table(rows, names, types)
        self.connection_string = 'sqlite:///:memory:'
示例#9
0
def from_sql(cls, connection_or_string, table_name):
    """
    Create a new :class:`agate.Table` from a given SQL table. Types will be
    inferred from the database schema.

    Monkey patched as class method :meth:`Table.from_sql`.

    :param connection_or_string:
        An existing sqlalchemy connection or connection string.
    :param table_name:
        The name of a table in the referenced database.
    :raises ValueError:
        If a column's sqlalchemy type maps to no supported agate type.
    """
    engine, connection = get_engine_and_connection(connection_or_string)

    # Everything after acquiring the connection runs inside the try block,
    # so the cleanup below also happens when reflection, type mapping or the
    # query itself raises, not only when building the agate table fails.
    try:
        metadata = MetaData(connection)
        sql_table = Table(table_name,
                          metadata,
                          autoload=True,
                          autoload_with=connection)

        column_names = []
        column_types = []

        for sql_column in sql_table.columns:
            column_names.append(sql_column.name)

            # Interval types are mapped to timedelta directly instead of
            # consulting the column type's python_type.
            if type(sql_column.type) in INTERVAL_MAP.values():
                py_type = datetime.timedelta
            else:
                py_type = sql_column.type.python_type

            if py_type in [int, float, decimal.Decimal]:
                if py_type is float:
                    # Ask sqlalchemy to return Decimal values for floats
                    sql_column.type.asdecimal = True
                column_types.append(agate.Number())
            elif py_type is bool:
                column_types.append(agate.Boolean())
            elif issubclass(py_type, six.string_types):
                column_types.append(agate.Text())
            elif py_type is datetime.date:
                column_types.append(agate.Date())
            elif py_type is datetime.datetime:
                column_types.append(agate.DateTime())
            elif py_type is datetime.timedelta:
                column_types.append(agate.TimeDelta())
            else:
                raise ValueError('Unsupported sqlalchemy column type: %s' %
                                 type(sql_column.type))

        s = select([sql_table])

        rows = connection.execute(s)

        return agate.Table(rows, column_names, column_types)
    finally:
        # Only release resources when an engine was returned alongside the
        # connection (presumably when a connection string was given; an
        # existing connection stays open for the caller).
        if engine is not None:
            connection.close()
            engine.dispose()
示例#10
0
 def _make_table_of(self, rows, column_types):
     """
     Build an agate table from ``rows`` with single-letter column names.

     ``column_types`` is either one type, applied to every column, or an
     iterable with one entry per column. Each entry is converted with
     ``self._get_tester_for``. The number of columns is taken from the
     first row.
     """
     names = list(string.ascii_letters[:len(rows[0])])

     # A single type is repeated once per column
     if isinstance(column_types, type):
         per_column = [column_types] * len(names)
     else:
         per_column = column_types

     testers = [self._get_tester_for(entry) for entry in per_column]
     return agate.Table(rows, column_names=names, column_types=testers)
示例#11
0
def get_table(new_arr, types, titles):
    """
    Return an agate table built from an array of data, a list of types and
    a list of titles.

    Any exception raised while building the table is printed and ``None``
    is returned instead.
    """
    try:
        return agate.Table(new_arr, titles, types)
    except Exception as e:
        print(e)
        return None
示例#12
0
    def _get_one_catalog(
        self,
        information_schema: InformationSchema,
        schemas: Set[str],
        manifest: Manifest,
    ) -> agate.Table:
        """
        Build the catalog table for ``schemas`` and filter it by ``manifest``.

        The original author notes that overriding this method is bad form.
        One ``pragma table_info`` query is issued per relation (N+1 queries),
        since no other way to collect the column details was found.
        """
        catalog_rows = []

        for schema in schemas:
            schema_relation = self.Relation.create(
                database=information_schema.database, schema=schema)
            relations = self.list_relations_without_caching(schema_relation)

            if len(relations) == 0:
                continue

            for relation in relations:
                name = relation.name
                relation_type = str(relation.type)

                pragma_result = self.connections.execute(
                    f"pragma {schema}.table_info({name})", fetch=True)

                # The second element of the execute result holds the rows;
                # each one describes a single column of the relation.
                for column in pragma_result[1]:
                    catalog_rows.append([
                        information_schema.database,
                        schema,
                        name,
                        relation_type,
                        '',  # table_comment
                        '',  # table_owner
                        column['name'],
                        column['cid'],
                        column['type'] or 'TEXT',
                        '',  # column_comment
                    ])

        table = agate.Table(catalog_rows, [
            'table_database',
            'table_schema',
            'table_name',
            'table_type',
            'table_comment',
            'table_owner',
            'column_name',
            'column_index',
            'column_type',
            'column_comment',
        ])

        return self._catalog_filter_table(table, manifest)
    def attr_test_output(self, obj_type, attr_name, results):
        """
        Print a failure summary for rows whose ``attr_name`` value is False.

        ``results`` holds (group, model, value) rows. When at least one row
        fails, a count line and a table of the failing group/model pairs are
        printed; otherwise nothing is printed.
        """
        table = agate.Table(results, ['group', 'model', attr_name])

        # Keep only the rows where the attribute check came back False
        failures = table.where(lambda row: row[attr_name] is False)
        if not failures.rows:
            return

        summary = "Fail: %s %ss are missing %s" % (
            len(failures.rows), obj_type, attr_name)
        print(summary)
        failures.select(["group", "model"]).print_table(max_column_width=50)
示例#14
0
def table_from_rows(
    rows: List[Any],
    column_names: Iterable[str],
    text_only_columns: Optional[Iterable[str]] = None,
) -> agate.Table:
    """
    Build an agate table from ``rows`` and ``column_names``.

    ``DEFAULT_TYPE_TESTER`` is used for the column types unless
    ``text_only_columns`` is given, in which case the type tester comes from
    ``build_type_tester(text_only_columns)``.
    """
    type_tester = (
        DEFAULT_TYPE_TESTER
        if text_only_columns is None
        else build_type_tester(text_only_columns)
    )
    return agate.Table(rows, column_names, column_types=type_tester)
示例#15
0
def from_xlsx(cls, path, sheet=None):
    """
    Parse an XLSX file.

    The file is closed before returning, including a file or file-like
    object supplied by the caller, and also when parsing fails.

    :param path:
        Path to an XLSX file to load or a file or file-like object for one.
    :param sheet:
        The name or integer index of a worksheet to load. If not specified
        then the "active" sheet will be used.
    """
    if hasattr(path, 'read'):
        f = path
    else:
        f = open(path, 'rb')

    # Close the file even when loading the workbook or reading a cell
    # raises, so a handle opened above is never leaked.
    try:
        book = openpyxl.load_workbook(f, read_only=True, data_only=True)

        if isinstance(sheet, six.string_types):
            sheet = book[sheet]
        elif isinstance(sheet, int):
            sheet = book.worksheets[sheet]
        else:
            sheet = book.active

        column_names = []
        rows = []

        for i, row in enumerate(sheet.rows):
            # The first row supplies the column names
            if i == 0:
                column_names = [c.value for c in row]
                continue

            values = []

            for c in row:
                value = c.value

                if value.__class__ is datetime.datetime:
                    # Handle default XLSX date as 00:00 time
                    if value.date() == datetime.date(1904, 1, 1) and not has_date_elements(c):
                        value = value.time()

                        value = normalize_datetime(value)
                    elif value.time() == NULL_TIME:
                        # A datetime whose time equals NULL_TIME is reduced
                        # to a plain date
                        value = value.date()
                    else:
                        value = normalize_datetime(value)

                values.append(value)

            rows.append(values)
    finally:
        f.close()

    return agate.Table(rows, column_names)
示例#16
0
    def test_make_sql_table_min_col_len(self):
        """
        Check that ``min_col_len=20`` sets the length of the ``name`` column
        to 20 when the longest value has only ten characters.
        """
        rows = ((1, 'x' * 10), (2, ''))
        column_names = ['id', 'name']
        column_types = [agate.Number(), agate.Text()]
        table = agate.Table(rows, column_names, column_types)

        sql_table = agatesql.table.make_sql_table(table, 'test_table', dialect='mysql', db_schema='test_schema',
                                                  constraints=True, min_col_len=20)

        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed from unittest in Python 3.12.
        self.assertEqual(sql_table.columns.get('name').type.length, 20)
示例#17
0
    def test_join(self):
        """
        Benchmark joining two shuffled 100000-row tables on ``text``.

        The fastest of ten single-run timings must be under ten seconds.
        """
        left_rows = [(six.text_type(i), i) for i in range(100000)]
        right_rows = [(six.text_type(i), i) for i in range(100000)]

        # Shuffle both sides so the join does not see pre-sorted input
        shuffle(left_rows)
        shuffle(right_rows)

        names = ['text', 'number']
        types = [agate.Text(), agate.Number()]

        left = agate.Table(left_rows, names, types)
        right = agate.Table(right_rows, names, types)

        timer = Timer(lambda: left.join(right, 'text'))
        fastest = min(timer.repeat(10, 1))

        self.assertLess(fastest, 10)  # CI unreliable, 5s witnessed
    def test_create_if_not_exists(self):
        """
        Write two agate tables into the same SQL table using
        ``create_if_not_exists=True``; the second write must not fail because
        the table already exists.
        """
        column_names = ['id', 'name']
        column_types = [agate.Number(), agate.Text()]
        rows1 = (
            (1, 'Jake'),
            (2, 'Howard'),
        )
        rows2 = (
            (3, 'Liz'),
            (4, 'Tim'),
        )

        table1 = agate.Table(rows1, column_names, column_types)
        table2 = agate.Table(rows2, column_names, column_types)

        engine = create_engine(self.connection_string)
        connection = engine.connect()

        try:
            # Write two agate tables into the same SQL table
            table1.to_sql(connection, 'create_if_not_exists_test', create=True, create_if_not_exists=True, insert=True)
            table2.to_sql(connection, 'create_if_not_exists_test', create=True, create_if_not_exists=True, insert=True)
        finally:
            # Release the connection and engine whether or not a write failed
            connection.close()
            engine.dispose()
示例#19
0
def from_dbf(cls, path, encoding=None):
    """
    Parse a DBF file.

    :param path:
        Path to an DBF file to load. Note that due to limitations of the
        dependency you can not pass a file handle. It must be a path.
    :param encoding:
        Passed through to the DBF reader as its ``encoding`` argument.
    """
    source = DBF(path, load=True, encoding=encoding, recfactory=recfactory)
    return agate.Table(source.records, column_names=source.field_names)
示例#20
0
    def test_to_sql_create_statement_wide_width(self):
        """
        Check the MySQL CREATE TABLE statement generated for a table whose
        text column holds a 21845-character value: the column is expected to
        be declared as ``TEXT``.
        """
        table = agate.Table(
            ((1, 'x' * 21845), (2, '')),
            ['id', 'name'],
            [agate.Number(), agate.Text()],
        )

        statement = table.to_sql_create_statement('test_table', db_schema='test_schema', dialect='mysql')

        # Written line by line so the trailing space after the first column
        # definition is visible.
        expected = (
            'CREATE TABLE test_schema.test_table (\n'
            '  id DECIMAL(38, 0) NOT NULL, \n'
            '  name TEXT\n'
            ');'
        )

        self.assertEqual(statement.replace('\t', '  '), expected)
    def test_choices(self):
        """
        Verify that valid choices are available for all expected fields on all models.

        Problems are collected as (group, model, field, message) rows and
        printed as a table; the test itself makes no assertions.
        """
        # Substrings that mark a field name as a choice field
        choice_field_strs = [
            '_cd',
            '_code',
            '_type',
            'status',
            '_lvl',
            'reportname',
            'form_id',
        ]
        # "<klass_name>.<field name>" entries that are exempt from the checks
        exceptions = [
            'LookupCodesCd.code_type',
            'S497Cd.sup_off_cd',
            'FilerStatusTypesCd.status_type',
            'FilerStatusTypesCd.status_desc',
            'FilerTypesCd.filer_type',
        ]

        results = []

        def report(model, field, message):
            # Record one problem row for the given model field
            results.append(
                (model().klass_group, model.__name__, field.name, message))

        model_list = sorted(get_model_list(),
                            key=lambda x: (x().klass_group, x().klass_name))

        for m in model_list:
            for f in m._meta.fields:
                # Skip everything that is not a choice field to be checked
                if not any(x in f.name for x in choice_field_strs):
                    continue
                if f.name == 'memo_code':
                    continue
                if f.__class__ is ForeignKeyField:
                    continue
                if '{}.{}'.format(m().klass_name, f.name) in exceptions:
                    continue

                if not f.choices:
                    report(m, f, "Has no CHOICES defined")

                if not f.documentcloud_pages:
                    report(m, f, "Has no `documentcloud_pages` defined")

                # Make sure that every choice in the field has a definition
                for slug, name in f.choices:
                    if not name or name == '':
                        report(m, f, "Value '%s' undefined in CHOICES" % slug)

        table = agate.Table(results, ['group', 'model', 'field', 'message'])
        table.print_table(max_rows=None, max_column_width=50)
示例#22
0
    def test_lookup_require_match(self):
        """
        With ``require_match=True`` a lookup over the keys ``WA``, ``VA``
        and ``FA`` is expected to raise ``ValueError``.
        """
        table = agate.Table(
            (('WA', ), ('VA', ), ('FA', )),
            ['usps'],
            [agate.Text()],
        )

        with self.assertRaises(ValueError):
            table.lookup('usps',
                         'state',
                         require_match=True,
                         source=self._source)
示例#23
0
def table_from_data(data, column_names):
    "Convert list of dictionaries into an Agate table"

    # With no data there is nothing to infer columns from, so build an empty
    # table that has the requested columns.
    if len(data) == 0:
        return agate.Table([], column_names=column_names)

    # A table generated from a list of dicts does not preserve the column
    # order of `data`; `select` puts the columns into the requested order.
    unordered = agate.Table.from_object(data, column_types=DEFAULT_TYPE_TESTER)
    return unordered.select(column_names)
示例#24
0
def dbf2csv(f, **kwargs):
    """
    Convert a dBASE .dbf file to csv.

    The file is opened by name (``f.name``) and the CSV text is returned as
    a string. Extra keyword arguments are accepted but not used.
    """
    # The agate table is built while the dbf table is still open
    with dbf.Table(f.name) as db:
        table = agate.Table(db, db.field_names)

    buffer = six.StringIO()
    table.to_csv(buffer)
    csv_text = buffer.getvalue()
    buffer.close()

    return csv_text
示例#25
0
    def test_distinct_values(self):
        """
        Compare ``BaseHound.distinct_values('size')`` for the sample file
        with the distinct ``size`` values of an equivalent hand-built agate
        table.
        """
        column_names: List = [
            'id', 'name', 'dob', 'last seen', 'size', 'active',
        ]
        column_types: List = [
            agate.Number(),
            agate.Text(),
            agate.Date(),
            agate.DateTime(),
            agate.Text(),
            agate.Boolean(),
        ]

        # Every row shares the same "last seen" timestamp
        seen = '06-30-2019 12:12:00'
        rows = [
            (1, 'Alvin Cotton', '03-01-1980', seen, 'L', True),
            (2, 'Usmaan Rojas', '01-12-1978', seen, 'S', False),
            (3, 'Kingston Odling', '04-09-1990', seen, 'M', True),
            (3, 'Pooja Gillespie', '10-07-1985', seen, 'S', True),
            (4, 'Hal Blake', '08-17-1989', seen, 'L', True),
            (5, 'Shannen Blevins', '06-10-1981', seen, 'M', False),
            (5, 'Courteney Weston', '04-23-1992', seen, 'M', False),
            (6, 'Conner Calhoun', '05-16-1977', seen, 'XL', True),
            (7, 'Susie Rasmussen', '02-08-1987', seen, 'L', False),
            (8, 'Cassie Beltran', '12-15-1982', seen, 'M', True),
        ]

        model = csvhound.core.BaseHound()
        # The return value is not needed here; the call is made before
        # asking the model for distinct values.
        model.get_table_from_file('sample-data/test-distinct.csv')
        distinct = model.distinct_values('size')

        reference = agate.Table(rows, column_names, column_types)
        distinct_agate = reference.select('size').distinct('size')

        # now do the testing
        self.assertColumnNames(distinct, ('size', ))
        self.assertColumnTypes(distinct,
                               [type(c) for c in distinct.column_types])
        self.assertRows(distinct, distinct_agate)
示例#26
0
    def test_join(self):
        """
        Benchmark joining two shuffled 100000-row tables on ``text``.

        The fastest of ten single-run timings must be under ten seconds.
        """
        left_rows = [(six.text_type(i), i) for i in range(100000)]
        right_rows = [(six.text_type(i), i) for i in range(100000)]

        # Shuffle both sides so the join does not see pre-sorted input
        shuffle(left_rows)
        shuffle(right_rows)

        number_type = agate.Number()
        text_type = agate.Text()

        columns = (('text', text_type), ('number', number_type))

        left = agate.Table(left_rows, columns)
        right = agate.Table(right_rows, columns)

        def test():
            left.join(right, 'text')

        results = Timer(test).repeat(10, 1)

        min_time = min(results)

        # An elapsed time can never be below zero, so the previous bound of
        # 0 made this test fail unconditionally. Ten seconds is a generous
        # ceiling for slow CI machines.
        self.assertLess(min_time, 10)
示例#27
0
    def test_lookup_no_match(self):
        """
        Without ``require_match`` a key with no match (``FA``) is expected
        to get ``None`` in the looked-up ``state`` column.
        """
        table = agate.Table(
            (('WA', ), ('VA', ), ('FA', )),
            ['usps'],
            [agate.Text()],
        )

        looked_up = table.lookup('usps', 'state', source=self._source)

        self.assertColumnNames(looked_up, ['usps', 'state'])
        self.assertColumnTypes(looked_up, [agate.Text, agate.Text])

        # Third row is the 'FA' key
        self.assertSequenceEqual(looked_up.rows[2].values(), ['FA', None])
示例#28
0
def table_from_data_flat(data, column_names):
    "Convert list of dictionaries into an Agate table"

    def flatten(value):
        # Nested containers are stored as their JSON text
        if isinstance(value, (dict, list, tuple)):
            return json.dumps(value)
        return value

    # Values are taken in each dictionary's own order
    rows = [
        [flatten(value) for value in record.values()]
        for record in data
    ]

    return agate.Table(rows, column_names)
def main():
    """
    Print a bar chart of row counts per ``date`` value in ``SRC_PATH``.

    Only the first ten and last ten dates (in sorted order) are shown,
    separated by a ``'...'`` placeholder entry.
    """
    frame = pd.read_csv(SRC_PATH, dtype=str)
    per_date = frame['date'].value_counts().sort_index()

    # Keep just the first 10 and last 10 entries, with a gap marker between
    per_date = pd.concat([
        per_date.head(10),
        pd.Series({'...': None}, name='date'),
        per_date.tail(10),
    ])

    # agate wants a list of [date, count] rows
    pairs = [[date, count] for date, count in per_date.to_dict().items()]

    chart_table = agate.Table(pairs, ['date', 'count'],
                              [agate.Text(), agate.Number()])
    chart_table.print_bars('date', 'count')
示例#30
0
    def test_create_if_not_exists(self):
        """
        Write two agate tables into the same SQL table using
        ``create_if_not_exists=True`` and check that reading the SQL table
        back yields the columns and the combined rows of both.
        """
        column_names = ['id', 'name']
        column_types = [agate.Number(), agate.Text()]
        rows1 = (
            (1, 'Jake'),
            (2, 'Howard'),
        )
        rows2 = (
            (3, 'Liz'),
            (4, 'Tim'),
        )

        table1 = agate.Table(rows1, column_names, column_types)
        table2 = agate.Table(rows2, column_names, column_types)

        engine = create_engine(self.connection_string)
        connection = engine.connect()

        try:
            # Write two agate tables into the same SQL table
            table1.to_sql(connection,
                          'create_if_not_exists_test',
                          create=True,
                          create_if_not_exists=True,
                          insert=True)
            table2.to_sql(connection,
                          'create_if_not_exists_test',
                          create=True,
                          create_if_not_exists=True,
                          insert=True)

            table = agate.Table.from_sql(connection, 'create_if_not_exists_test')
            self.assertSequenceEqual(table.column_names, column_names)
            self.assertIsInstance(table.column_types[0], agate.Number)
            self.assertIsInstance(table.column_types[1], agate.Text)
            # The SQL table must hold the rows of BOTH writes; the previous
            # check added table1's row count to itself.
            self.assertEqual(len(table.rows), len(table1.rows) + len(table2.rows))
            self.assertSequenceEqual(table.rows[0], table1.rows[0])
        finally:
            # Release the connection and engine whether or not the test passed
            connection.close()
            engine.dispose()