示例#1
0
def test_acs_columns_run():
    task = Columns()
    assert_equals(False, task.complete())
    assert_equals(0, len(current_session().dirty))
    worker = runtask(task)
    assert_equals(True, task.complete())
    assert_equals(True, worker.run_succeeded)
    assert_equals(0, len(current_session().dirty))
示例#2
0
def test_table_task():
    '''
    Very simple table task should run and make an entry in OBSTable, as
    well as a physical table with rows in observatory schema.
    '''
    task = TestTableTask()
    runtask(task)
    table = task.output().get(current_session())
    assert_is_not_none(table)
    assert_is_none(table.the_geom)
    assert_equal(
        current_session().execute('SELECT COUNT(*) FROM observatory.{}'.format(
            table.tablename)).fetchone()[0], 1)
示例#3
0
    def _numerators_tree(self, section_id, subsection_id):
        numerator_paths_result = current_session().execute('''
            WITH RECURSIVE children(numer_id, path) AS (
                SELECT numer_id, ARRAY[]::Text[]
                FROM observatory.obs_meta_numer children
                WHERE numer_tags ? 'subsection/{subsection_id}'
                    AND numer_tags ? 'section/{section_id}'
                    AND numer_weight > 0
                UNION
                SELECT parent.denom_id,
                    children.numer_id || children.path
                FROM observatory.obs_meta parent, children
                WHERE parent.numer_id = children.numer_id
                ) SELECT path from children WHERE numer_id IS NULL;
        '''.format(section_id=section_id, subsection_id=subsection_id))

        numerator_tree = {}
        numerator_ids = set()
        for row in numerator_paths_result.fetchall():
            node = numerator_tree
            for mid in row[0]:
                numerator_ids.add(mid)
                if mid not in node:
                    node[mid] = {}
                node = node[mid]

        numerator_details_result = current_session().execute('''
            SELECT numer_id,
                    numer_name,
                    numer_description,
                    numer_type,
                    numer_extra,
                    numer_aggregate,
                    numer_tags,
                    numer_colname suggested_name,
                    ARRAY[numer_timespan] timespan,
                    ARRAY_AGG(DISTINCT ARRAY[
                    denom_reltype,
                    denom_id,
                    denom_name
                    ]) denoms,
                    ARRAY_AGG(DISTINCT ARRAY[
                    geom_id, geom_name, numer_timespan,
                    geom_tags::Text
                    ]) geom_timespans,
                    FIRST(ST_AsText(ST_Envelope(the_geom))) envelope
            FROM observatory.obs_meta
            WHERE numer_id = ANY (ARRAY['{}'])
            GROUP BY 1, 2, 3, 4, 5, 6, 7, 8, 9
        '''.format("', '".join(numerator_ids)))
        return numerator_tree, self._parse_columns(numerator_details_result)
示例#4
0
def test_geom_table_task_update():
    '''
    Should be possible to generate a new version of a table by incrementing
    the version number.
    '''
    task = TestGeomTableTask()
    runtask(task)
    assert_true(task.complete())
    task = TestGeomTableTask()
    task.version = lambda: 10
    assert_false(task.complete())
    current_session().commit()
    runtask(task)
    assert_true(task.complete())
def test_table_task_increment_version_runs_again():
    task = TestTableTask()
    runtask(task)
    output = task.output()
    assert_true(output.exists())

    task = TestTableTask()
    task.version = lambda: 10
    output = task.output()
    assert_false(output.exists())

    current_session().rollback()
    runtask(task)
    assert_true(output.exists())
示例#6
0
def test_table_task_increment_version_runs_again():
    task = TestTableTask()
    runtask(task)
    output = task.output()
    assert_true(output.exists())

    task = TestTableTask()
    task.version = lambda: 10
    output = task.output()
    assert_false(output.exists())

    current_session().rollback()
    runtask(task)
    assert_true(output.exists())
示例#7
0
def test_carto_2_temp_table_task():
    '''
    Convert a table on CARTO to a temp table.
    '''
    task = TestCSV2TempTableTask()
    before_table_count = current_session().execute(
        'SELECT COUNT(*) FROM observatory.obs_table').fetchone()[0]
    runtask(task)
    table = task.output()
    assert_equal(
        current_session().execute('SELECT COUNT(*) FROM {}'.format(
            table.table)).fetchone()[0], 10)
    after_table_count = current_session().execute(
        'SELECT COUNT(*) FROM observatory.obs_table').fetchone()[0]
    assert_equal(before_table_count, after_table_count)
示例#8
0
    def _boundaries_tree(self, section_id, subsection_id):
        boundaries_list_result = current_session().execute('''
            SELECT DISTINCT c.id
            FROM observatory.obs_tag section_t,
                    observatory.obs_column_tag section_ct,
                    observatory.obs_tag subsection_t,
                    observatory.obs_column_tag subsection_ct,
                    observatory.obs_column c
            WHERE section_t.id = section_ct.tag_id
                AND subsection_t.id = subsection_ct.tag_id
                AND c.id = section_ct.column_id
                AND c.id = subsection_ct.column_id
                AND subsection_t.id = '{subsection_id}'
                AND section_t.id = '{section_id}'
                AND subsection_t.type = 'subsection'
                AND section_t.type = 'section'
            GROUP BY c.id
            ORDER BY c.id
        '''.format(section_id=section_id, subsection_id=subsection_id))
        boundary_ids = [row[0] for row in boundaries_list_result.fetchall()]

        boundaries_detail_result = current_session().execute('''
            SELECT c.id,
                    FIRST(c.name),
                    FIRST(c.description),
                    FIRST(c.type),
                    FIRST(ctab.extra),
                    FIRST(c.aggregate),
                    JSONB_Object_Agg(t.type || '/' || t.id, t.name),
                    'name' suggested_name,
                    ARRAY_AGG(DISTINCT tab.timespan) timespan,
                    ARRAY[]::Text[] denoms,
                    ARRAY[]::Text[],
                    ST_AsText(ST_Envelope(FIRST(tab.the_geom))) envelope
            FROM observatory.obs_column c,
                    observatory.obs_column_tag ct,
                    observatory.obs_tag t,
                    observatory.obs_column_table ctab,
                    observatory.obs_table tab
            WHERE c.id = ANY(ARRAY['{}'])
                AND ct.column_id = c.id
                AND ct.tag_id = t.id
                AND c.id = ctab.column_id
                AND tab.id = ctab.table_id
            GROUP BY 1, 8
        '''.format("', '".join(boundary_ids)))
        boundary_data = self._parse_columns(boundaries_detail_result)
        return {k: {} for k in list(boundary_data.keys())}, boundary_data
示例#9
0
    def populate(self):
        session = current_session()

        insert = True
        input_ = self.input()
        output = self.output()
        session.execute('ALTER TABLE {output} ADD PRIMARY KEY (region_name)'.format(
            output=output.table))
        session.flush()
        for hometype, _, measure, _, _ in hometype_measures():
            col_id = hometype + '_' + measure
            input_table = input_[col_id].table
            stmt = '''INSERT INTO {output} (region_name, {col_id})
                      SELECT "RegionName", "{year}-{month}"::NUMERIC
                      FROM {input_table}
                      ON CONFLICT (region_name)
                         DO UPDATE SET {col_id} = EXCLUDED.{col_id}'''
            session.execute(stmt.format(
                output=output.table,
                year=str(self.year).zfill(2),
                month=str(self.month).zfill(2),
                col_id=col_id,
                input_table=input_table))
            if insert:
                insert = False
    def update_or_create_metadata(self, _testmode=False):
        session = current_session()

        # replace metadata table
        self.obs_table = session.merge(self.obs_table)
        obs_table = self.obs_table

        for i, colname_coltarget in enumerate(self._columns.items()):
            colname, coltarget = colname_coltarget
            colname = colname.lower()
            col = coltarget.get(session)

            if _testmode:
                coltable = OBSColumnTable(colname=colname,
                                          table=obs_table,
                                          column=col)
            else:
                # Column info for obs metadata
                coltable = session.query(OBSColumnTable).filter_by(
                    column_id=col.id, table_id=obs_table.id).first()
                if coltable:
                    coltable.colname = colname
                else:
                    # catch the case where a column id has changed
                    coltable = session.query(OBSColumnTable).filter_by(
                        table_id=obs_table.id, colname=colname).first()
                    if coltable:
                        coltable.column = col
                    else:
                        coltable = OBSColumnTable(colname=colname,
                                                  table=obs_table,
                                                  column=col)

            session.add(coltable)
示例#11
0
 def populate(self):
     session = current_session()
     session.execute('INSERT INTO {output} (the_geom, cvegeo) '
                     'SELECT wkb_geometry, cvegeo '
                     'FROM {input} '.format(
                         output=self.output().table,
                         input=self.input()['data'].table))
示例#12
0
    def run(self):
        session = current_session()

        input_table = self.input().get(session)
        names_table = input_table.tablename
        schema = self.output().schema
        output_table = self.output().qualified_tablename
        output_tablename = self.output().tablename

        session.execute('CREATE SCHEMA IF NOT EXISTS "{}"'.format(schema))

        query = '''
                CREATE TABLE {output_table} AS
                SELECT n.{geoid_field} geoid, '{geography}' as level, n.{geoname_field} geoname
                FROM observatory.{names_table} n
                '''.format(output_table=output_table,
                           geoid_field=self._geoid_field,
                           geoname_field=self._geoname_field,
                           geography=self.geography,
                           names_table=names_table)
        session.execute(query)

        query = '''
                CREATE INDEX {output_tablename}_idx ON {output_table} (geoid)
                '''.format(output_table=output_table,
                           output_tablename=output_tablename)
        session.execute(query)

        session.commit()
示例#13
0
    def run(self):
        session = current_session()
        input_ = self.input()

        session.execute('ALTER TABLE {rel_table} DROP CONSTRAINT IF EXISTS '
                        '{country}hierarchy_fk_parent'.format(
            rel_table=input_['rel'].qualified_tablename,
            country=self._country, ))

        session.execute('ALTER TABLE {rel_table} ADD CONSTRAINT '
                        '{country}hierarchy_fk_parent '
                        'FOREIGN KEY (child_id, child_level) '
                        'REFERENCES {info_table} (geoid, level) '.format(
            rel_table=input_['rel'].qualified_tablename,
            info_table=input_['info'].qualified_tablename,
            country=self._country, ))

        session.execute('ALTER TABLE {rel_table} DROP CONSTRAINT IF EXISTS '
                        '{country}hierarchy_fk_child'.format(
            rel_table=input_['rel'].qualified_tablename,
            country=self._country, ))
        session.execute('ALTER TABLE {rel_table} ADD CONSTRAINT '
                        '{country}hierarchy_fk_child '
                        'FOREIGN KEY (parent_id, parent_level) '
                        'REFERENCES {info_table} (geoid, level) '.format(
            rel_table=input_['rel'].qualified_tablename,
            info_table=input_['info'].qualified_tablename,
            country=self._country, ))
        session.commit()
示例#14
0
def teardown():
    from tasks.meta import current_session, Base
    if Base.metadata.bind.url.database != 'test':
        raise Exception('Can only run tests on database "test"')
    session = current_session()
    session.rollback()
    Base.metadata.drop_all()
示例#15
0
    def populate(self):
        session = current_session()

        insert = True
        for hometype, _, measure, _, _ in hometype_measures():
            col_id = hometype + '_' + measure
            input_table = self.input()[col_id].table
            if insert:
                stmt = 'INSERT INTO {output} (region_name, {col_id}) ' \
                        'SELECT "RegionName", "{year}-{month}"::NUMERIC ' \
                        'FROM {input_table} '
            else:
                stmt = 'UPDATE {output} ' \
                        'SET {col_id} = "{year}-{month}"::NUMERIC ' \
                        'FROM {input_table} WHERE ' \
                        '{input_table}."RegionName" = {output}.region_name '
            session.execute(stmt.format(
                output=self.output().table,
                year=str(self.year).zfill(2),
                month=str(self.month).zfill(2),
                col_id=col_id,
                input_table=input_table))
            if insert:
                session.execute('ALTER TABLE {output} ADD PRIMARY KEY (region_name)'.format(
                    output=self.output().table))
            insert = False
示例#16
0
 def populate(self):
     # This select statement transforms the input table, taking advantage of our
     # new column names.
     # The session is automatically committed if there are no errors.
     session = current_session()
     columns = self.columns()
     colnames = columns.keys()
     select_colnames = []
     for naics_code, naics_columns in self.input()['naics'].iteritems():
         for colname in naics_columns.keys():
             select_colnames.append('''MAX(CASE
                 WHEN industry_code = '{naics_code}' THEN {colname} ELSE NULL
             END)::Numeric'''.format(naics_code=naics_code,
                                     colname=colname
                                    ))
     insert = '''INSERT INTO {output} ({colnames})
                 SELECT area_fips, {select_colnames}
                 FROM {input}
                 GROUP BY area_fips '''.format(
                     output=self.output().table,
                     input=self.input()['data'].table,
                     colnames=', '.join(colnames),
                     select_colnames=', '.join(select_colnames),
                 )
     session.execute(insert)
示例#17
0
    def run(self):
        session = current_session()
        measurements = self._get_config_data()
        for measure in measurements:
            measure['geom_id'] = GEOGRAPHY_LEVELS[self.geography]
        json_metadata = json.dumps(measurements)
        result = session.execute(self._get_meta_query(json_metadata))
        if result:
            join_data = {}
            join_data['numer'] = {}
            colnames = ['geom_id as geoid']
            for data in result.fetchall():
                join_data['numer'][data['numer_table']] = {
                    'table': 'observatory.{}'.format(data['numer_table']),
                    'join_column': data['numer_join_col']
                }
                # All come from the same geometry tables so we use, by now, just one geometry
                # TODO Make it possible to have multiple geometry tables
                join_data['geom'] = {
                    'table': 'observatory.{}'.format(data['geom_table']),
                    'join_column': data['geom_join_col']
                }
                colnames.append(data['numer_col'])
            measurement_result = session.execute(
                self._get_measurements_query(join_data, colnames))
            if measurement_result:
                measurements = measurement_result.fetchall()
                self._generate_csv_file(colnames, measurements)
            else:
                LOGGER.error('No results for the queried measurements')

        else:
            LOGGER.error(
                'No results for the defined measurements in the JSON file')
示例#18
0
 def populate(self):
     # This select statement transforms the input table, taking advantage of our
     # new column names.
     # The session is automatically committed if there are no errors.
     session = current_session()
     columns = self.columns()
     colnames = list(columns.keys())
     select_colnames = []
     input_ = self.input()
     for naics_code, naics_columns in input_['naics'].items():
         for colname in list(naics_columns.keys()):
             select_colnames.append('''MAX(CASE
                 WHEN industry_code = '{naics_code}' THEN {colname} ELSE NULL
             END)::Numeric'''.format(naics_code=naics_code,
                                     colname=colname))
     insert = '''INSERT INTO {output} ({colnames})
                 SELECT area_fips AS area_fipssl, area_fips AS area_fipssc, {select_colnames}
                 FROM {input}
                 GROUP BY area_fips '''.format(
         output=self.output().table,
         input=input_['data'].table,
         colnames=', '.join(colnames),
         select_colnames=', '.join(select_colnames),
     )
     session.execute(insert)
示例#19
0
 def run(self):
     session = current_session()
     resp = session.execute('SELECT table_schema, table_name '
                            'FROM information_schema.tables '
                            "WHERE table_schema ILIKE 'observatory' ")
     for _, tablename in resp:
         if tablename in ('obs_table', 'obs_column_table', 'obs_column',
                          'obs_tag', 'obs_column_to_column',
                          'obs_column_tag'):
             continue
         if session.query(OBSTable).filter_by(
                 tablename=tablename).count() == 0:
             cnt = session.execute(
                 'SELECT COUNT(*) FROM observatory.{tablename}'.format(
                     tablename=tablename)).fetchone()[0]
             if cnt == 0:
                 stmt = 'DROP TABLE observatory.{tablename} CASCADE'.format(
                     tablename=tablename)
                 LOGGER.info(stmt)
                 session.execute(stmt)
                 session.commit()
             else:
                 raise Exception(
                     "Will not automatically drop table {tablename} "
                     "with data in it".format(tablename=tablename))
示例#20
0
def test_empty_obs_meta_to_local():
    '''
    OBSMetaToLocal should work even if tables are empty.  Should result in
    creation of blank obs_meta, obs_meta_numer, obs_meta_denom, obs_meta_geom,
    obs_meta_timespan tables.
    '''
    imp.reload(tasks.carto)
    task = tasks.carto.OBSMetaToLocal()
    runtask(task)
    session = current_session()
    assert_equals(
        session.execute(
            'SELECT COUNT(*) FROM observatory.obs_meta').fetchone()[0], 0)
    assert_equals(
        session.execute(
            'SELECT COUNT(*) FROM observatory.obs_meta_numer').fetchone()[0],
        0)
    assert_equals(
        session.execute(
            'SELECT COUNT(*) FROM observatory.obs_meta_denom').fetchone()[0],
        0)
    assert_equals(
        session.execute(
            'SELECT COUNT(*) FROM observatory.obs_meta_geom').fetchone()[0], 0)
    assert_equals(
        session.execute('SELECT COUNT(*) FROM observatory.obs_meta_timespan').
        fetchone()[0], 0)
    assert_equals(
        session.execute(
            'SELECT COUNT(*) FROM observatory.obs_meta_geom_numer_timespan').
        fetchone()[0], 0)
示例#21
0
    def populate_block(self):
        '''
        load relevant columns from underlying census tables
        '''
        session = current_session()
        colids = []
        colnames = []
        for colname, coltarget in self.columns().items():
            colid = coltarget.get(session).id
            if 'geoid' in colid:
                colids.append('bi.blockid {colname}'.format(colname=colname))
            else:
                colids.append('({colname} * (bi.percentage/100.0))'.format(
                    colname=colname))
            colnames.append(colname)

        tableclause = 'tiger2015.blocks_interpolation bi INNER JOIN {bg_table} obs ON (bi.blockgroupid = obs.geoidsl)'.format(
            bg_table=self.input()['bg_extract'].table)
        table_id = self.output().table
        insert_query = '''
            INSERT INTO {output} ({colnames})
            SELECT {colids}
            FROM {tableclause}
        '''.format(
            output=table_id,
            colnames=', '.join(colnames),
            colids=', '.join(colids),
            tableclause=tableclause,
        )
        LOGGER.debug(insert_query)
        session.execute(insert_query)
示例#22
0
 def populate(self):
     session = current_session()
     oacc = OutputAreaClassificationColumns
     sprgrp_case = ' '.join([
         "WHEN '{}' THEN '{}'".format(c[1], c[0])
         for c in oacc.sprgrp_mapping.items()
     ])
     grp_case = ' '.join([
         "WHEN '{}' THEN '{}'".format(c[1], c[0])
         for c in oacc.grp_mapping.items()
     ])
     subgrp_case = ' '.join([
         "WHEN '{}' THEN '{}'".format(c[1], c[0])
         for c in oacc.subgrp_mapping.items()
     ])
     session.execute('INSERT INTO {output} '
                     'SELECT oa_sa, '
                     'CASE sprgrp {sprgrp_case} END sprgrp, '
                     'CASE grp {grp_case} END grp, '
                     'CASE subgrp {subgrp_case} END subgrp '
                     'FROM {input}'.format(
                         output=self.output().table,
                         input=self.input()['data'].table,
                         sprgrp_case=sprgrp_case,
                         grp_case=grp_case,
                         subgrp_case=subgrp_case,
                     ))
示例#23
0
 def populate(self):
     session = current_session()
     session.execute('INSERT INTO {output} (the_geom, cvegeo) '
                     'SELECT wkb_geometry, cvegeo '
                     'FROM {input} '.format(
                         output=self.output().table,
                         input=self.input()['data'].table))
示例#24
0
 def run(self):
     session = current_session()
     try:
         session.execute('DROP TABLE IF EXISTS observatory.obs_meta')
         session.execute(self.FIRST_AGGREGATE)
         session.execute('CREATE TABLE observatory.obs_meta AS {select}'.format(
             select=self.QUERY.replace('the_geom_webmercator', 'the_geom')
         ))
         # confirm that there won't be ambiguity with selection of geom
         session.execute('CREATE UNIQUE INDEX ON observatory.obs_meta '
                         '(numer_id, denom_id, numer_timespan, geom_weight)')
         session.execute('CREATE INDEX ON observatory.obs_meta USING gist '
                         '(the_geom)')
         for dimension, query in self.DIMENSIONS.iteritems():
             session.execute('DROP TABLE IF EXISTS observatory.obs_meta_{dimension}'.format(
                 dimension=dimension))
             session.execute('CREATE TABLE observatory.obs_meta_{dimension} '
                             'AS {select}'.format(
                                 dimension=dimension,
                                 select=query.replace('the_geom_webmercator', 'the_geom') \
                                             .replace('3857', '4326')
                             ))
             session.execute('CREATE INDEX ON observatory.obs_meta_{dimension} USING gist '
                             '(the_geom)'.format(dimension=dimension))
         session.commit()
         self._complete = True
     except:
         session.rollback()
         raise
示例#25
0
    def populate_general(self):
        session = current_session()
        columns = self.columns()
        out_colnames = list(columns.keys())
        in_table = self.input()['data']
        in_colnames = [ct._id.split('.')[-1] for ct in list(columns.values())]
        in_colnames[0] = 'geo_code'
        for i, in_c in enumerate(in_colnames):
            cmd =   "SELECT 'exists' FROM information_schema.columns " \
                    "WHERE table_schema = '{schema}' " \
                    "  AND table_name = '{tablename}' " \
                    "  AND column_name = '{colname}' " \
                    "  LIMIT 1".format(
                        schema=in_table.schema,
                        tablename=in_table.tablename.lower(),
                        colname=in_c.lower())
            # remove columns that aren't in input table
            if session.execute(cmd).fetchone() is None:
                in_colnames[i] = None
                out_colnames[i] = None
        in_colnames = [
            "CASE {ic}::TEXT WHEN '-6' THEN NULL ELSE {ic} END".format(ic=ic) for ic in in_colnames if ic is not None]
        out_colnames = [oc for oc in out_colnames if oc is not None]

        cmd = 'INSERT INTO {output} ({out_colnames}) ' \
              'SELECT {in_colnames} FROM {input} '.format(
                    output=self.output().table,
                    input=in_table.table,
                    in_colnames=', '.join(in_colnames),
                    out_colnames=', '.join(out_colnames))
        session.execute(cmd)
示例#26
0
    def populate_da_from_cd(self):
        session = current_session()
        columns = self.columns()
        colnames = list(columns.keys())
        out_colnames = [oc for oc in colnames if oc is not None]
        in_colnames = ['da.geom_id']
        for colname in out_colnames:
            if colname != 'geo_code':
                # We reduce the number of decimals to reduce the size of the row to avoid hit
                # the limit which is 8Kb. More info https://github.com/CartoDB/bigmetadata/issues/527
                in_colnames.append('round(cast(float8 ({colname} * (ST_Area(da.the_geom)/ST_Area(cd.the_geom))) as numeric), 2) {colname}'.format(colname=colname))

        insert_query = '''
                INSERT INTO {output} ({out_colnames})
                SELECT {in_colnames} FROM {da_geom} da
                INNER JOIN {cd_geom} cd ON (cd.geom_id = left(da.geom_id,4))
                INNER JOIN {cd_data} data ON (cd.geom_id = data.geo_code)
                '''.format(output=self.output().table,
                           da_geom=self.input()['geo'].table,
                           cd_geom=self.input()['geo_source'].table,
                           cd_data=self.input()['data_source'].table,
                           in_colnames=', '.join(in_colnames),
                           out_colnames=', '.join(out_colnames))

        LOGGER.debug(insert_query)
        session.execute(insert_query)
示例#27
0
    def populate(self):
        session = current_session()

        session.execute('INSERT INTO {output} '
                        'SELECT "wof:id", '
                        'CASE WHEN ST_Npoints(wkb_geometry) > 1000000 '
                        '     THEN ST_Simplify(wkb_geometry, 0.0001) '
                        '     ELSE wkb_geometry '
                        'END, '
                        '"wof:name" '
                        'FROM {input} '.format(
                            output=self.output().table,
                            input=self.input()['data'].table))

        # replace default WOF US states with our clipped versions
        if self.resolution == 'region':
            for geoid, statename in session.execute(
                    'SELECT geoid, name FROM tiger2014.state'):
                session.execute('UPDATE {output} out '
                                'SET the_geom = shoreline.the_geom '
                                'FROM {shoreline} shoreline '
                                'WHERE shoreline.geoid = \'{geoid}\' '
                                '  AND out.name ILIKE \'{statename}\' '.format(
                                    shoreline=self.input()['shoreline'].table,
                                    output=self.output().table,
                                    geoid=geoid,
                                    statename=statename))
示例#28
0
 def populate_mb(self):
     session = current_session()
     column_targets = self.columns()
     out_colnames = [oc.lower() for oc in list(column_targets.keys())]
     in_colnames = ['mb.geom_id as region_id']
     for ic in list(column_targets.keys()):
         if ic != 'region_id':
             in_colnames.append(
                 'round(cast(float8 ({ic} * (ST_Area(mb.the_geom)/ST_Area(sa1geo.the_geom))) as numeric), 2) as {ic}'
                 .format(ic=ic.lower()))
     insert_query = '''
           INSERT INTO {output} ("{out_colnames}")
           SELECT {in_colnames}
           FROM {input_geo_mb} mb
           INNER JOIN {input_geo_sa1} sa1geo ON (mb.parent_id = sa1geo.geom_id)
           INNER JOIN {input_data} sa1data ON (mb.parent_id = sa1data.region_id)
           '''.format(output=self.output().table,
                      input_data=self.input()['data'].table,
                      input_geo_mb=self.input()['geo'].table,
                      input_geo_sa1=self.input()['geo_sa1'].table,
                      in_colnames=', '.join(in_colnames),
                      out_colnames='", "'.join(out_colnames))
     try:
         LOGGER.debug(insert_query)
         session.execute(insert_query)
     except Exception:
         session.rollback()
def test_table_task(klass, params):
    '''
    Test {} task with {}.

    This does not clear out all database artifacts between runs, for
    performance reasons.  The order of decorators could be switched to enforce
    cleaner behavior, but running all necessary ColumnsTasks repeatedly tends
    to be very slow.
    '''.format(klass, params)

    params.update(get_custom_parameters(klass.__name__))
    task = klass(**params)
    runtask(task, superclasses=[TagsTask, ColumnsTask, TableTask])

    imp.reload(tasks.carto)
    obs_meta_to_local = tasks.carto.OBSMetaToLocal()

    runtask(obs_meta_to_local)

    session = current_session()
    assert_greater(
        session.execute(
            'SELECT COUNT(*) FROM observatory.obs_meta').fetchone()[0], 0)

    session.execute('DROP TABLE observatory.obs_meta')
    session.execute('DELETE FROM observatory.obs_table')
    session.commit()
示例#30
0
    def populate(self):
        session = current_session()
        columns = self.columns()
        out_colnames = columns.keys()
        in_table = self.input()['data']
        in_colnames = [ct._id.split('.')[-1] for ct in columns.values()]
        in_colnames[0] = 'geo_code'
        for i, in_c in enumerate(in_colnames):
            cmd =   "SELECT 'exists' FROM information_schema.columns " \
                    "WHERE table_schema = '{schema}' " \
                    "  AND table_name = '{tablename}' " \
                    "  AND column_name = '{colname}' " \
                    "  LIMIT 1".format(
                        schema=in_table.schema,
                        tablename=in_table.tablename.lower(),
                        colname=in_c.lower())
            # remove columns that aren't in input table
            if session.execute(cmd).fetchone() is None:
                in_colnames[i] = None
                out_colnames[i] = None
        in_colnames = [
            "CASE {ic}::TEXT WHEN '-6' THEN NULL ELSE {ic} END".format(ic=ic) for ic in in_colnames if ic is not None]
        out_colnames = [oc for oc in out_colnames if oc is not None]

        cmd =   'INSERT INTO {output} ({out_colnames}) ' \
                'SELECT {in_colnames} FROM {input} '.format(
                    output=self.output().table,
                    input=in_table.table,
                    in_colnames=', '.join(in_colnames),
                    out_colnames=', '.join(out_colnames))
        session.execute(cmd)
示例#31
0
 def populate(self):
     session = current_session()
     session.execute('''
         INSERT INTO {output} (the_geom, huc_12)
         SELECT ST_MakeValid(ST_Simplify(wkb_geometry, 0.0005)) the_geom, huc_12
         FROM {input}'''.format(input=self.input()['data'].table,
                                output=self.output().table))
示例#32
0
 def populate(self):
     session = current_session()
     session.execute('''
         INSERT INTO {output}
         SELECT linearid, fullname, rttyp, mtfcc, geom
         FROM {input}'''.format(output=self.output().table,
                                input=self.input()['data'].table))
示例#33
0
def test_obs_meta_to_local_works_twice():
    '''
    Should be possible to run OBSMetaToLocal twice in a row.
    '''
    imp.reload(tasks.carto)
    task = tasks.carto.OBSMetaToLocal()
    runtask(task)
    runtask(task)
    session = current_session()
    assert_equals(
        session.execute(
            'SELECT COUNT(*) FROM observatory.obs_meta').fetchone()[0], 0)
    assert_equals(
        session.execute(
            'SELECT COUNT(*) FROM observatory.obs_meta_numer').fetchone()[0],
        0)
    assert_equals(
        session.execute(
            'SELECT COUNT(*) FROM observatory.obs_meta_denom').fetchone()[0],
        0)
    assert_equals(
        session.execute(
            'SELECT COUNT(*) FROM observatory.obs_meta_geom').fetchone()[0], 0)
    assert_equals(
        session.execute('SELECT COUNT(*) FROM observatory.obs_meta_timespan').
        fetchone()[0], 0)
    assert_equals(
        session.execute(
            'SELECT COUNT(*) FROM observatory.obs_meta_geom_numer_timespan').
        fetchone()[0], 0)
示例#34
0
 def output(self):
     session = current_session()
     for table in session.query(OBSTable):
         split = table.id.split('.')
         schema, task_id = split[0:-1], split[-1]
         modname = 'tasks.' + '.'.join(schema)
         module = __import__(modname, fromlist=['*'])
         exists = False
         for name in dir(module):
             kls = getattr(module, name)
             if not isinstance(kls, Register):
                 continue
             # this doesn't work because of underscore_slugify
             #possible_kls = '_'.join(task_id.split('_')[0:-len(kls.get_params())-1])
             if task_id.startswith(underscore_slugify(name)):
                 exists = True
         if exists is True:
             LOGGER.info('{table} exists'.format(table=table))
         else:
             # TODO drop table
             import pdb
             pdb.set_trace()
             LOGGER.info(table)
         yield PostgresTarget(schema='observatory',
                              tablename=table.tablename)
示例#35
0
def test_csv_2_temp_table_task():
    '''
    CSV to temp table task should run and generate no entry in OBSTable, but
    a physical table in the right schema.
    '''
    task = TestCSV2TempTableTask()
    before_table_count = current_session().execute(
        'SELECT COUNT(*) FROM observatory.obs_table').fetchone()[0]
    runtask(task)
    table = task.output()
    assert_equal(
        current_session().execute('SELECT COUNT(*) FROM {}'.format(
            table.table)).fetchone()[0], 10)
    after_table_count = current_session().execute(
        'SELECT COUNT(*) FROM observatory.obs_table').fetchone()[0]
    assert_equal(before_table_count, after_table_count)
示例#36
0
    def populate(self):
        session = current_session()

        insert = True
        input_ = self.input()
        output = self.output()
        session.execute(
            'ALTER TABLE {output} ADD PRIMARY KEY (region_name)'.format(
                output=output.table))
        session.flush()
        for hometype, _, measure, _, _ in hometype_measures():
            col_id = hometype + '_' + measure
            input_table = input_[col_id].table
            stmt = '''INSERT INTO {output} (region_name, {col_id})
                      SELECT "RegionName", "{year}-{month}"::NUMERIC
                      FROM {input_table}
                      ON CONFLICT (region_name)
                         DO UPDATE SET {col_id} = EXCLUDED.{col_id}'''
            session.execute(
                stmt.format(output=output.table,
                            year=str(self.year).zfill(2),
                            month=str(self.month).zfill(2),
                            col_id=col_id,
                            input_table=input_table))
            if insert:
                insert = False
示例#37
0
def test_geom_table_task():
    '''
    Table task with geoms should run and generate an entry in OBSTable with
    a summary geom, plus a physical table with rows in observatory schema.

    It should also generate a raster tile summary.
    '''
    task = TestGeomTableTask()
    runtask(task)
    table = task.output().get(current_session())
    assert_is_not_none(table)
    assert_is_not_none(table.the_geom)
    assert_equal(
        current_session().execute('SELECT COUNT(*) FROM observatory.{}'.format(
            table.tablename)).fetchone()[0], 1)
    assert_greater(current_session().query(OBSColumnTableTile).count(), 0)
示例#38
0
    def populate(self):
        session = current_session()

        session.execute('INSERT INTO {output} '
                        'SELECT "wof:id", '
                        'CASE WHEN ST_Npoints(wkb_geometry) > 1000000 '
                        '     THEN ST_Simplify(wkb_geometry, 0.0001) '
                        '     ELSE wkb_geometry '
                        'END, '
                        '"wof:name" '
                        'FROM {input} '.format(
                            output=self.output().table,
                            input=self.input()['data'].table
                        ))

        # replace default WOF US states with our clipped versions
        if self.resolution == 'region':
            for geoid, statename in session.execute('SELECT geoid, name FROM tiger2014.state'):
                session.execute('UPDATE {output} out '
                                'SET the_geom = shoreline.the_geom '
                                'FROM {shoreline} shoreline '
                                'WHERE shoreline.geoid = \'{geoid}\' '
                                '  AND out.name ILIKE \'{statename}\' '.format(
                                    shoreline=self.input()['shoreline'].table,
                                    output=self.output().table,
                                    geoid=geoid,
                                    statename=statename))
示例#39
0
文件: cdrc.py 项目: stvno/bigmetadata
 def populate(self):
     session = current_session()
     session.execute('INSERT INTO {output} '
                     'SELECT ST_MakeValid(wkb_geometry), oa_sa, sprgrp, grp, subgrp '
                     'FROM {input}'.format(
                         output=self.output().table,
                         input=self.input()['data'].table,
                     ))
示例#40
0
def session_scope():
    """Provide a transactional scope around a series of operations."""
    try:
        yield current_session()
        session_commit(None)
    except Exception as e:
        session_rollback(None, e)
        raise
示例#41
0
 def populate(self):
     session = current_session()
     session.execute(' INSERT INTO {output} '
                     ' SELECT the_geom, id_2, pop2010, name_2 '
                     ' FROM {input} '.format(
                         output=self.output().table,
                         input=self.input()['data'].table
                     ))
示例#42
0
def test_column_task(klass):
    # Ensure every column task runs and produces some kind of independent
    # metadata.
    # TODO: test columnstasks that have params
    if klass.get_param_names():
        raise SkipTest("Cannot test ColumnsTask with params")
    task = klass()
    runtask(task)
    assert_greater(current_session().query(OBSColumn).filter(OBSColumn.id.startswith(classpath(task))).count(), 0)
示例#43
0
    def requires(self):
        tables = {}
        session = current_session()
        for table in session.query(OBSTable):
            if should_upload(table):
                tables[table.id] = table.tablename

        for table_id, tablename in tables.iteritems():
            yield TableToCartoViaImportAPI(table=tablename, force=self.force)
示例#44
0
 def run(self):
     session = current_session()
     session.execute('CREATE TABLE {output} AS '
                     'SELECT geoid, ST_Union(ST_MakeValid(the_geom)) AS the_geom, '
                     '       MAX(aland) aland, MAX(awater) awater '
                     'FROM {input} '
                     'GROUP BY geoid'.format(
                         output=self.output().table,
                         input=self.input().table))
示例#45
0
 def populate(self):
     session = current_session()
     session.execute('INSERT INTO {output} '
                     'SELECT {code}uid as geom_id, '
                     '       wkb_geometry as geom '
                     'FROM {input} '.format(
                         output=self.output().table,
                         code=self.resolution.replace('_', ''),
                         input=self.input()['data'].table))
示例#46
0
 def run(self):
     session = current_session()
     session.execute('CREATE TABLE {output} (the_geom GEOMETRY)'.format(
         output=self.output().table
     ))
     session.execute('INSERT INTO {output} '
                     'SELECT ST_SimplifyPreserveTopology(ST_Union(geom), 0.1) '
                     'FROM tiger2014.state'.format(
                         output=self.output().table
                     ))
示例#47
0
 def run(self):
     session = current_session()
     stmt = ('CREATE TABLE {output} '
             'AS SELECT geoid, id, ST_Difference( '
             'ST_MakeValid(pos_geom), ST_MakeValid(neg_geom)) the_geom '
             #'pos_geom, neg_geom) the_geom '
             'FROM {input}'.format(
                 output=self.output().table,
                 input=self.input().table), )[0]
     session.execute(stmt)
示例#48
0
 def requires(self):
     session = current_session()
     if self.exact_id:
         table = session.query(OBSTable).get(self.exact_id)
     elif self.tablename:
         table = session.query(OBSTable).filter(OBSTable.tablename == self.tablename).one()
     elif self.id:
         table = session.query(OBSTable).filter(OBSTable.id.ilike('%' + self.id + '%')).one()
     else:
         raise Exception('Need id or exact_id for SyncData')
     return TableToCarto(table=table.tablename, force=self.force)
    def populate(self):
        table_name = self.output().table
        shell(r"psql -c '\copy {table} FROM {file_path} WITH CSV HEADER'".format(
            table=table_name,
            file_path=self.input()['data_file'].path
        ))
        for name, segment_id in SpielmanSingletonColumns.x10_mapping.iteritems():
            current_session().execute("update {table} set X10 = '{name}' "
                                      "where X10 ='{segment_id}'; ".format(
                                          table=table_name,
                                          name=name,
                                          segment_id=segment_id
                                      ))

        for name, segment_id in SpielmanSingletonColumns.x55_mapping.iteritems():
            current_session().execute("update {table} set X55 = '{name}' "
                                      "where X55 ='{segment_id}'; ".format(
                                          table=table_name,
                                          name=name,
                                          segment_id=segment_id
                                      ))
示例#50
0
 def run(self):
     session = current_session()
     session.execute("CREATE TABLE {output} AS "
                     "SELECT * FROM {input} "
                     "WHERE agglvl_code IN ('74', '73', '71') "
                     "  AND year = '{year}' "
                     "  AND qtr = '{qtr}' "
                     "  AND own_code = '5' ".format(
                         input=self.input().table,
                         output=self.output().table,
                         year=self.year,
                         qtr=self.qtr,
                     ))
示例#51
0
 def run(self):
     session = current_session()
     for _output in self.output():
         if not _output.exists():
             resp = session.execute("SELECT id from observatory.obs_table "
                                    "WHERE tablename = '{tablename}'".format(
                                        tablename=_output.tablename))
             _id = resp.fetchall()[0][0]
             stmt = "DELETE FROM observatory.obs_table " \
                     "WHERE id = '{id}'".format(id=_id)
             print(stmt)
             session.execute(stmt)
             session.commit()
示例#52
0
 def run(self):
     session = current_session()
     try:
         self.output().makedirs()
         session.execute(
             'INSERT INTO observatory.obs_dump_version (dump_id) '
             "VALUES ('{task_id}')".format(task_id=self.task_id))
         session.commit()
         shell('pg_dump -Fc -Z0 -x -n observatory -f {output}'.format(
             output=self.output().path))
     except Exception as err:
         session.rollback()
         raise err
示例#53
0
 def populate(self):
     session = current_session()
     stmt = ('INSERT INTO {output} '
             'SELECT geoid, ST_Union(ST_MakePolygon(ST_ExteriorRing(the_geom))) AS the_geom, '
             '       MAX(aland) aland '
             'FROM ( '
             '    SELECT geoid, (ST_Dump(the_geom)).geom AS the_geom, '
             '           aland '
             '    FROM {input} '
             ") holes WHERE GeometryType(the_geom) = 'POLYGON' "
             'GROUP BY geoid'.format(
                 output=self.output().table,
                 input=self.input()['data'].table), )[0]
     session.execute(stmt)
示例#54
0
 def output(self):
     targets = {}
     session = current_session()
     i = 0
     for section in session.query(OBSTag).filter(OBSTag.type == 'section'):
         for subsection in session.query(OBSTag).filter(OBSTag.type == 'subsection'):
             i += 1
             if i > 1 and self.preview:
                 break
             targets[(section.id, subsection.id)] = LocalTarget(
                 'catalog/source/{section}/{subsection}.rst'.format(
                     section=strip_tag_id(section.id),
                     subsection=strip_tag_id(subsection.id)))
     return targets
示例#55
0
    def run(self):
        session = current_session()
        for section_subsection, target in self.output().iteritems():
            section_id, subsection_id = section_subsection
            section = session.query(OBSTag).get(section_id)
            subsection = session.query(OBSTag).get(subsection_id)
            target.makedirs()
            fhandle = target.open('w')

            if '.. cartofigure:: ' in subsection.description:
                viz_id = re.search(r'\.\. cartofigure:: (\S+)', subsection.description).groups()[0]
                viz_path = os.path.join('../', *self.input()[viz_id].path.split(os.path.sep)[2:])
                subsection.description = re.sub(r'\.\. cartofigure:: (\S+)',
                                                '.. figure:: {}'.format(viz_path),
                                                subsection.description)
            columns = []
            for col in subsection.columns:
                if section not in col.tags:
                    continue

                if col.weight < 1:
                    continue

                # tags with denominators will appear beneath that denominator
                if not col.has_denominators():
                    columns.append(col)

                # unless the denominator is not in this subsection
                else:
                    add_to_columns = True
                    for denominator in col.denominators():
                        if subsection in denominator.tags:
                            add_to_columns = False
                            break
                    if add_to_columns:
                        columns.append(col)

            columns.sort(lambda x, y: cmp(x.name, y.name))

            with open('catalog/source/{}.rst'.format(strip_tag_id(section.id)), 'w') as section_fhandle:
                section_fhandle.write(SECTION_TEMPLATE.render(
                    section=section, **self.template_globals()))
            if columns:
                fhandle.write(SUBSECTION_TEMPLATE.render(
                    subsection=subsection, columns=columns, format=self.format,
                    **self.template_globals()
                ).encode('utf8'))
            else:
                fhandle.write('')
            fhandle.close()
示例#56
0
 def run(self):
     session = current_session()
     session.execute('DROP TABLE IF EXISTS {output}'.format(
         output=self.output().table
     ))
     session.execute('CREATE TABLE {output} AS {select}'.format(
         output=self.output().table,
         select=self.QUERY.replace('the_geom_webmercator', 'the_geom')
     ))
     # confirm that there won't be ambiguity with selection of geom
     session.execute('CREATE UNIQUE INDEX ON observatory.obs_meta '
                     '(numer_id, denom_id, numer_timespan, geom_weight)')
     session.commit()
     self.force = False
示例#57
0
 def populate(self):
     session = current_session()
     from_clause = '{inputschema}.{input_tablename}'.format(
         inputschema='tiger' + str(self.year),
         input_tablename=self.input_tablename,
     )
     session.execute('INSERT INTO {output} (geoid, the_geom, aland, awater) '
                     'SELECT {geoid}, geom the_geom, {aland}, {awater} '
                     'FROM {from_clause} '.format(
                         geoid=self.geoid,
                         output=self.output().table,
                         aland=self.aland,
                         awater=self.awater,
                         from_clause=from_clause
                     ))
示例#58
0
 def run(self):
     session = current_session()
     stmt = ('CREATE TABLE {output} AS '
             'SELECT id, geoid, ST_Union(ST_MakeValid(neg.geom)) neg_geom, '
             '       MAX(pos.the_geom) pos_geom '
             'FROM {pos} pos, {neg} neg '
             'WHERE ST_Intersects(pos.the_geom, neg.geom) '
             '      AND pos.awater > 0 '
             '      {mask_clause} '
             'GROUP BY id '.format(
                 neg=self.input()['neg'].table,
                 mask_clause=' AND in_landmask = false' if self.use_mask() else '',
                 pos=self.input()['pos'].table,
                 output=self.output().table), )[0]
     session.execute(stmt)
示例#59
0
    def run(self):
        session = current_session()
        session.execute('CREATE TABLE {output} '
                        '(id serial primary key, geoid text, the_geom geometry, '
                        'aland NUMERIC, awater NUMERIC)'.format(
                            output=self.output().table))
        session.execute('INSERT INTO {output} (geoid, the_geom, aland, awater) '
                        'SELECT geoid, ST_Subdivide(the_geom) the_geom, '
                        '       aland, awater '
                        'FROM {input} '
                        'WHERE aland > 0 '.format(output=self.output().table,
                                                  input=self.input().table))

        session.execute('CREATE INDEX ON {output} USING GIST (the_geom)'.format(
            output=self.output().table))