def test_column_task(klass):
    # Ensure every column task runs and produces some kind of independent
    # metadata.
    # TODO: test columnstasks that have params
    if klass.get_param_names():
        raise SkipTest("Cannot test ColumnsTask with params")
    task = klass()
    runtask(task)
    # After running, at least one OBSColumn row must exist whose id is
    # namespaced under this task's classpath.
    prefix = classpath(task)
    matched = (current_session()
               .query(OBSColumn)
               .filter(OBSColumn.id.startswith(prefix))
               .count())
    assert_greater(matched, 0)
def test_column_task(klass):
    # Ensure every column task runs and produces some kind of independent
    # metadata.
    # TODO: test columnstasks that have params
    if klass.get_param_names():
        raise SkipTest('Cannot test ColumnsTask with params')
    task = klass()
    runtask(task)
    # Count the metadata columns registered under this task's classpath;
    # a successful run must have produced at least one.
    session = current_session()
    column_count = session.query(OBSColumn).filter(
        OBSColumn.id.startswith(classpath(task))
    ).count()
    assert_greater(column_count, 0)
def load_sumlevels():
    '''
    Load summary levels from JSON. Returns a dict by sumlevel number.
    '''
    with open(os.path.join(os.path.dirname(__file__),
                           'summary_levels.json')) as fhandle:
        raw_levels = json.load(fhandle)

    # Column-path prefix is the same for every level; compute it once.
    cpath = classpath(load_sumlevels)

    def _colpath(pkey):
        # pkeys are 1-indexed, unlike python lists, hence the -1.
        return os.path.join('columns', cpath,
                            raw_levels[pkey - 1]['fields']['slug'])

    sumlevels = {}
    for level in raw_levels:
        fields = level['fields']
        # Replace pkey ancestors (and parent, if any) with paths to columns.
        fields['ancestors'] = [_colpath(pkey) for pkey in fields['ancestors']]
        if fields['parent']:
            fields['parent'] = _colpath(fields['parent'])
        sumlevels[fields['summary_level']] = fields
    return sumlevels
def run(self):
    # Import every shapefile for this year/geography into a single PostGIS
    # table: the first file creates the table, the rest are appended in
    # parallel batches, then the geometry column is renamed and indexed.
    shapefiles = shell(
        'ls {dir}/*.shp'.format(dir=os.path.join('tmp', classpath(
            self), str(self.year), self.geography))).strip().split('\n')
    # Probe the first shapefile's geometry type; if it contains polygons,
    # promote to MultiPolygon so single- and multi-part features coexist.
    cmd = 'ogrinfo {shpfile_path}'.format(shpfile_path=shapefiles[0])
    resp = shell(cmd)
    if 'Polygon' in resp:
        nlt = '-nlt MultiPolygon'
    else:
        nlt = ''
    # Create/overwrite the target table from one shapefile. NOTE: .pop()
    # removes that file from the list so the append loop below skips it.
    cmd = 'PG_USE_COPY=yes PGCLIENTENCODING=latin1 ' \
          'ogr2ogr -f PostgreSQL "PG:dbname=$PGDATABASE active_schema={schema}" ' \
          '-t_srs "EPSG:4326" {nlt} -nln {tablename} ' \
          '-lco OVERWRITE=yes ' \
          '-lco SCHEMA={schema} {shpfile_path} '.format(
              tablename=self.output().tablename,
              schema=self.output().schema, nlt=nlt,
              shpfile_path=shapefiles.pop())
    shell(cmd)
    # chunk into 500 shapefiles at a time.
    for i, shape_group in enumerate(grouper(shapefiles, 500)):
        # Append each batch with 16 parallel ogr2ogr workers via xargs.
        # grouper pads the last batch with None, hence the `if shp` filter.
        shell('export PG_USE_COPY=yes PGCLIENTENCODING=latin1; '
              'echo \'{shapefiles}\' | xargs -P 16 -I shpfile_path '
              'ogr2ogr -f PostgreSQL "PG:dbname=$PGDATABASE '
              'active_schema={schema}" -append '
              '-t_srs "EPSG:4326" {nlt} -nln {tablename} '
              'shpfile_path '.format(shapefiles='\n'.join(
                  [shp for shp in shape_group if shp]),
                  tablename=self.output().tablename, nlt=nlt,
                  schema=self.output().schema))
        print('imported {} shapefiles'.format((i + 1) * 500))
    session = current_session()
    # Spatial index
    session.execute(
        'ALTER TABLE {qualified_table} RENAME COLUMN '
        'wkb_geometry TO geom'.format(qualified_table=self.output().table))
    session.execute(
        'CREATE INDEX ON {qualified_table} USING GIST (geom)'.format(
            qualified_table=self.output().table))
def run(self):
    # Import every shapefile for this year/geography into a single PostGIS
    # table as MultiPolygon: the first file creates the table, the rest are
    # appended in parallel batches, then the geometry column is renamed and
    # spatially indexed.
    shapefiles = shell('ls {dir}/*.shp'.format(
        dir=os.path.join('tmp', classpath(self), str(self.year), self.geography)
    )).strip().split('\n')
    # Create/overwrite the target table from one shapefile. NOTE: .pop()
    # removes that file from the list so the append loop below skips it.
    cmd = 'PG_USE_COPY=yes PGCLIENTENCODING=latin1 ' \
          'ogr2ogr -f PostgreSQL "PG:dbname=$PGDATABASE active_schema={schema}" ' \
          '-t_srs "EPSG:4326" -nlt MultiPolygon -nln {tablename} ' \
          '-lco OVERWRITE=yes ' \
          '-lco SCHEMA={schema} {shpfile_path} '.format(
              tablename=self.output().tablename,
              schema=self.output().schema,
              shpfile_path=shapefiles.pop())
    shell(cmd)
    # chunk into 500 shapefiles at a time.
    for i, shape_group in enumerate(grouper(shapefiles, 500)):
        # Append each batch with 16 parallel ogr2ogr workers via xargs.
        # grouper pads the last batch with None, hence the `if shp` filter.
        shell(
            'export PG_USE_COPY=yes PGCLIENTENCODING=latin1; '
            'echo \'{shapefiles}\' | xargs -P 16 -I shpfile_path '
            'ogr2ogr -f PostgreSQL "PG:dbname=$PGDATABASE '
            'active_schema={schema}" -append '
            '-t_srs "EPSG:4326" -nlt MultiPolygon -nln {tablename} '
            'shpfile_path '.format(
                shapefiles='\n'.join([shp for shp in shape_group if shp]),
                tablename=self.output().tablename,
                schema=self.output().schema))
        # BUG FIX: was a Python 2 `print` statement, which is a SyntaxError
        # under Python 3; use the print function, consistent with the
        # sibling run() implementation in this file.
        print('imported {} shapefiles'.format((i + 1) * 500))
    session = current_session()
    # Spatial index
    session.execute('ALTER TABLE {qualified_table} RENAME COLUMN '
                    'wkb_geometry TO geom'.format(
                        qualified_table=self.output().table))
    session.execute('CREATE INDEX ON {qualified_table} USING GIST (geom)'.format(
        qualified_table=self.output().table))
def directory(self):
    '''
    Scratch directory for this task: tmp/<classpath>/<year>/<geography>.
    '''
    path_parts = ('tmp', classpath(self), str(self.year), self.geography)
    return os.path.join(*path_parts)
def output(self):
    '''
    CSV target under tmp/, suffixed with the slugified last-modified time.
    '''
    base = os.path.join('tmp', classpath(self), self.task_id)
    suffix = '_' + underscore_slugify(self.last_time) + '.csv'
    return LocalTarget(base + suffix)
def output(self):
    '''
    Local file target under tmp/<classpath>/ named by self.filename().
    '''
    target_path = os.path.join('tmp', classpath(self), self.filename())
    return LocalTarget(path=target_path)
def output(self):
    '''
    Dump-file target under tmp/<classpath>/ keyed by the unqualified task id.
    '''
    dump_name = unqualified_task_id(self.task_id) + '.dump'
    return LocalTarget(os.path.join('tmp', classpath(self), dump_name))
def output(self):
    '''
    CSV target under tmp/, suffixed with the slugified last-modified time.
    '''
    csv_path = '{base}_{stamp}.csv'.format(
        base=os.path.join('tmp', classpath(self), self.task_id),
        stamp=underscore_slugify(self.last_time))
    return LocalTarget(csv_path)
def output(self):
    '''
    Target under tmp/<classpath>/ named by self.seq, with the whole
    path lower-cased.
    '''
    seq_path = os.path.join('tmp', classpath(self), self.seq)
    return LocalTarget(seq_path.lower())
def output(self):
    '''
    Merged-shapefile target under tmp/<classpath>/<task_id>/.
    '''
    merged_shp = os.path.join(
        'tmp', classpath(self), self.task_id, 'au_mb_all_merged.shp')
    return LocalTarget(merged_shp)
def output(self):
    '''
    File target under <classpath>/ (no tmp/ prefix) named by self.filename().
    '''
    rel_path = os.path.join(classpath(self), self.filename())
    return LocalTarget(path=rel_path)
def output(self):
    '''
    Target under tmp/<classpath>/ keyed by the full task_id.
    '''
    task_path = os.path.join('tmp', classpath(self), self.task_id)
    return LocalTarget(task_path)
def __init__(self, task):
    # Derive the on-disk path for the task once, then keep both the raw
    # path and a LocalTarget wrapping it.
    target_path = os.path.join('tmp', classpath(task), task.task_id)
    self.path = target_path
    self._target = LocalTarget(target_path)