Exemplo n.º 1
0
def main():
    """Command-line entry point: dump the features of an Esri layer.

    Parses ``sys.argv[1:]``, attaches a stream handler to the ``cli`` logger,
    builds an ``EsriDumper`` from the parsed options and writes every feature
    it yields to ``args.outfile``: one JSON document per line when
    ``args.jsonlines`` is set, otherwise a single GeoJSON
    ``FeatureCollection`` document.
    """
    args = _parse_args(sys.argv[1:])
    headers = _collect_headers(args.headers)
    params = _collect_params(args.params)

    # Route the dumper's log records to a stream handler at the chosen level.
    log_handler = logging.StreamHandler()
    log_handler.setFormatter(logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
    logger = logging.getLogger('cli')
    logger.setLevel(args.loglevel)
    logger.addHandler(log_handler)

    # An empty/missing --fields value means "no explicit field list".
    requested_fields = None
    if args.fields:
        requested_fields = args.fields.split(',')

    dumper = EsriDumper(
        args.url,
        extra_query_args=params,
        extra_headers=headers,
        fields=requested_fields,
        request_geometry=args.request_geometry,
        proxy=args.proxy,
        timeout=args.timeout,
        parent_logger=logger,
        max_retries=args.max_retries,
        offset=args.offset,
    )

    out = args.outfile

    if args.jsonlines:
        # Newline-delimited JSON: one feature per line.
        for feature in dumper:
            out.write(json.dumps(feature))
            out.write('\n')
        return

    # Single FeatureCollection: features are separated by ",\n", with no
    # separator after the last one so the document stays valid JSON.
    out.write('{"type":"FeatureCollection","features":[\n')
    for position, feature in enumerate(dumper):
        if position:
            out.write(',\n')
        out.write(json.dumps(feature))
    out.write('\n')
    out.write(']}')
Exemplo n.º 2
0
    def download(self, source_urls, workdir, conform=None):
        """Download each Esri source URL into a CSV file under ``workdir/esri``.

        For every URL the layer is streamed through an ``EsriDumper`` and each
        feature is written as one CSV row containing the requested attribute
        fields plus three derived columns: ``GEOM_FIELDNAME`` (the geometry as
        WKT) and ``X_FIELDNAME`` / ``Y_FIELDNAME`` (a representative point,
        rounded to 7 decimal places).

        :param source_urls: iterable of Esri layer URLs to download.
        :param workdir: directory in which the ``esri`` output directory is
            created.
        :param conform: passed to ``self.field_names_to_request`` to decide
            which fields to write; when that returns ``None`` every field
            listed in the layer metadata is written.
        :return: list of CSV file paths, one per source URL (including files
            that already existed and were therefore not re-downloaded).
        """
        output_files = []
        download_path = os.path.join(workdir, 'esri')
        mkdirsp(download_path)

        query_fields = self.field_names_to_request(conform)

        for source_url in source_urls:
            size = 0
            file_path = self.get_file_path(source_url, download_path)

            # NOTE(review): an existing file is treated as a finished
            # download; a file left behind by an interrupted run would be
            # reused as-is.
            if os.path.exists(file_path):
                output_files.append(file_path)
                _L.debug("File exists %s", file_path)
                continue

            downloader = EsriDumper(source_url, parent_logger=_L)

            metadata = downloader.get_metadata()

            if query_fields is None:
                field_names = [f['name'] for f in metadata['fields']]
            else:
                # Copy so the appends below never mutate the shared list.
                field_names = query_fields[:]

            # Make sure the derived columns are part of the CSV header.
            for derived in (X_FIELDNAME, Y_FIELDNAME, GEOM_FIELDNAME):
                if derived not in field_names:
                    field_names.append(derived)

            # Get the count of rows in the layer
            row_count = downloader.get_feature_count()

            _L.info("Source has {} rows".format(row_count))

            with csvopen(file_path, 'w', encoding='utf-8') as f:
                writer = csvDictWriter(f,
                                       fieldnames=field_names,
                                       encoding='utf-8')
                writer.writeheader()

                for feature in downloader:
                    # TypeError is used as the "skip this feature" signal for
                    # everything inside this try block.
                    try:
                        geom = feature.get('geometry') or {}
                        row = feature.get('properties') or {}

                        if not geom:
                            raise TypeError("No geometry parsed")
                        if any((isinstance(g, float) and math.isnan(g))
                               for g in traverse(geom)):
                            raise TypeError("Geometry has NaN coordinates")

                        shp = shape(geom)
                        row[GEOM_FIELDNAME] = shp.wkt
                        try:
                            centroid = shp.centroid
                        except RuntimeError as e:
                            if 'Invalid number of points in LinearRing found' not in str(
                                    e):
                                raise
                            # Centroid cannot be computed for this ring; fall
                            # back to the center of the bounding box.  Shapely
                            # returns bounds as (minx, miny, maxx, maxy).
                            xmin, ymin, xmax, ymax = shp.bounds
                            row[X_FIELDNAME] = round(xmin / 2 + xmax / 2, 7)
                            row[Y_FIELDNAME] = round(ymin / 2 + ymax / 2, 7)
                        else:
                            # An empty centroid has no usable coordinates, so
                            # skip the feature like any other bad geometry.
                            if centroid.is_empty:
                                raise TypeError("Geometry has an empty centroid")
                            row[X_FIELDNAME] = round(centroid.x, 7)
                            row[Y_FIELDNAME] = round(centroid.y, 7)

                        # Restrict the row to the header columns; missing
                        # values become None.
                        writer.writerow(
                            {fn: row.get(fn)
                             for fn in field_names})
                        size += 1
                    except TypeError:
                        _L.debug("Skipping a geometry", exc_info=True)

            _L.info("Downloaded %s ESRI features for file %s", size, file_path)
            output_files.append(file_path)
        return output_files
Exemplo n.º 3
0
    def download(self, source_urls, workdir, source_config):
        """Download each Esri source URL into a CSV file under ``workdir/esri``.

        Every feature yielded by the ``EsriDumper`` becomes one CSV row whose
        column names are upper-cased; the geometry is stored as WKT in the
        ``GEOM_FIELDNAME`` column.  Features without geometry, or whose
        geometry contains NaN coordinates, are skipped.

        :param source_urls: iterable of Esri layer URLs to download.
        :param workdir: directory in which the ``esri`` output directory is
            created.
        :param source_config: passed to
            ``EsriRestDownloadTask.field_names_to_request`` to decide which
            fields to write; when that returns ``None`` every field listed in
            the layer metadata is written.
        :return: list of CSV file paths, one per source URL (including files
            that already existed and were not re-downloaded).
        """
        target_dir = os.path.join(workdir, 'esri')
        mkdirsp(target_dir)

        requested = EsriRestDownloadTask.field_names_to_request(
            source_config)
        collected_paths = []

        for source_url in source_urls:
            file_path = self.get_file_path(source_url, target_dir)

            # Nothing to do when the CSV for this URL is already on disk.
            if os.path.exists(file_path):
                collected_paths.append(file_path)
                _L.debug("File exists %s", file_path)
                continue

            downloader = EsriDumper(source_url, parent_logger=_L, timeout=300)
            metadata = downloader.get_metadata()

            # Explicitly requested fields win; otherwise use the layer's own.
            if requested is not None:
                columns = requested[:]
            else:
                columns = [field['name'] for field in metadata['fields']]

            if GEOM_FIELDNAME not in columns:
                columns.append(GEOM_FIELDNAME)

            columns = [name.upper() for name in columns]

            # The feature count is informational only; some sources cannot
            # provide it.
            try:
                _L.info("Source has {} rows".format(
                    downloader.get_feature_count()))
            except EsriDownloadError:
                _L.info("Source doesn't support count")

            written = 0
            with open(file_path, 'w', encoding='utf-8') as handle:
                writer = csv.DictWriter(handle, fieldnames=columns)
                writer.writeheader()

                for feature in downloader:
                    # TypeError doubles as the "skip this feature" signal.
                    try:
                        geometry = feature.get('geometry') or {}
                        properties = feature.get('properties') or {}

                        if not geometry:
                            raise TypeError("No geometry parsed")
                        for coord in traverse(geometry):
                            if isinstance(coord, float) and math.isnan(coord):
                                raise TypeError("Geometry has NaN coordinates")

                        properties[GEOM_FIELDNAME] = shape(
                            feature['geometry']).wkt

                        # Match the upper-cased header names.
                        upper_row = {key.upper(): value
                                     for key, value in properties.items()}

                        record = {name: upper_row.get(name)
                                  for name in columns}
                        writer.writerow(record)
                        written += 1
                    except TypeError:
                        _L.debug("Skipping a geometry", exc_info=True)

            _L.info("Downloaded %s ESRI features for file %s",
                    written, file_path)
            collected_paths.append(file_path)

        return collected_paths
Exemplo n.º 4
0
    def download(self, source_urls, workdir, conform=None):
        """Download each Esri source URL into a CSV file under ``workdir/esri``.

        For every URL the layer is streamed through an ``EsriDumper`` and each
        feature is written as one CSV row containing the requested attribute
        fields plus three derived columns: ``GEOM_FIELDNAME`` (the geometry as
        WKT) and ``X_FIELDNAME`` / ``Y_FIELDNAME`` (a representative point,
        rounded to 7 decimal places).  Features with no geometry, NaN
        coordinates or an empty centroid are skipped.

        :param source_urls: iterable of Esri layer URLs to download.
        :param workdir: directory in which the ``esri`` output directory is
            created.
        :param conform: passed to
            ``EsriRestDownloadTask.field_names_to_request`` to decide which
            fields to write; when that returns ``None`` every field listed in
            the layer metadata is written.
        :return: list of CSV file paths, one per source URL (including files
            that already existed and were therefore not re-downloaded).
        """
        output_files = []
        download_path = os.path.join(workdir, 'esri')
        mkdirsp(download_path)

        query_fields = EsriRestDownloadTask.field_names_to_request(conform)

        for source_url in source_urls:
            size = 0
            file_path = self.get_file_path(source_url, download_path)

            # NOTE(review): an existing file is treated as a finished
            # download; a file left behind by an interrupted run would be
            # reused as-is.
            if os.path.exists(file_path):
                output_files.append(file_path)
                _L.debug("File exists %s", file_path)
                continue

            downloader = EsriDumper(source_url, parent_logger=_L, timeout=300)

            metadata = downloader.get_metadata()

            if query_fields is None:
                field_names = [f['name'] for f in metadata['fields']]
            else:
                # Copy so the appends below never mutate the shared list.
                field_names = query_fields[:]

            # Make sure the derived columns are part of the CSV header.
            for derived in (X_FIELDNAME, Y_FIELDNAME, GEOM_FIELDNAME):
                if derived not in field_names:
                    field_names.append(derived)

            # Get the count of rows in the layer (informational only; some
            # sources cannot provide it).
            try:
                row_count = downloader.get_feature_count()
                _L.info("Source has {} rows".format(row_count))
            except EsriDownloadError:
                _L.info("Source doesn't support count")

            with open(file_path, 'w', encoding='utf-8') as f:
                writer = csv.DictWriter(f, fieldnames=field_names)
                writer.writeheader()

                for feature in downloader:
                    # TypeError is used as the "skip this feature" signal for
                    # everything inside this try block.
                    try:
                        geom = feature.get('geometry') or {}
                        row = feature.get('properties') or {}

                        if not geom:
                            raise TypeError("No geometry parsed")
                        if any((isinstance(g, float) and math.isnan(g)) for g in traverse(geom)):
                            raise TypeError("Geometry has NaN coordinates")

                        shp = shape(geom)
                        row[GEOM_FIELDNAME] = shp.wkt
                        try:
                            centroid = shp.centroid
                        except RuntimeError as e:
                            if 'Invalid number of points in LinearRing found' not in str(e):
                                raise
                            # Centroid cannot be computed for this ring; fall
                            # back to the center of the bounding box.  Shapely
                            # returns bounds as (minx, miny, maxx, maxy).
                            xmin, ymin, xmax, ymax = shp.bounds
                            row[X_FIELDNAME] = round(xmin/2 + xmax/2, 7)
                            row[Y_FIELDNAME] = round(ymin/2 + ymax/2, 7)
                        else:
                            # An empty centroid has no usable coordinates;
                            # the geometry is put in the message so the debug
                            # log shows what was skipped.
                            if centroid.is_empty:
                                raise TypeError(json.dumps(feature['geometry']))
                            row[X_FIELDNAME] = round(centroid.x, 7)
                            row[Y_FIELDNAME] = round(centroid.y, 7)

                        # Restrict the row to the header columns; missing
                        # values become None.
                        writer.writerow({fn: row.get(fn) for fn in field_names})
                        size += 1
                    except TypeError:
                        _L.debug("Skipping a geometry", exc_info=True)

            _L.info("Downloaded %s ESRI features for file %s", size, file_path)
            output_files.append(file_path)
        return output_files