Example #1
def intersect_grids(geom_expr, geom_tables: List[Table]):
    """Generate Intersection Query Conditions with Grid Tables."""
    sub_where = []
    outer_join = []
    for geom_table in geom_tables:
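        # stop at the first grid table that lacks a "tile" column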
        if not hasattr(geom_table.c, "tile"):
            break
        get_srid_expr = func.ST_SRID(geom_table.c.geom)
        sub_where.append(
            and_(
                func.ST_Intersects(func.ST_Transform(geom_expr, get_srid_expr),
                                   geom_table.c.geom),
                Tile.name == geom_table.c.tile,
            ))

        outer_join.append((geom_table, [Tile.name == geom_table.c.tile]))

    return [or_(*sub_where)], outer_join
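
A minimal usage sketch for the helper above. The names `session`, `grid_tables`, and `geojson_geom` are illustrative assumptions, not part of the original project:

from sqlalchemy import func

where, outer = intersect_grids(func.ST_GeomFromGeoJSON(geojson_geom),
                               geom_tables=grid_tables)
query = session.query(Tile.name)
for table, join_conditions in outer:
    # each entry carries a single ON condition (Tile.name == table.c.tile)
    query = query.outerjoin(table, *join_conditions)
tiles = query.filter(*where).all()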
Example #2
    def crs(self) -> Union[str, None]:
        """Retrieve the Coordinate Reference System (CRS) from the GRID."""
        spatial_ref_sys = Table('spatial_ref_sys',
                                db.metadata,
                                schema='public',
                                autoload=True,
                                autoload_with=db.engine)

        geom_table = self.geom_table

        if geom_table is None:
            return None

        res = db.session.query(spatial_ref_sys.c.proj4text)\
            .filter(spatial_ref_sys.c.srid == func.ST_SRID(geom_table.c.geom))\
            .first()

        crs = None
        if res is not None:
            crs = res.proj4text

        return crs
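
If a proj4 string comes back, it can be promoted to a full CRS object. A small sketch using pyproj, an extra dependency not used by the original code (`grid` stands for an instance of the class this method belongs to):

from pyproj import CRS

proj4text = grid.crs()  # e.g. '+proj=longlat +datum=WGS84 +no_defs'
if proj4text is not None:
    crs = CRS.from_proj4(proj4text)
    print(crs.to_epsg())  # may print None if no exact EPSG match exists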
Example #3
File: data.py Project: betonr/bdc-stac
def get_collection_items(
    collection_id=None,
    roles=None,
    item_id=None,
    bbox=None,
    datetime=None,
    ids=None,
    collections=None,
    intersects=None,
    page=1,
    limit=10,
    query=None,
    **kwargs,
):
    """Retrieve a list of collection items based on filters.

    :param collection_id: Single Collection ID to include in the search for items.
                          Only Items in the provided Collection will be searched; defaults to None
    :type collection_id: str, optional
    :param item_id: item identifier, defaults to None
    :type item_id: str, optional
    :param bbox: bounding box for intersection [west, south, east, north], defaults to None
    :type bbox: list, optional
    :param datetime: Single date+time, or a range ('/' separator), formatted per RFC 3339, section 5.6.
                     Use double dots '..' for open date ranges; defaults to None. If the start or end date of an
                     image generated by a temporal composition intersects the given datetime or range, it will be
                     included in the result.
    :type datetime: str, optional
    :param ids: Array of Item ids to return. All other filter parameters that further restrict the
                number of search results are ignored; defaults to None
    :type ids: list, optional
    :param collections: Array of Collection IDs to include in the search for items.
                        Only Items in one of the provided Collections will be searched; defaults to None
    :type collections: list, optional
    :param intersects: Searches items whose geometry intersects the provided GeoJSON geometry;
                       all GeoJSON geometry types are supported. Defaults to None
    :type intersects: dict, optional
    :param page: The page offset of results, defaults to 1
    :type page: int, optional
    :param limit: The maximum number of results to return (page size), defaults to 10
    :type limit: int, optional
    :return: list of collection items
    :rtype: Pagination
    """
    roles = roles or []  # avoid sharing a mutable default across calls
    columns = [
        func.concat(Collection.name, "-",
                    Collection.version).label("collection"),
        Collection.collection_type,
        Collection._metadata.label("meta"),
        Item._metadata.label("item_meta"),
        Item.name.label("item"),
        Item.id,
        Item.collection_id,
        Item.start_date.label("start"),
        Item.end_date.label("end"),
        Item.assets,
        Item.created,
        Item.updated,
        cast(Item.cloud_cover, Float).label("cloud_cover"),
        func.ST_AsGeoJSON(Item.geom).label("geom"),
        func.Box2D(Item.geom).label("bbox"),
        Tile.name.label("tile"),
    ]

    where = [
        Collection.id == Item.collection_id,
        or_(Collection.is_public.is_(True),
            Collection.id.in_([int(r.split(":")[0]) for r in roles])),
    ]

    if ids is not None:
        where += [Item.name.in_(ids.split(","))]
    else:
        if collections is not None:
            where += [
                func.concat(Collection.name, "-",
                            Collection.version).in_(collections.split(","))
            ]
        elif collection_id is not None:
            where += [
                func.concat(Collection.name, "-",
                            Collection.version) == collection_id
            ]

        if item_id is not None:
            where += [Item.name.like(item_id)]

        if query:
            filters = create_query_filter(query)
            if filters:
                where += filters

        if intersects is not None:
            where += [
                func.ST_Intersects(func.ST_GeomFromGeoJSON(str(intersects)),
                                   Item.geom)
            ]
        elif bbox is not None:
            try:
                split_bbox = [float(x) for x in bbox.split(",")]
                if split_bbox[0] == split_bbox[2] or split_bbox[1] == split_bbox[3]:
                    raise InvalidBoundingBoxError("")

                where += [
                    func.ST_Intersects(
                        func.ST_MakeEnvelope(
                            split_bbox[0],
                            split_bbox[1],
                            split_bbox[2],
                            split_bbox[3],
                            func.ST_SRID(Item.geom),
                        ),
                        Item.geom,
                    )
                ]
            except Exception:
                raise InvalidBoundingBoxError(f"'{bbox}' is not a valid bbox.")

        if datetime is not None:
            date_filter = None
            if "/" in datetime:
                matches_open = ("..", "")
                time_start, time_end = datetime.split("/")
                if time_start in matches_open:  # open start
                    date_filter = [
                        or_(Item.start_date <= time_end,
                            Item.end_date <= time_end)
                    ]
                elif time_end in matches_open:  # open end
                    date_filter = [
                        or_(Item.start_date >= time_start,
                            Item.end_date >= time_start)
                    ]
                else:  # closed range
                    date_filter = [
                        or_(
                            and_(Item.start_date >= time_start,
                                 Item.start_date <= time_end),
                            and_(Item.end_date >= time_start,
                                 Item.end_date <= time_end),
                            and_(Item.start_date < time_start,
                                 Item.end_date > time_end),
                        )
                    ]
            else:
                date_filter = [
                    and_(Item.start_date <= datetime,
                         Item.end_date >= datetime)
                ]
            where += date_filter
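    # items are left-joined to their (optional) tile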
    outer = [Item.tile_id == Tile.id]
    query = session.query(*columns).outerjoin(
        Tile, *outer).filter(*where).order_by(Item.start_date.desc(), Item.id)

    result = query.paginate(page=int(page),
                            per_page=int(limit),
                            error_out=False,
                            max_per_page=BDC_STAC_MAX_LIMIT)

    return result
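
For reference, the `datetime` parameter accepts the three forms handled above; illustrative calls (the collection name is made up):

# exact instant: matches items whose start/end interval contains it
get_collection_items(collection_id="S2-16D-2",
                     datetime="2020-01-01T00:00:00Z")
# closed range
get_collection_items(collection_id="S2-16D-2",
                     datetime="2020-01-01T00:00:00Z/2020-02-01T00:00:00Z")
# open start ('..'); open ends work the same way
get_collection_items(collection_id="S2-16D-2",
                     datetime="../2020-02-01T00:00:00Z")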
Example #4
class Place(Base):
    __tablename__ = 'place'
    place_id = Column(BigInteger, primary_key=True, autoincrement=False)
    osm_type = Column(osm_type_enum, nullable=False)
    osm_id = Column(BigInteger, nullable=False)
    radius = Column(Integer)  # in metres, only for nodes
    display_name = Column(String, nullable=False)
    category = Column(String, nullable=False)
    type = Column(String, nullable=False)
    place_rank = Column(Integer, nullable=False)
    icon = Column(String)
    geom = Column(Geography(spatial_index=True))
    south = Column(Float, nullable=False)
    west = Column(Float, nullable=False)
    north = Column(Float, nullable=False)
    east = Column(Float, nullable=False)
    extratags = deferred(Column(JSON))
    address = deferred(Column(JSON))
    namedetails = deferred(Column(JSON))
    item_count = Column(Integer)
    candidate_count = Column(Integer)
    state = Column(String, index=True)
    override_name = Column(String)
    lat = Column(Float)
    lon = Column(Float)
    added = Column(DateTime, default=now_utc())
    wikidata_query_timeout = Column(Boolean, default=False)
    wikidata = Column(String)
    item_types_retrieved = Column(Boolean, default=False)
    index_hide = Column(Boolean, default=False)
    overpass_is_in = deferred(Column(JSON))
    existing_wikidata = deferred(Column(JSON))

    area = column_property(func.ST_Area(geom))
    geometry_type = column_property(func.GeometryType(geom))
    geojson = column_property(func.ST_AsGeoJSON(geom, 4), deferred=True)
    srid = column_property(func.ST_SRID(geom))
    npoints = column_property(func.ST_NPoints(cast(geom, Geometry)),
                              deferred=True)
    # match_ratio = column_property(candidate_count / item_count)
    num_geom = column_property(func.ST_NumGeometries(cast(geom, Geometry)),
                               deferred=True)

    items = relationship('Item',
                         secondary='place_item',
                         lazy='dynamic',
                         backref=backref('places', lazy='dynamic'))

    __table_args__ = (UniqueConstraint('osm_type', 'osm_id'), )

    @property
    def osm_url(self):
        return f'{base_osm_url}/{self.osm_type}/{self.osm_id}'

    @classmethod
    def get_by_osm(cls, osm_type, osm_id):
        return cls.query.filter_by(osm_type=osm_type,
                                   osm_id=osm_id).one_or_none()

    @classmethod
    def from_osm(cls, osm_type, osm_id):
        place = cls.get_by_osm(osm_type, osm_id)
        if place:
            return place

        hit = nominatim.reverse(osm_type, osm_id)
        try:
            place = Place.from_nominatim(hit)
        except KeyError:
            return None
        session.add(place)
        session.commit()
        return place

    @property
    def type_label(self):
        t = self.type.replace('_', ' ')
        cat = self.category.replace('_', ' ')
        if cat == 'place':
            return t
        if t == 'yes':
            return cat
        return t + ' ' + cat

    @classmethod
    def get_by_wikidata(cls, qid):
        q = cls.query.filter_by(wikidata=qid)
        try:
            return q.one_or_none()
        except MultipleResultsFound:
            return None

    def get_address_key(self, key):
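        # `address` may be a dict ({'country_code': 'gb', ...}) or a list of
        # {'type': ..., 'name': ...} entries; handle both shapes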
        if isinstance(self.address, dict):
            return self.address.get(key)
        for line in self.address or []:
            if line['type'] == key:
                return line['name']

    @property
    def country_code(self):
        return self.get_address_key('country_code')

    @property
    def country(self):
        return self.get_address_key('country')

    @classmethod
    def get_or_abort(cls, osm_type, osm_id):
        place = cls.get_by_osm(osm_type, osm_id)
        if place:
            return place
        abort(404)

    @hybrid_property
    def area_in_sq_km(self):
        return self.area / (1000 * 1000)

    @property
    def type_and_id(self):
        return (self.osm_type, self.osm_id)

    @property
    def too_big(self):
        max_area = current_app.config['PLACE_MAX_AREA']
        return self.area_in_sq_km > max_area

    @property
    def too_complex(self):
        return self.npoints > current_app.config['PLACE_MAX_NPOINTS']

    @property
    def bad_geom_type(self):
        return self.geometry_type in {'LINESTRING', 'MULTILINESTRING'}

    @property
    def area_in_range(self):
        min_area = current_app.config['PLACE_MIN_AREA']
        if g.user.is_authenticated:
            max_area = current_app.config['PLACE_MAX_AREA']
        else:
            max_area = current_app.config['PLACE_MAX_AREA_ANON']

        return min_area < self.area_in_sq_km < max_area

    @property
    def allowed_cat(self):
        cats = {
            'place', 'boundary', 'natural', 'leisure', 'amenity', 'landuse'
        }
        return self.category in cats

    @property
    def matcher_allowed(self):
        '''Are we allowed to run the matcher for this place?'''

        allow_node = bool(current_app.config.get('ALLOW_NODE_MATCH'))
        if self.osm_type == 'node':
            return allow_node
        return (not self.bad_geom_type and self.allowed_cat
                and self.area_in_range and not self.too_complex)

    def update_from_nominatim(self, hit):
        if self.place_id != int(hit['place_id']):
            print((self.place_id, hit['place_id']))
            self.place_id = hit['place_id']

        keys = ('lat', 'lon', 'display_name', 'place_rank', 'category', 'type',
                'icon', 'extratags', 'namedetails')
        assert all(hit[n] is not None for n in ('lat', 'lon'))
        for n in keys:
            setattr(self, n, hit.get(n))
        bbox = hit['boundingbox']
        assert all(i is not None for i in bbox)
        (self.south, self.north, self.west, self.east) = bbox
        self.address = [
            dict(name=n, type=t) for t, n in hit['address'].items()
        ]
        self.wikidata = hit['extratags'].get('wikidata')
        self.geom = hit['geotext']

    def change_comment(self, item_count):
        if item_count == 1:
            return g.user.single or default_change_comments['single']
        comment = getattr(g.user, 'multi',
                          None) or default_change_comments['multi']
        return comment.replace('PLACE', self.name_for_change_comment)

    @property
    def name_for_changeset(self):
        address = self.address
        n = self.name
        if not address:
            return self.name
        if isinstance(address, list):
            d = {a['type']: a['name'] for a in address}
        elif isinstance(address, dict):
            d = address

        if d.get('country_code') == 'us':
            state = d.get('state')
            if state and n != state:
                return n + ', ' + state

        country = d.get('country')
        if country and self.name != country:
            return '{} ({})'.format(self.name, country)

        return self.name

    def update_address(self):
        hit = nominatim.reverse(self.osm_type, self.osm_id, polygon_text=0)
        self.address = [
            dict(name=n, type=t) for t, n in hit['address'].items()
        ]
        session.commit()

    @property
    def name_for_change_comment(self):
        n = self.name

        if self.address:
            if isinstance(self.address, dict):
                self.update_address()

            address = {a['type']: a['name'] for a in self.address}

            parts = []
            country_code = address.get('country_code')
            skip = {'country_code', 'postcode'}
            if country_code in {'us'}:
                skip.add('county')
            if country_code in {'gb', 'us'} and 'state' in address:
                skip.add('country')
            if self.type in {'university', 'hospital', 'administrative'}:
                skip |= {'path', 'footway', 'road', 'neighbourhood'}
            if (country_code == 'gb' and self.category == 'boundary' and
                    self.type in {'traditional', 'ceremonial', 'historic'}):
                parts = [
                    a for a in self.address
                    if a['type'] in {'state_district', 'state'}
                ]
            else:
                parts = [a for a in self.address if a['type'] not in skip]

            name_parts = [n]
            prev_part = n
            for part in parts:
                if part['name'] == prev_part or (
                        part['type'] != 'city' and
                    (part['name'] in prev_part or prev_part in part['name'])):
                    continue
                name_parts.append(part['name'])
                prev_part = part['name']

            n = ', '.join(name_parts)
        if (' of ' in n or 'national park' in n.lower()) and ', ' not in n:
            return 'the ' + n
        else:
            return n

    @classmethod
    def from_nominatim(cls, hit):
        keys = ('place_id', 'osm_type', 'osm_id', 'lat', 'lon', 'display_name',
                'place_rank', 'category', 'type', 'icon', 'extratags',
                'namedetails')
        n = {k: hit[k] for k in keys if k in hit}
        bbox = hit['boundingbox']
        (n['south'], n['north'], n['west'], n['east']) = bbox
        n['geom'] = hit['geotext']
        n['address'] = [
            dict(name=n, type=t) for t, n in hit['address'].items()
        ]
        if 'extratags' in hit:
            n['wikidata'] = hit['extratags'].get('wikidata')
        return cls(**n)

    @classmethod
    def get_or_add_place(cls, hit):
        place = cls.query.filter_by(osm_type=hit['osm_type'],
                                    osm_id=hit['osm_id']).one_or_none()

        if place and place.place_id != hit['place_id']:
            place.update_from_nominatim(hit)
        elif not place:
            place = Place.query.get(hit['place_id'])
            if place:
                place.update_from_nominatim(hit)
            else:
                place = cls.from_nominatim(hit)
                session.add(place)
        session.commit()
        return place

    @property
    def match_ratio(self):
        if self.item_count:
            return self.candidate_count / self.item_count

    @property
    def bbox(self):
        return (self.south, self.north, self.west, self.east)

    @property
    def is_point(self):
        return self.osm_type == 'node'

    @property
    def display_area(self):
        return '{:.1f} km²'.format(self.area_in_sq_km)

    def get_wikidata_query(self):
        # this is an old function, it isn't used by the matcher
        if self.osm_type == 'node':
            radius = self.radius or radius_default
            query = wikidata.get_point_query(self.lat, self.lon, radius)
        else:
            query = wikidata.get_enwiki_query(*self.bbox)
        return query

    def point_wikidata_items(self):
        radius = self.radius or radius_default
        query_map = wikidata.point_query_map(self.lat, self.lon, radius)
        return self.items_from_wikidata(query_map)

    def bbox_wikidata_items(self, bbox=None):
        if bbox is None:
            bbox = self.bbox

        query_map = wikidata.bbox_query_map(*bbox)
        items = self.items_from_wikidata(query_map)

        # Would be nice to include OSM chunk information with each
        # item. Not doing it at this point because it means lots
        # of queries. Easier once the items are loaded into the database.
        return {k: v for k, v in items.items() if self.covers(v)}

    def items_from_wikidata(self, query_map):
        rows = wikidata.run_query(query_map['enwiki'])
        items = wikidata.parse_enwiki_query(rows)

        try:  # add items with the coordinates in the HQ field
            rows = wikidata.run_query(query_map['hq_enwiki'])
            items.update(wikidata.parse_enwiki_query(rows))
        except wikidata_api.QueryError:
            pass  # HQ query timeout isn't fatal

        rows = wikidata.run_query(query_map['item_tag'])
        wikidata.parse_item_tag_query(rows, items)

        try:  # add items with the coordinates in the HQ field
            rows = wikidata.run_query(query_map['hq_item_tag'])
            wikidata.parse_item_tag_query(rows, items)
        except wikidata_api.QueryError:
            pass  # HQ query timeout isn't fatal

        return items

    def covers(self, item):
        ''' Is the given item within the geometry of this place? '''
        q = (select([func.ST_Covers(Place.geom, item['location'])
                     ]).where(Place.place_id == self.place_id))
        return object_session(self).scalar(q)

    def add_tags_to_items(self):
        for item in self.items.filter(Item.categories != '{}'):
            # if wikidata says this is a place then adding tags
            # from wikipedia can just confuse things
            if any(t.startswith('place') for t in item.tags):
                continue
            for t in matcher.categories_to_tags(item.categories):
                item.tags.add(t)

    @property
    def prefix(self):
        return f'osm_{self.place_id}'

    @property
    def identifier(self):
        return f'{self.osm_type}/{self.osm_id}'

    @property
    def overpass_filename(self):
        overpass_dir = current_app.config['OVERPASS_DIR']
        return os.path.join(overpass_dir, '{}.xml'.format(self.place_id))

    def is_overpass_filename(self, f):
        ''' Does the overpass filename belong to this place? '''
        place_id = str(self.place_id)
        return f == place_id + '.xml' or f.startswith(place_id + '_')

    def delete_overpass(self):
        for f in os.scandir(current_app.config['OVERPASS_DIR']):
            if self.is_overpass_filename(f.name):
                os.remove(f.path)

    def clean_up(self):
        place_id = self.place_id

        engine = session.bind
        for t in get_tables():
            if not t.startswith(self.prefix):
                continue
            engine.execute(f'drop table if exists {t}')
        engine.execute('commit')

        overpass_dir = current_app.config['OVERPASS_DIR']
        for f in os.listdir(overpass_dir):
            if not any(
                    f.startswith(str(place_id) + end) for end in ('_', '.')):
                continue
            os.remove(os.path.join(overpass_dir, f))

    @property
    def overpass_done(self):
        return os.path.exists(self.overpass_filename)

    def items_with_candidates(self):
        return self.items.join(ItemCandidate)

    def items_with_candidates_count(self):
        if self.state != 'ready':
            return
        return (session.query(Item.item_id).join(PlaceItem).join(Place).join(
            ItemCandidate).filter(Place.place_id == self.place_id).group_by(
                Item.item_id).count())

    def items_without_candidates(self):
        return self.items.outerjoin(ItemCandidate).filter(
            ItemCandidate.item_id.is_(None))

    def items_with_multiple_candidates(self):
        # select count(*) from (select 1 from item, item_candidate where item.item_id=item_candidate.item_id) x;
        q = (self.items.join(ItemCandidate).group_by(Item.item_id).having(
            func.count(Item.item_id) > 1).with_entities(Item.item_id))
        return q

    @property
    def name(self):
        if self.override_name:
            return self.override_name

        name = self.namedetails.get('name:en') or self.namedetails.get('name')
        display = self.display_name
        if not name:
            return display

        for short in ('City', '1st district'):
            start = len(short) + 2
            if name == short and display.startswith(
                    short + ', ') and ', ' in display[start:]:
                name = display[:display.find(', ', start)]
                break

        return name

    @property
    def name_extra_detail(self):
        for n in 'name:en', 'name':
            if n not in self.namedetails:
                continue
            start = self.namedetails[n] + ', '
            if self.display_name.startswith(start):
                return self.display_name[len(start):]

    @property
    def export_name(self):
        return self.name.replace(':', '').replace(' ', '_')

    def items_with_instanceof(self):
        return [item for item in self.items if item.instanceof()]

    def osm2pgsql_cmd(self, filename=None):
        if filename is None:
            filename = self.overpass_filename
        style = os.path.join(current_app.config['DATA_DIR'], 'matcher.style')
        return [
            'osm2pgsql', '--create', '--drop', '--slim', '--hstore-all',
            '--hstore-add-index', '--prefix', self.prefix, '--cache', '500',
            '--style', style, '--multi-geometry', '--host',
            current_app.config['DB_HOST'], '--username',
            current_app.config['DB_USER'], '--database',
            current_app.config['DB_NAME'], filename
        ]

    def load_into_pgsql(self, filename=None, capture_stderr=True):
        if filename is None:
            filename = self.overpass_filename

        if not os.path.exists(filename):
            return 'no data from overpass to load with osm2pgsql'

        if os.stat(filename).st_size == 0:
            return 'no data from overpass to load with osm2pgsql'

        cmd = self.osm2pgsql_cmd(filename)

        if not capture_stderr:
            p = subprocess.run(
                cmd, env={'PGPASSWORD': current_app.config['DB_PASS']})
            return
        p = subprocess.run(cmd,
                           stderr=subprocess.PIPE,
                           env={'PGPASSWORD': current_app.config['DB_PASS']})
        if p.returncode != 0:
            if b'Out of memory' in p.stderr:
                return 'out of memory'
            else:
                return p.stderr.decode('utf-8')

    def save_overpass(self, content):
        with open(self.overpass_filename, 'wb') as out:
            out.write(content)

    @property
    def all_tags(self):
        tags = set()
        for item in self.items:
            tags |= set(item.tags)
            tags |= item.disused_tags()
        tags.difference_update(skip_tags)
        return matcher.simplify_tags(tags)

    @property
    def overpass_type(self):
        return overpass_types[self.osm_type]

    @property
    def overpass_filter(self):
        return 'around:{0.radius},{0.lat},{0.lon}'.format(self)

    @property
    def wikidata_item_id(self):
        if self.wikidata:
            return int(self.wikidata[1:])

    def building_names(self):
        re_paren = re.compile(r'\(.+\)')
        re_drop = re.compile(r'\b(the|and|at|of|de|le|la|les|von)\b')
        names = set()
        for building in (item for item in self.items
                         if 'building' in item.tags):
            for n in building.names():
                if n[0].isdigit() and ',' in n:
                    continue
                n = n.lower()
                comma = n.rfind(', ')
                if comma != -1 and not n[0].isdigit():
                    n = n[:comma]

                n = re_paren.sub('', n).replace("'s", "('s)?")
                n = n.replace('(', '').replace(')', '').replace('.', r'\.')
                names.add(n)
                names.add(re_drop.sub('', n))

        names = sorted(n.replace(' ', r'\W*') for n in names)
        if names:
            return '({})'.format('|'.join(names))

    def get_point_oql(self, buildings_special=False):
        tags = self.all_tags

        if buildings_special and 'building' in tags:
            buildings = self.building_names()
            tags.remove('building')
        else:
            buildings = None

        radius = self.radius or radius_default
        return overpass.oql_for_point(self.lat, self.lon, radius, tags,
                                      buildings)

    def get_bbox_oql(self, buildings_special=False):
        bbox = f'{self.south:f},{self.west:f},{self.north:f},{self.east:f}'

        tags = self.all_tags

        if buildings_special and 'building' in tags:
            buildings = self.building_names()
            tags.remove('building')
        else:
            buildings = None

        return overpass.oql_for_area(self.overpass_type, self.osm_id, tags,
                                     bbox, buildings)
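        # NOTE: everything below this return is unreachable (leftover legacy OQL builder)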

        union = ['{}({});'.format(self.overpass_type, self.osm_id)]

        for tag in self.all_tags:
            u = (oql_from_tag(tag, filters=self.overpass_filter)
                 if self.osm_type == 'node' else oql_from_tag(tag))
            if u:
                union += u

        if self.osm_type == 'node':
            oql = ('[timeout:300][out:xml];\n' + '({});\n' + '(._;>;);\n' +
                   'out qt;').format(''.join(union))
            return oql

        bbox = '{:f},{:f},{:f},{:f}'.format(self.south, self.west, self.north,
                                            self.east)
        offset = {'way': 2400000000, 'relation': 3600000000}
        area_id = offset[self.osm_type] + int(self.osm_id)

        oql = ('[timeout:300][out:xml][bbox:{}];\n' + 'area({})->.a;\n' +
               '({});\n' + '(._;>;);\n' + 'out qt;').format(
                   bbox, area_id, ''.join(union))
        return oql

    def get_oql(self, buildings_special=False):
        if self.is_point:
            return self.get_point_oql(buildings_special=buildings_special)
        else:
            return self.get_bbox_oql(buildings_special=buildings_special)

    def candidates_url(self, **kwargs):
        if g.get('filter'):
            kwargs['name_filter'] = g.filter
            endpoint = 'candidates_with_filter'
        else:
            endpoint = 'candidates'

        return self.place_url(endpoint, **kwargs)

    def place_url(self, endpoint, **kwargs):
        return url_for(endpoint,
                       osm_type=self.osm_type,
                       osm_id=self.osm_id,
                       **kwargs)

    def browse_url(self):
        if self.wikidata:
            return url_for('browse_page', item_id=self.wikidata_item_id)

    def next_state_url(self):
        return (self.candidates_url()
                if self.state == 'ready' else self.matcher_progress_url())

    def matcher_progress_url(self):
        return self.place_url('matcher.matcher_progress')

    def matcher_done_url(self, start):
        return self.place_url('matcher.matcher_done', start=start)

    def item_list(self):
        lang = self.most_common_language() or 'en'
        q = self.items.filter(Item.entity.isnot(None)).order_by(Item.item_id)
        return [{'id': i.item_id, 'name': i.label(lang=lang)} for i in q]

    def save_items(self, items, debug=None):
        if debug is None:

            def debug(msg):
                pass

        debug('save items')
        seen = {}
        for qid, v in items.items():
            wikidata_id = int(qid[1:])
            item = Item.query.get(wikidata_id)

            debug(f'saving: {qid}')

            if item:
                item.location = v['location']
            else:
                item = Item(item_id=wikidata_id, location=v['location'])
                session.add(item)
            for k in 'enwiki', 'categories', 'query_label':
                if k in v:
                    setattr(item, k, v[k])

            tags = set(v['tags'])
            # if wikidata says this is a place then adding tags
            # from wikipedia can just confuse things
            # Wikipedia articles sometimes combine a village and a windmill
            # or a neighbourhood and a light rail station.
            # Exception for place tags, we always add place tags from
            # Wikipedia categories.
            if 'categories' in v:
                is_place = any(t.startswith('place') for t in tags)
                for t in matcher.categories_to_tags(v['categories']):
                    if t.startswith('place') or not is_place:
                        tags.add(t)

            # drop_building_tag(tags)

            tags -= skip_tags

            item.tags = tags
            if qid in seen:
                continue

            seen[qid] = item

            existing = PlaceItem.query.filter_by(item=item,
                                                 place=self).one_or_none()
            if not existing:
                place_item = PlaceItem(item=item, place=self)
                session.add(place_item)
            debug(f'saved: {qid}')

        for item in self.items:
            if item.qid in seen:
                continue
            link = PlaceItem.query.filter_by(item=item, place=self).one()
            session.delete(link)
        debug('done')

        return seen

    def load_items(self, bbox=None, debug=False):
        if bbox is None:
            bbox = self.bbox

        items = self.bbox_wikidata_items(bbox)
        if debug:
            print('{:d} items'.format(len(items)))

        wikipedia.add_enwiki_categories(items)

        self.save_items(items)

        session.commit()

    def load_extracts(self, debug=False, progress=None):
        for code, _ in self.languages_wikidata():
            self.load_extracts_wiki(debug=debug, progress=progress, code=code)

    def load_extracts_wiki(self, debug=False, progress=None, code='en'):
        wiki = code + 'wiki'
        by_title = {
            item.sitelinks()[wiki]['title']: item
            for item in self.items if wiki in (item.sitelinks() or {})
        }

        query_iter = wikipedia.get_extracts(by_title.keys(), code=code)
        for title, extract in query_iter:
            item = by_title[title]
            if debug:
                print(title)
            item.extracts[wiki] = extract
            if wiki == 'enwiki':
                item.extract_names = wikipedia.html_names(extract)
            if progress:
                progress(item)

    def wbgetentities(self, debug=False):
        sub = (session.query(Item.item_id).join(ItemTag).group_by(
            Item.item_id).subquery())
        q = (self.items.filter(Item.item_id == sub.c.item_id).options(
            load_only(Item.qid)))

        if debug:
            print('running wbgetentities query')
            print(q)
            print(q.count())
        items = {i.qid: i for i in q}
        if debug:
            print('{} items'.format(len(items)))

        for qid, entity in wikidata_api.entity_iter(items.keys(), debug=debug):
            if debug:
                print(qid)
            items[qid].entity = entity

    def languages_osm(self):
        lang_count = Counter()

        candidate_count = 0
        candidate_has_language_count = 0
        for c in self.items_with_candidates().with_entities(ItemCandidate):
            candidate_count += 1
            candidate_has_language = False
            for lang in c.languages():
                lang_count[lang] += 1
                candidate_has_language = True
            if candidate_has_language:
                candidate_has_language_count += 1

        return sorted(lang_count.items(), key=lambda i: i[1], reverse=True)

    def languages_wikidata(self):
        lang_count = Counter()
        item_count = self.items.count()
        count_sv = self.country_code in {'se', 'fi'}

        for item in self.items:
            if item.entity and 'labels' in item.entity:
                keys = item.entity['labels'].keys()
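                # items labelled only in ceb+sv are usually bot imports; skip outside se/fi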
                if not count_sv and keys == {'ceb', 'sv'}:
                    continue
                for lang in keys:
                    if '-' in lang or lang == 'ceb':
                        continue
                    lang_count[lang] += 1

        if item_count > 10:
            # truncate the long tail of languages
            lang_count = {
                key: count
                for key, count in lang_count.items()
                if key == 'en' or count / item_count > 0.1
            }

        if self.country_code == 'us':
            lang_count = {
                key: count
                for key, count in lang_count.items() if key in {'en', 'es'}
            }

        if self.country_code == 'gb':
            lang_count = {
                key: count
                for key, count in lang_count.items()
                if key in {'en', 'fr', 'de', 'cy'}
            }

        return sorted(lang_count.items(), key=lambda i: i[1],
                      reverse=True)[:10]

    def languages(self):
        wikidata = self.languages_wikidata()
        osm = dict(self.languages_osm())

        return [{
            'code': code,
            'wikidata': count,
            'osm': osm.get(code)
        } for code, count in wikidata]

    def most_common_language(self):
        lang_count = Counter()
        for item in self.items:
            if item.entity and 'labels' in item.entity:
                for lang in item.entity['labels'].keys():
                    lang_count[lang] += 1
        try:
            return lang_count.most_common(1)[0][0]
        except IndexError:
            return None

    def reset_all_items_to_not_done(self):
        place_items = (PlaceItem.query.join(Item).filter(
            Item.entity.isnot(None), PlaceItem.place == self,
            PlaceItem.done == true()).order_by(PlaceItem.item_id))

        for place_item in place_items:
            place_item.done = False
        session.commit()

    def matcher_query(self):
        return (PlaceItem.query.join(Item).filter(
            Item.entity.isnot(None), PlaceItem.place == self,
            or_(PlaceItem.done.is_(None),
                PlaceItem.done != true())).order_by(PlaceItem.item_id))

    def run_matcher(self, debug=False, progress=None):
        if progress is None:

            def progress(candidates, item):
                pass

        conn = session.bind.raw_connection()
        cur = conn.cursor()

        self.existing_wikidata = matcher.get_existing(cur, self.prefix)

        place_items = self.matcher_query()
        total = place_items.count()
        # too many items means something has gone wrong
        assert total < 60_000
        for num, place_item in enumerate(place_items):
            item = place_item.item

            if debug:
                print('searching for', item.label())
                print(item.tags)

            if item.skip_item_during_match():
                candidates = []
            else:
                t0 = time()
                candidates = matcher.find_item_matches(cur,
                                                       item,
                                                       self.prefix,
                                                       debug=debug)
                seconds = time() - t0
                if debug:
                    print('find_item_matches took {:.1f}'.format(seconds))
                    print('{}: {}'.format(len(candidates), item.label()))

            progress(candidates, item)

            # if this is a refresh we remove candidates that no longer match
            as_set = {(i['osm_type'], i['osm_id']) for i in candidates}
            for c in item.candidates[:]:
                if c.edits.count():
                    continue  # foreign keys mean we can't remove saved candidates
                if (c.osm_type, c.osm_id) not in as_set:
                    c.bad_matches.delete()
                    session.delete(c)

            if not candidates:
                continue

            for i in candidates:
                c = ItemCandidate.query.get(
                    (item.item_id, i['osm_id'], i['osm_type']))
                if c:
                    c.update(i)
                else:
                    c = ItemCandidate(**i, item=item)
                    session.add(c)

            place_item.done = True

            if num % 100 == 0:
                session.commit()

        self.item_count = self.items.count()
        self.candidate_count = self.items_with_candidates_count()
        session.commit()

        conn.close()

    def load_isa(self, progress=None):
        if progress is None:

            def progress(msg):
                pass

        isa_map = {
            item.qid: [isa_qid for isa_qid in item.instanceof()]
            for item in self.items
        }
        isa_map = {qid: l for qid, l in isa_map.items() if l}

        if not isa_map:
            return

        download_isa = set()
        isa_obj_map = {}
        for qid, isa_list in isa_map.items():
            isa_objects = []
            # some Wikidata items feature two 'instance of' statements that point to
            # the same item.
            # Example: Cambridge University Museum of Zoology (Q5025605)
            # https://www.wikidata.org/wiki/Q5025605
            seen_isa_qid = set()
            for isa_qid in isa_list:
                if isa_qid in seen_isa_qid:
                    continue
                seen_isa_qid.add(isa_qid)
                item_id = int(isa_qid[1:])
                isa = IsA.query.get(item_id)
                if not isa or not isa.entity:
                    download_isa.add(isa_qid)
                if not isa:
                    isa = IsA(item_id=item_id)
                    session.add(isa)
                isa_obj_map[isa_qid] = isa
                isa_objects.append(isa)
            item = Item.query.get(qid[1:])
            item.isa = isa_objects

        for qid, entity in wikidata_api.entity_iter(download_isa):
            isa_obj_map[qid].entity = entity

        session.commit()

    def do_match(self, debug=True):
        if self.state == 'ready':  # already done
            return

        if not self.state or self.state == 'refresh':
            print('load items')
            self.load_items()  # includes categories
            self.state = 'tags'
            session.commit()

        if self.state == 'tags':
            print('wbgetentities')
            self.wbgetentities(debug=debug)
            print('load extracts')
            self.load_extracts(debug=debug)
            self.state = 'wbgetentities'
            session.commit()

        if self.state in ('wbgetentities', 'overpass_error',
                          'overpass_timeout'):
            print('loading_overpass')
            self.get_overpass()
            self.state = 'postgis'
            session.commit()

        if self.state == 'postgis':
            print('running osm2pgsql')
            self.load_into_pgsql(capture_stderr=False)
            self.state = 'osm2pgsql'
            session.commit()

        if self.state == 'osm2pgsql':
            print('run matcher')
            self.run_matcher(debug=debug)
            self.state = 'load_isa'
            session.commit()

        if self.state == 'load_isa':
            print('load isa')
            self.load_isa()
            print('ready')
            self.state = 'ready'
            session.commit()

    def get_overpass(self):
        oql = self.get_oql()
        if self.area_in_sq_km < 800:
            r = overpass.run_query_persistent(oql)
            assert r
            self.save_overpass(r.content)
        else:
            self.chunk()

    def get_items(self):
        items = [
            item for item in self.items_with_candidates()
            if all('wikidata' not in c.tags for c in item.candidates)
        ]

        filter_list = matcher.filter_candidates_more(items, bad=get_bad(items))
        add_tags = []
        for item, match in filter_list:
            picked = match.get('candidate')
            if not picked:
                continue
            dist = picked.dist
            intersection = set()
            for k, v in picked.tags.items():
                tag = k + '=' + v
                if k in item.tags or tag in item.tags:
                    intersection.add(tag)
            if dist < 400:
                symbol = '+'
            elif dist < 4000 and intersection == {'place=island'}:
                symbol = '+'
            elif dist < 3000 and intersection == {'natural=wetland'}:
                symbol = '+'
            elif dist < 2000 and intersection == {'natural=beach'}:
                symbol = '+'
            elif dist < 2000 and intersection == {'natural=bay'}:
                symbol = '+'
            elif dist < 2000 and intersection == {'aeroway=aerodrome'}:
                symbol = '+'
            elif dist < 1000 and intersection == {'amenity=school'}:
                symbol = '+'
            elif dist < 800 and intersection == {'leisure=park'}:
                symbol = '+'
            elif dist < 2000 and intersection == {'landuse=reservoir'}:
                symbol = '+'
            elif dist < 3000 and item.tags == {'place', 'admin_level'}:
                symbol = '+'
            elif dist < 3000 and item.tags == {
                    'place', 'place=town', 'admin_level'
            }:
                symbol = '+'
            elif dist < 3000 and item.tags == {
                    'admin_level', 'place', 'place=neighbourhood'
            } and 'place' in picked.tags:
                symbol = '+'
            else:
                symbol = '?'

            print('{:1s}  {:9s}  {:5.0f}  {!r}  {!r}'.format(
                symbol, item.qid, picked.dist, item.tags, intersection))
            if symbol == '+':
                add_tags.append((item, picked))
        return add_tags

    def chunk_n(self, n):
        n = max(1, n)
        (south, north, west, east) = self.bbox
        ns = (north - south) / n
        ew = (east - west) / n

        chunks = []
        for row in range(n):
            for col in range(n):
                chunk = (south + ns * row, south + ns * (row + 1),
                         west + ew * col, west + ew * (col + 1))
                want_chunk = func.ST_Intersects(Place.geom, envelope(chunk))
                want = (session.query(want_chunk).filter(
                    Place.place_id == self.place_id).scalar())
                if want:
                    chunks.append(chunk)

        return chunks

    def get_chunks(self):
        bbox_chunks = list(self.polygon_chunk(size=place_chunk_size))

        chunks = []
        need_self = True  # include self in first non-empty chunk
        for num, chunk in enumerate(bbox_chunks):
            filename = self.chunk_filename(num, bbox_chunks)
            oql = self.oql_for_chunk(chunk, include_self=need_self)
            chunks.append({
                'num': num,
                'oql': oql,
                'filename': filename,
            })
            if need_self and oql:
                need_self = False
        return chunks

    def chunk_filename(self, num, chunks):
        if len(chunks) == 1:
            return '{}.xml'.format(self.place_id)
        return '{}_{:03d}_{:03d}.xml'.format(self.place_id, num, len(chunks))

    def chunk(self):
        chunk_size = utils.calc_chunk_size(self.area_in_sq_km)
        chunks = self.chunk_n(chunk_size)

        print('chunk size:', chunk_size)

        files = []
        for num, chunk in enumerate(chunks):
            filename = self.chunk_filename(num, chunks)
            # print(num, q.count(), len(tags), filename, list(tags))
            full = os.path.join('overpass', filename)
            files.append(full)
            if os.path.exists(full):
                continue
            oql = self.oql_for_chunk(chunk, include_self=(num == 0))

            r = overpass.run_query_persistent(oql)
            if not r:
                print(oql)
            assert r
            with open(full, 'wb') as out:
                out.write(r.content)

        cmd = ['osmium', 'merge'] + files + ['-o', self.overpass_filename]
        print(' '.join(cmd))
        subprocess.run(cmd)

    def oql_for_chunk(self, chunk, include_self=False):
        q = self.items.filter(
            cast(Item.location, Geometry).contained(envelope(chunk)))

        tags = set()
        for item in q:
            tags |= set(item.tags)
        tags.difference_update(skip_tags)
        tags = matcher.simplify_tags(tags)
        if not tags:
            print('no tags, skipping')
            return

        ymin, ymax, xmin, xmax = chunk
        bbox = '{:f},{:f},{:f},{:f}'.format(ymin, xmin, ymax, xmax)

        oql = overpass.oql_for_area(self.overpass_type,
                                    self.osm_id,
                                    tags,
                                    bbox,
                                    None,
                                    include_self=include_self)
        return oql

    def chunk_count(self):
        return sum(1 for _ in self.polygon_chunk(size=place_chunk_size))

    def geojson_chunks(self):
        chunks = []
        for chunk in self.polygon_chunk(size=place_chunk_size):
            clip = func.ST_Intersection(Place.geom, envelope(chunk))

            geojson = (session.query(func.ST_AsGeoJSON(
                clip, 4)).filter(Place.place_id == self.place_id).scalar())

            chunks.append(geojson)
        return chunks

    def wikidata_chunk_size(self, size=22):
        if self.osm_type == 'node':
            return 1

        area = self.area_in_sq_km
        if area < 3000 and not self.wikidata_query_timeout:
            return 1
        return utils.calc_chunk_size(area, size=size)

    def polygon_chunk(self, size=64):
        stmt = (session.query(
            func.ST_Dump(Place.geom.cast(Geometry())).label('x')).filter_by(
                place_id=self.place_id).subquery())

        q = session.query(
            stmt.c.x.path[1],
            func.ST_Area(stmt.c.x.geom.cast(Geography)) / (1000 * 1000),
            func.Box2D(stmt.c.x.geom))

        for num, area, box2d in q:
            chunk_size = utils.calc_chunk_size(area, size=size)
            west, south, east, north = map(float, re_box.match(box2d).groups())
            for chunk in bbox_chunk((south, north, west, east), chunk_size):
                yield chunk

    def latest_matcher_run(self):
        return self.matcher_runs.order_by(PlaceMatcher.start.desc()).first()

    def obj_for_json(self, include_geom=False):
        keys = [
            'osm_type',
            'osm_id',
            'display_name',
            'name',
            'extratags',
            'address',
            'namedetails',
            'state',
            'lat',
            'lon',
            'area_in_sq_km',
            'name_for_changeset',
            'name_for_change_comment',
            'bbox',
        ]
        out = {key: getattr(self, key) for key in keys}
        out['added'] = str(self.added)
        if include_geom:
            out['geom'] = json.loads(self.geojson)

        items = []
        for item in self.items:
            if not item.sitelinks():
                continue
            cur = {
                'labels': item.labels,
                'qid': item.qid,
                'url': item.wikidata_uri,
                'item_identifiers': item.get_item_identifiers(),
                'names': item.names(),
                'sitelinks': item.sitelinks(),
                'location': item.get_lat_lon(),
            }
            if item.categories:
                cur['categories'] = item.categories

            matches = [{
                'osm_type': m.osm_type,
                'osm_id': m.osm_id,
                'dist': m.dist,
                'label': m.label,
            } for m in item.candidates]

            if matches:
                cur['matches'] = matches

            items.append(cur)

        out['items'] = items
        return out

    def refresh_nominatim(self):
        hit = nominatim.reverse(self.osm_type, self.osm_id)
        self.update_from_nominatim(hit)
        session.commit()

    def is_in(self):
        if self.overpass_is_in:
            return self.overpass_is_in

        # self.overpass_is_in = overpass.is_in(self.overpass_type, self.osm_id)
        self.overpass_is_in = overpass.is_in_lat_lon(self.lat, self.lon)
        if self.overpass_is_in:
            session.commit()
        return self.overpass_is_in

    def suggest_larger_areas(self):
        ret = []
        for e in reversed(self.is_in() or []):
            osm_type, osm_id, bounds = e['type'], e['id'], e['bounds']
            if osm_type == self.osm_type and osm_id == self.osm_id:
                continue

            box = func.ST_MakeEnvelope(bounds['minlon'], bounds['minlat'],
                                       bounds['maxlon'], bounds['maxlat'],
                                       4326)

            q = func.ST_Area(box.cast(Geography))
            bbox_area = session.query(q).scalar()
            area_in_sq_km = bbox_area / (1000 * 1000)

            if area_in_sq_km < 10 or area_in_sq_km > 40_000:
                continue
            place = Place.from_osm(osm_type, osm_id)
            if not place:
                continue
            if 'tags' in e:
                place.admin_level = e['tags'].get('admin_level') or None
            else:
                place.admin_level = None
            ret.append(place)

        ret.sort(key=lambda place: place.area_in_sq_km)
        return ret

    def get_candidate_items(self):
        items = self.items_with_candidates()

        if self.existing_wikidata:
            existing = {
                qid: set(tuple(i) for i in osm_list)
                for qid, osm_list in self.existing_wikidata.items()
            }
        else:
            existing = {}

        items = [
            item for item in items
            if item.qid not in existing and all('wikidata' not in c.tags
                                                for c in item.candidates)
        ]

        need_commit = False
        for item in items:
            for c in item.candidates:
                if c.set_match_detail():
                    need_commit = True
        if need_commit:
            session.commit()

        return items
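
The grid arithmetic inside `chunk_n` above can be exercised on its own. A self-contained sketch of the same (south, north, west, east) splitting, minus the PostGIS filter that drops chunks not intersecting the place geometry:

def bbox_grid(bbox, n):
    """Split a (south, north, west, east) box into an n x n grid."""
    south, north, west, east = bbox
    ns, ew = (north - south) / n, (east - west) / n
    return [(south + ns * row, south + ns * (row + 1),
             west + ew * col, west + ew * (col + 1))
            for row in range(n) for col in range(n)]

assert len(bbox_grid((50.0, 52.0, -1.0, 1.0), 2)) == 4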
Example #5
    def calculate_summary(self, product_name: str,
                          time: Range) -> TimePeriodOverview:
        """
        Create a summary of the given product/time range.
        """
        log = self.log.bind(product_name=product_name, time=time)
        log.debug("summary.query")

        begin_time, end_time, where_clause = self._where(product_name, time)
        select_by_srid = (select((
            func.ST_SRID(DATASET_SPATIAL.c.footprint).label("srid"),
            func.count().label("dataset_count"),
            func.ST_Transform(
                func.ST_Union(DATASET_SPATIAL.c.footprint),
                self._target_srid(),
                type_=Geometry(),
            ).label("footprint_geometry"),
            func.sum(DATASET_SPATIAL.c.size_bytes).label("size_bytes"),
            func.max(DATASET_SPATIAL.c.creation_time).label(
                "newest_dataset_creation_time"),
        )).where(where_clause).group_by("srid").alias("srid_summaries"))

        # Union all srid groups into one summary.
        result = self._engine.execute(
            select((
                func.sum(
                    select_by_srid.c.dataset_count).label("dataset_count"),
                func.array_agg(select_by_srid.c.srid).label("srids"),
                func.sum(select_by_srid.c.size_bytes).label("size_bytes"),
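                # ST_Buffer(geom, 0) repairs invalid polygons before union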
                func.ST_Union(
                    func.ST_Buffer(select_by_srid.c.footprint_geometry, 0),
                    type_=Geometry(srid=self._target_srid()),
                ).label("footprint_geometry"),
                func.max(select_by_srid.c.newest_dataset_creation_time).label(
                    "newest_dataset_creation_time"),
                func.now().label("summary_gen_time"),
            )))

        rows = result.fetchall()
        log.debug("summary.query.done", srid_rows=len(rows))

        assert len(rows) == 1
        row = dict(rows[0])
        row["dataset_count"] = int(
            row["dataset_count"]) if row["dataset_count"] else 0
        if row["footprint_geometry"] is not None:
            row["footprint_crs"] = self._get_srid_name(
                row["footprint_geometry"].srid)
            row["footprint_geometry"] = geo_shape.to_shape(
                row["footprint_geometry"])
        else:
            row["footprint_crs"] = None
        row["crses"] = None
        if row["srids"] is not None:
            row["crses"] = {self._get_srid_name(s) for s in row["srids"]}
        del row["srids"]

        # Convert from Python Decimal
        if row["size_bytes"] is not None:
            row["size_bytes"] = int(row["size_bytes"])

        has_data = row["dataset_count"] > 0

        log.debug("counter.calc")

        # Initialise all requested days as zero
        day_counts = Counter({
            d.date(): 0
            for d in pd.date_range(begin_time, end_time, closed="left")
        })
        region_counts = Counter()
        if has_data:
            day_counts.update(
                Counter({
                    day.date(): count
                    for day, count in self._engine.execute(
                        select([
                            func.date_trunc(
                                "day",
                                DATASET_SPATIAL.c.center_time.op(
                                    "AT TIME ZONE")(self.grouping_time_zone),
                            ).label("day"),
                            func.count(),
                        ]).where(where_clause).group_by("day"))
                }))
            region_counts = Counter({
                item: count
                for item, count in self._engine.execute(
                    select([
                        DATASET_SPATIAL.c.region_code.label("region_code"),
                        func.count(),
                    ]).where(where_clause).group_by("region_code"))
            })

        summary = TimePeriodOverview(
            **row,
            timeline_period="day",
            time_range=Range(begin_time, end_time),
            timeline_dataset_counts=day_counts,
            region_dataset_counts=region_counts,
            # TODO: filter invalid from the counts?
            footprint_count=row["dataset_count"] or 0,
        )

        log.debug(
            "summary.calc.done",
            dataset_count=summary.dataset_count,
            footprints_missing=summary.dataset_count - summary.footprint_count,
        )
        return summary
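A standalone sketch of the zero-initialised day-bucket trick used in `calculate_summary` above. Note that pandas 1.4 renamed `date_range`'s `closed` parameter to `inclusive`, and `closed` was removed in pandas 2.0; the dates below are illustrative only.

from collections import Counter
from datetime import datetime

import pandas as pd

begin, end = datetime(2020, 1, 1), datetime(2020, 1, 5)

# Every day in [begin, end) starts at zero, so days without datasets
# still appear on the timeline.
day_counts = Counter({
    d.date(): 0
    for d in pd.date_range(begin, end, inclusive="left")
})

# Merge real counts over the zeros, as the summary query does.
day_counts.update({datetime(2020, 1, 2).date(): 7})
print(day_counts)  # Jan 1: 0, Jan 2: 7, Jan 3: 0, Jan 4: 0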
Example #6
def get_collection_items(
    collection_id=None,
    roles=None,
    item_id=None,
    bbox=None,
    datetime=None,
    ids=None,
    collections=None,
    intersects=None,
    page=1,
    limit=10,
    query=None,
    **kwargs,
) -> Pagination:
    """Retrieve a list of collection items based on filters.

    :param collection_id: Single Collection ID to include in the search for items.
                          Only Items in the provided Collection will be searched, defaults to None
    :type collection_id: str, optional
    :param item_id: item identifier, defaults to None
    :type item_id: str, optional
    :param bbox: bounding box for intersection [west, north, east, south], defaults to None
    :type bbox: list, optional
    :param datetime: Single date+time, or a range ('/' separator), formatted to RFC 3339, section 5.6.
                     Use double dots '..' for open date ranges, defaults to None. If the start or end date of an image
                     generated by a temporal composition intersects the given datetime or range it will be included in the
                     result.
    :type datetime: str, optional
    :param ids: Array of Item ids to return. All other filter parameters that further restrict the
                number of search results are ignored, defaults to None
    :type ids: list, optional
    :param collections: Array of Collection IDs to include in the search for items.
                        Only Items in one of the provided Collections will be searched, defaults to None
    :type collections: list, optional
    :param intersects: Searches items by performing an intersection between their geometry and the provided GeoJSON
                       geometry. All GeoJSON geometry types must be supported, defaults to None
    :type intersects: dict, optional
    :param page: The page offset of results, defaults to 1
    :type page: int, optional
    :param limit: The maximum number of results to return (page size), defaults to 10
    :type limit: int, optional
    :return: paginated list of collection items
    :rtype: Pagination
    """
    columns = [
        func.concat(Collection.name, "-",
                    Collection.version).label("collection"),
        Collection.collection_type,
        Collection._metadata.label("meta"),
        Item._metadata.label("item_meta"),
        Item.name.label("item"),
        Item.id,
        Item.collection_id,
        Item.start_date.label("start"),
        Item.end_date.label("end"),
        Item.assets,
        Item.created,
        Item.updated,
        cast(Item.cloud_cover, Float).label("cloud_cover"),
        func.ST_AsGeoJSON(Item.geom).label("geom"),
        func.ST_XMin(Item.geom).label("xmin"),
        func.ST_XMax(Item.geom).label("xmax"),
        func.ST_YMin(Item.geom).label("ymin"),
        func.ST_YMax(Item.geom).label("ymax"),
        Tile.name.label("tile"),
    ]

    if roles is None:
        roles = []

    where = [
        Collection.id == Item.collection_id,
        or_(Collection.is_public.is_(True),
            Collection.id.in_([int(r.split(":")[0]) for r in roles])),
    ]

    collections_where = _where_collections(collection_id, collections)
    collections_where.append(
        or_(Collection.is_public.is_(True),
            Collection.id.in_([int(r.split(":")[0]) for r in roles])))
    outer_join = [(Tile, [Item.tile_id == Tile.id])]
    _geom_tables = []
    _collections = Collection.query().filter(*collections_where).all()
    if bbox or intersects:
        grids = GridRefSys.query().filter(
            GridRefSys.id.in_([c.grid_ref_sys_id
                               for c in _collections])).all()
        for grid in grids:
            geom_table = grid.geom_table
            if geom_table is None:
                continue
            _geom_tables.append(geom_table)

    if ids is not None:
        if isinstance(ids, str):
            ids = ids.split(",")
        where += [Item.name.in_(ids)]
    else:
        where += _where_collections(collection_id, collections)

        if item_id is not None:
            where += [Item.name.like(item_id)]

        if query:
            filters = create_query_filter(query)
            if filters:
                where += filters

        if intersects is not None:
            # Intersect with native grid if there is
            geom_expr = func.ST_GeomFromGeoJSON(str(intersects))
            grids_where, joins = intersect_grids(geom_expr,
                                                 geom_tables=_geom_tables)

            where += grids_where
            outer_join += joins
        elif bbox is not None:
            try:
                if isinstance(bbox, str):
                    bbox = bbox.split(",")

                bbox = [float(x) for x in bbox]

                if bbox[0] == bbox[2] or bbox[1] == bbox[3]:
                    raise InvalidBoundingBoxError(
                        f"'{bbox}' has zero width or height.")

                geom_expr = func.ST_MakeEnvelope(bbox[0], bbox[1], bbox[2],
                                                 bbox[3],
                                                 func.ST_SRID(Item.geom))
                grid_where, joins = intersect_grids(geom_expr,
                                                    geom_tables=_geom_tables)

                where += grid_where
                outer_join += joins
            except (ValueError, InvalidBoundingBoxError):
                abort(400, f"'{bbox}' is not a valid bbox.")

        if datetime is not None:
            if "/" in datetime:
                matches_open = ("..", "")
                time_start, time_end = datetime.split("/")
                if time_start in matches_open:  # open start
                    date_filter = [
                        or_(Item.start_date <= time_end,
                            Item.end_date <= time_end)
                    ]
                elif time_end in matches_open:  # open end
                    date_filter = [
                        or_(Item.start_date >= time_start,
                            Item.end_date >= time_start)
                    ]
                else:  # closed range
                    date_filter = [
                        or_(
                            and_(Item.start_date >= time_start,
                                 Item.start_date <= time_end),
                            and_(Item.end_date >= time_start,
                                 Item.end_date <= time_end),
                            and_(Item.start_date < time_start,
                                 Item.end_date > time_end),
                        )
                    ]
            else:
                date_filter = [
                    and_(Item.start_date <= datetime,
                         Item.end_date >= datetime)
                ]
            where += date_filter

    query = session.query(*columns)
    for entity, join_conditions in outer_join:
        query = query.outerjoin(entity, *join_conditions)

    try:
        query = query.filter(*where).order_by(Item.start_date.desc(), Item.id)
        result = query.paginate(page=int(page),
                                per_page=int(limit),
                                error_out=False,
                                max_per_page=BDC_STAC_MAX_LIMIT)

        return result
    except Exception as err:
        msg = str(err)
        if hasattr(err, "orig"):
            msg = str(err.orig)
        abort(400, msg.rstrip())
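A pure-Python sketch of the closed-range datetime filter above: an item is kept when its [start, end] interval touches the requested range. The function and names here are illustrative, not part of the original module.

from datetime import date

def interval_matches(item_start: date, item_end: date,
                     range_start: date, range_end: date) -> bool:
    return (
        # item starts inside the range
        range_start <= item_start <= range_end
        # item ends inside the range
        or range_start <= item_end <= range_end
        # item spans the whole range
        or (item_start < range_start and item_end > range_end)
    )

assert interval_matches(date(2020, 1, 10), date(2020, 1, 20),
                        date(2020, 1, 1), date(2020, 1, 15))
assert not interval_matches(date(2020, 2, 1), date(2020, 2, 5),
                            date(2020, 1, 1), date(2020, 1, 15))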
Example #7
    def check_scenes(cls, collections: List[str], start_date: datetime, end_date: datetime,
                     catalog: str = None, dataset: str = None,
                     grid: str = None, tiles: list = None, bbox: list = None, catalog_kwargs=None, only_tiles=False):
        """Check for the scenes in remote provider and compares with the Collection Builder."""
        bbox_list = []
        if grid and tiles:
            grid = GridRefSys.query().filter(GridRefSys.name == grid).first_or_404(f'Grid "{grid}" not found.')
            geom_table = grid.geom_table

            rows = db.session.query(
                geom_table.c.tile,
                func.ST_Xmin(func.ST_Transform(geom_table.c.geom, 4326)).label('xmin'),
                func.ST_Ymin(func.ST_Transform(geom_table.c.geom, 4326)).label('ymin'),
                func.ST_Xmax(func.ST_Transform(geom_table.c.geom, 4326)).label('xmax'),
                func.ST_Ymax(func.ST_Transform(geom_table.c.geom, 4326)).label('ymax'),
            ).filter(geom_table.c.tile.in_(tiles)).all()
            for row in rows:
                bbox_list.append((row.tile, (row.xmin, row.ymin, row.xmax, row.ymax)))
        else:
            bbox_list.append(('', bbox))

        instance, provider = get_provider(catalog)

        collection_map = dict()
        collection_ids = list()

        for _collection in collections:
            # Split on the last '-': collection names may themselves contain dashes.
            collection, version = _collection.rsplit('-', 1)

            collection = Collection.query().filter(
                Collection.name == collection,
                Collection.version == version
            ).first_or_404(f'Collection "{collection}-{version}" not found.')

            collection_ids.append(collection.id)
            collection_map[_collection] = collection

        options = dict(start_date=start_date, end_date=end_date)
        if catalog_kwargs:
            options.update(catalog_kwargs)

        redis = current_app.redis
        output = dict(
            collections={cname: dict(total_scenes=0, total_missing=0, missing_external=[]) for cname in collections}
        )

        items = {cid: set() for cid in collection_ids}
        external_scenes = set()

        for tile, _bbox in bbox_list:
            with redis.pipeline() as pipe:
                if only_tiles:
                    entry = tile
                    options['tile'] = tile
                else:
                    options['bbox'] = _bbox
                    entry = _bbox

                periods = _generate_periods(start_date.replace(tzinfo=None), end_date.replace(tzinfo=None))

                for period_start, period_end in periods:
                    _items = db.session.query(Item.name, Item.collection_id).filter(
                        Item.collection_id.in_(collection_ids),
                        func.ST_Intersects(
                            func.ST_MakeEnvelope(
                                *_bbox, func.ST_SRID(Item.geom)
                            ),
                            Item.geom
                        ),
                        or_(
                            and_(Item.start_date >= period_start, Item.start_date <= period_end),
                            and_(Item.end_date >= period_start, Item.end_date <= period_end),
                            and_(Item.start_date < period_start, Item.end_date > period_end),
                        )
                    ).order_by(Item.name).all()

                    for item in _items:
                        items[item.collection_id].add(item.name)

                    options['start_date'] = period_start.strftime('%Y-%m-%d')
                    options['end_date'] = period_end.strftime('%Y-%m-%d')

                    key = f'scenes:{catalog}:{dataset}:{period_start.strftime("%Y%m%d")}_{period_end.strftime("%Y%m%d")}_{entry}'

                    pipe.get(key)
                    provider_scenes = []

                    if not redis.exists(key):
                        provider_scenes = provider.search(dataset, **options)
                        provider_scenes = [s.scene_id for s in provider_scenes]

                        pipe.set(key, json.dumps(provider_scenes))

                    external_scenes = external_scenes.union(set(provider_scenes))

                cached_scenes = pipe.execute()

                for cache in cached_scenes:
                    # A True reply corresponds to a queued SET; only actual
                    # payloads are cache hits.
                    if cache is not None and cache is not True:
                        external_scenes = external_scenes.union(set(json.loads(cache)))

        output['total_external'] = len(external_scenes)
        for _collection_name, _collection in collection_map.items():
            _items = set(items[_collection.id])
            diff = list(external_scenes.difference(_items))

            output['collections'][_collection_name]['total_scenes'] = len(_items)
            output['collections'][_collection_name]['total_missing'] = len(diff)
            output['collections'][_collection_name]['missing_external'] = diff

            for cname, _internal_collection in collection_map.items():
                if cname != _collection_name:
                    diff = list(_items.difference(set(items[_internal_collection.id])))
                    output['collections'][_collection_name][f'total_missing_{cname}'] = len(diff)
                    output['collections'][_collection_name][f'missing_{cname}'] = diff

        return output
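A minimal, runnable sketch of the pipelined Redis caching pattern in `check_scenes`: reads are queued on the pipeline, the existence check and writes happen immediately, and `execute()` returns a mix of GET payloads and `True` replies for the queued SETs. Key names and payloads are illustrative; it assumes a Redis server on localhost.

import json

import redis

r = redis.Redis()
keys = ["scenes:demo:20200101_20200131", "scenes:demo:20200201_20200229"]
cached = set()

with r.pipeline() as pipe:
    for key in keys:
        pipe.get(key)                 # queue a read for every key
        if not r.exists(key):         # immediate check, outside the pipeline
            # Freshly fetched scene ids would be unioned in directly here;
            # the cache write is queued for the next run.
            pipe.set(key, json.dumps(["S2A_FAKE_SCENE_ID"]))
    for reply in pipe.execute():
        # SET replies arrive as True; only byte payloads are cache hits.
        if reply is not None and reply is not True:
            cached |= set(json.loads(reply))

print(cached)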
Example #8
def get_collection_items(collection_id=None,
                         roles=None,
                         item_id=None,
                         bbox=None,
                         time=None,
                         ids=None,
                         collections=None,
                         cubes=None,
                         intersects=None,
                         page=1,
                         limit=10,
                         query=None,
                         **kwargs):
    """Retrieve a list of collection items based on filters.

    :param collection_id: Single Collection ID to include in the search for items.
                          Only Items in the provided Collection will be searched, defaults to None
    :type collection_id: str, optional
    :param item_id: item identifier, defaults to None
    :type item_id: str, optional
    :param bbox: bounding box for intersection [west, north, east, south], defaults to None
    :type bbox: list, optional
    :param time: Single date+time, or a range ('/' separator), formatted to RFC 3339, section 5.6, defaults to None
    :type time: str, optional
    :param ids: Array of Item ids to return. All other filter parameters that further restrict the
                number of search results are ignored, defaults to None
    :type ids: list, optional
    :param collections: Array of Collection IDs to include in the search for items.
                        Only Items in one of the provided Collections will be searched, defaults to None
    :type collections: list, optional
    :param cubes: Bool indicating if only cubes should be returned, defaults to None
    :type cubes: bool, optional
    :param intersects: Searches items by performing an intersection between their geometry and the provided GeoJSON
                       geometry. All GeoJSON geometry types must be supported, defaults to None
    :type intersects: dict, optional
    :param page: The page offset of results, defaults to 1
    :type page: int, optional
    :param limit: The maximum number of results to return (page size), defaults to 10
    :type limit: int, optional
    :return: paginated list of collection items
    :rtype: Pagination
    """
    columns = [
        Collection.name.label('collection'),
        Item.name.label('item'),
        Item.start_date.label('start'),
        Item.end_date.label('end'),
        Item.assets,
        func.ST_AsGeoJSON(Item.geom).label('geom'),
        func.Box2D(Item.geom).label('bbox'),
        Tile.name.label('tile')
    ]

    if roles is None:
        roles = []

    where = [
        Collection.id == Item.collection_id, Item.tile_id == Tile.id,
        or_(Collection.is_public.is_(True),
            Collection.id.in_([int(r.split(':')[0]) for r in roles]))
    ]

    if ids is not None:
        where += [Item.id.in_(ids.split(','))]
    elif item_id is not None:
        where += [Item.id.like(item_id)]
    else:
        if collections is not None:
            where += [Collection.name.in_(collections.split(','))]
        elif collection_id is not None:
            where += [Collection.name.like(collection_id)]

        if intersects is not None:
            where += [
                func.ST_Intersects(func.ST_GeomFromGeoJSON(str(intersects)),
                                   Item.geom)
            ]

        if query:
            filters = create_query_filter(query)
            if filters:
                where += filters

        if bbox is not None:
            try:
                split_bbox = [float(x) for x in bbox.split(',')]

                where += [
                    func.ST_Intersects(
                        func.ST_MakeEnvelope(split_bbox[0], split_bbox[1],
                                             split_bbox[2], split_bbox[3],
                                             func.ST_SRID(Item.geom)),
                        Item.geom)
                ]
            except (ValueError, IndexError):
                raise InvalidBoundingBoxError(f"'{bbox}' is not a valid bbox.")

        if time is not None:
            if "/" in time:
                # Parse both endpoints of the range before comparing.
                time_start, time_end = time.split("/")
                time_start = datetime.fromisoformat(time_start)
                time_end = datetime.fromisoformat(time_end)
                where += [
                    or_(Item.end_date <= time_end, Item.start_date <= time_end)
                ]
            else:
                time_start = datetime.fromisoformat(time)
            where += [
                or_(Item.start_date >= time_start, Item.end_date >= time_start)
            ]

    query = session.query(*columns).filter(*where).order_by(
        Item.start_date.desc())

    result = query.paginate(page=int(page),
                            per_page=int(limit),
                            error_out=False,
                            max_per_page=int(BDC_STAC_MAX_LIMIT))

    return result
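The `ST_MakeEnvelope(..., ST_SRID(geom))` construction recurs in Examples #6-#8. Below is a database-free sketch that only renders the expression SQLAlchemy builds; the column name and coordinates are illustrative.

from sqlalchemy import column, func

geom = column("geom")
west, south, east, north = -54.0, -12.0, -52.0, -10.0

# The envelope inherits the geometry column's SRID via ST_SRID, so no
# EPSG code needs to be hard-coded into the query.
envelope = func.ST_MakeEnvelope(west, south, east, north, func.ST_SRID(geom))
print(func.ST_Intersects(envelope, geom))
# ST_Intersects(ST_MakeEnvelope(:ST_MakeEnvelope_1, ..., ST_SRID(geom)), geom)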