Пример #1
0
 def test_prepare_doc(self):
     """Check that prepare_document delegates to merge_docs (issue#62).

     With one (empty) existing search result, ``merge_docs`` must be called
     with that result, the new document and the third argument passed to
     ``prepare_document``.
     """
     search_results = mock.MagicMock()
     search_results.results = [{}]
     document = {'type': 'test/foo'}
     precedence = 1
     target = 'moxie.places.importers.helpers.merge_docs'
     with mock.patch(target) as merge_docs:
         prepare_document(document, search_results, precedence)
     # The mock keeps its call record after the patch has been undone.
     merge_docs.assert_called_with(
         search_results.results[0], document, precedence)
Пример #2
0
 def run(self):
     """Index every row of the ``station`` table as a rail-station document.

     Each row becomes a document with ``id``, identifiers, ``location``,
     ``name``/``name_sort``, ``type`` and an empty ``tags`` list. The
     indexer is queried for documents sharing the same identifiers and the
     pair is passed through ``prepare_document`` before being queued.
     Documents are sent to the indexer (and committed) every 400 rows, with
     a final index/commit for the remainder.

     Does nothing when ``self.indexer`` is falsy. The sqlite connection is
     always closed, even if indexing raises.
     """
     if not self.indexer:
         # Nothing to index into: do not open the database at all.
         return

     batch_size = 400
     conn = sqlite3.connect(self.sbb_db)
     try:
         conn.row_factory = dict_factory
         cursor = conn.cursor()
         docs = []
         # Iterate the cursor directly so rows are streamed rather than
         # materialised in one list by fetchall().
         rows = cursor.execute("SELECT * FROM station")
         for count, row in enumerate(rows, 1):
             data = {}
             data['id'] = "stoparea:%s" % str(row['id'])
             data[self.identifier_key] = [
                 str(row['id']), 'sbb:%s' % row['id']
             ]
             data['location'] = "%s,%s" % (row['x'], row['y'])
             data['name'] = row['name']
             data['name_sort'] = row['name']
             data['type'] = "/transport/rail-station"
             data['tags'] = []
             search_results = self.indexer.search_for_ids(
                 self.identifier_key, data[self.identifier_key])
             docs.append(
                 prepare_document(data, search_results, self.precedence))
             if count % batch_size == 0:
                 # Flush a full batch and start a new one.
                 self.indexer.index(docs)
                 self.indexer.commit()
                 docs = []
         # Flush whatever is left after the last full batch.
         self.indexer.index(docs)
         self.indexer.commit()
     finally:
         conn.close()
Пример #3
0
 def run(self):
     """Parse the NaPTAN file incrementally, then index what was collected.

     The file is fed to the SAX parser in ``self.buffer_size`` chunks so
     ``self.handler`` receives the events. When an indexer is configured,
     every entry of the handler's ``stop_areas`` and then ``stop_points``
     is matched against existing documents by identifier, passed through
     ``prepare_document`` and indexed in a single batch followed by a
     commit.
     """
     sax_parser = make_parser(["xml.sax.IncrementalParser"])
     sax_parser.setContentHandler(self.handler)
     while True:
         chunk = self.naptan_file.read(self.buffer_size)
         if not chunk:
             break
         sax_parser.feed(chunk)
     sax_parser.close()

     if not self.indexer:
         return

     docs = []
     # Stop areas first, then stop points; each attribute is only read when
     # its turn comes.
     for collection_name in ('stop_areas', 'stop_points'):
         for record in getattr(self.handler, collection_name).values():
             matches = self.indexer.search_for_ids(
                 self.identifier_key, record[self.identifier_key])
             docs.append(prepare_document(record, matches, self.precedence))
     self.indexer.index(docs)
     self.indexer.commit()
Пример #4
0
 def process_type(self, rdf_type, defined_type):
     """Collect prepared documents for every subject of an RDF type.

     Each subject of ``rdf_type`` in ``self.graph`` is converted with
     ``process_subject``; non-empty documents are matched against the
     index by identifier and passed through ``prepare_document``. A
     subject that raises is logged as a warning and skipped.

     :param rdf_type: RDF type to find
     :param defined_type: type defining subjects found
     :return list of documents
     """
     documents = []
     for subject in self.graph.subjects(RDF.type, rdf_type):
         try:
             doc = self.process_subject(subject, defined_type)
             if not doc:
                 continue
             matches = self.indexer.search_for_ids(
                 self.identifier_key, doc[self.identifier_key])
             documents.append(
                 prepare_document(doc, matches, self.precedence))
         except Exception:
             # Best effort: one bad subject must not abort the whole type.
             logger.warning('Could not process subject', exc_info=True,
                            extra={'data': {'subject': subject.toPython()}})
     return documents
Пример #5
0
 def index_library(self, lib):
     """Copy library data onto the already indexed document for ``lib``.

     The document is looked up by the identifier
     ``<lib_data_identifier>:<lib id>``. Opening hours, subjects and any
     present policies are written under keys prefixed with
     ``self.prefix_index_key``.

     :param lib: dict with ``id``, the three ``opening_hours_*`` keys,
                 ``subjects`` and ``policies``
     :return: result of ``prepare_document`` for the updated document, or
              None when no document matches the identifier
     """
     ident = "{key}:{value}".format(key=self.lib_data_identifier,
                                    value=lib['id'])
     search_results = self.indexer.search_for_ids(self.identifier_key,
                                                  [ident])
     if not search_results.results:
         logger.info('No results for {ident}'.format(ident=ident))
         return None

     doc = search_results.results[0]
     prefix = self.prefix_index_key

     for field in ('opening_hours_termtime',
                   'opening_hours_vacation',
                   'opening_hours_closed'):
         doc[prefix + field] = lib[field]
     doc[prefix + 'subject'] = lib['subjects']

     # Policies are optional: copy only the ones this library defines.
     policies = lib['policies']
     for audience, index_field in (('academic', 'policy_academic'),
                                   ('other', 'policy_other'),
                                   ('postgraduate', 'policy_postgraduate'),
                                   ('undergraduate', 'policy_undergraduate')):
         if audience in policies:
             doc[prefix + index_field] = policies[audience]

     return prepare_document(doc, search_results, self.precedence)
Пример #6
0
Файл: osm.py Проект: ox-it/moxie
    def endElement(self, element_type):
        """Turn the element that just closed into a POI document, if eligible.

        Looks like a SAX-style end-of-element callback (presumably
        ``ContentHandler.endElement``; confirm against the enclosing class).

        For ``node`` and ``way`` elements carrying at least one of
        ``self.element_tags``, a document is built from ``self.tags``
        (id/identifiers, tags, type, name, address, phone, website, opening
        hours, collection times, location), matched against the index by
        identifier, passed through ``prepare_document`` and appended to
        ``self.pois``.

        The element is silently skipped when its ``life_cycle`` tag is not
        ``in_use``, when any tag key contains ``disused``, or when its
        ``amenity``/``shop`` value is not in ``AMENITIES``/``SHOPS`` (or it
        has neither tag).

        :param element_type: name of the element being closed
        """
        if element_type == 'node':
            location = self.node_location
        elif element_type == 'way':
            # A way is located at the centre of the bounding box of its nodes.
            # NOTE(review): this lookup is outside the try block below, so a
            # node id missing from self.node_locations raises KeyError to the
            # caller. A way with no nodes yields a NaN location (inf + -inf).
            min_, max_ = (float('inf'), float('inf')), (float('-inf'), float('-inf'))
            for lat, lon in [self.node_locations[n] for n in self.nodes]:
                min_ = min(min_[0], lat), min(min_[1], lon)
                max_ = max(max_[0], lat), max(max_[1], lon)
            location = (min_[0] + max_[0]) / 2, (min_[1] + max_[1]) / 2
        # For any other element type `location` stays unbound; it is only read
        # inside the 'way'/'node' branch below.
        try:
            # Skip elements explicitly marked as not in use.
            if self.tags.get('life_cycle', 'in_use') != 'in_use':
                return

            for key in self.tags.iterkeys():
                if 'disused' in key:
                    # e.g. disused:amenity=restaurant
                    # http://wiki.openstreetmap.org/wiki/Key:disused
                    return

            if element_type in ['way', 'node'] and any([x in self.tags for x in self.element_tags]):
                result = {}
                osm_id = 'osm:%s' % self.id
                atco_id = self.tags.get('naptan:AtcoCode', None)
                result[self.identifier_key] = [osm_id]
                # if it has an ATCO ID, we set the ATCO ID as the main ID for this document
                # instead of the OSM ID
                if atco_id:
                    result['id'] = atco_id
                    result[self.identifier_key].append('atco:%s' % atco_id)
                else:
                    result['id'] = osm_id

                # Indexed tag values may hold several ';'-separated entries;
                # underscores become spaces and empty values are dropped.
                result['tags'] = []
                for it in self.indexed_tags:
                    doc_tags = [t.replace('_', ' ').strip() for t in self.tags.get(it, '').split(';')]
                    if doc_tags and doc_tags != ['']:
                        result['tags'].extend(doc_tags)

                # Filter elements depending on amenity / shop tags
                # ('amenity' takes priority over 'shop' when both are present)
                if 'amenity' in self.tags:
                    if self.tags['amenity'] in AMENITIES:
                        # special case for Park and Rides where amenity=parking and park_ride=bus/yes/... except no
                        # TODO we should be able to handle this kind of case in a better way
                        if self.tags['amenity'] == "parking" and self.tags.get('park_ride', 'no') != 'no':
                            result['type'] = PARK_AND_RIDE
                        else:
                            result['type'] = AMENITIES[self.tags['amenity']]
                    else:
                        return
                elif 'shop' in self.tags:
                    if self.tags['shop'] in SHOPS:
                        result['type'] = SHOPS[self.tags['shop']]
                    else:
                        return
                else:
                    return

                # if the element doesn't have a name, it will be an empty string
                # (the 'operator' tag is used as a fallback before that)
                result['name'] = self.tags.get('name', self.tags.get('operator', ''))
                result['name_sort'] = result['name']

                # Missing address parts are empty strings; split/join collapses
                # the resulting runs of whitespace.
                address = "{0} {1} {2} {3}".format(self.tags.get("addr:housename", ""), self.tags.get("addr:housenumber", ""),
                        self.tags.get("addr:street", ""), self.tags.get("addr:postcode", ""))
                result['address'] = " ".join(address.split())

                if 'phone' in self.tags:
                    result['phone'] = format_uk_telephone(self.tags['phone'])

                # 'website' is assigned after 'url', so it wins when both exist.
                if 'url' in self.tags:
                    result['website'] = self.tags['url']

                if 'website' in self.tags:
                    result['website'] = self.tags['website']

                if 'opening_hours' in self.tags:
                    result['opening_hours'] = self.tags['opening_hours']

                if 'collection_times' in self.tags:
                    result['collection_times'] = self.tags['collection_times']

                result['location'] = "%s,%s" % location
                search_results = self.indexer.search_for_ids(
                        self.identifier_key, result[self.identifier_key])
                self.pois.append(prepare_document(result, search_results, self.precedence))
        except Exception as e:
            # Best effort: a failing POI is logged (with traceback) and skipped.
            # NOTE(review): `e` is unused; exc_info=True already captures it.
            logger.warning("Couldn't index a POI.", exc_info=True)