def bulk_load(config):
    """Bulk load entities from a CSV file or SQL database.

    Rows in the source data are mapped to entities and links that the
    entity index can understand.
    """
    for fid, spec in config.items():
        coll = Collection.by_foreign_id(fid)
        if coll is None:
            # Auto-create a managed collection for unknown foreign IDs.
            meta = {
                'foreign_id': fid,
                'managed': True,
                'label': spec.get('label') or fid,
                'summary': spec.get('summary'),
                'category': spec.get('category'),
            }
            coll = Collection.create(meta)
        for role_fk in dict_list(spec, 'roles', 'role'):
            role = Role.by_foreign_id(role_fk)
            if role is None:
                log.warning("Could not find role: %s", role_fk)
            else:
                Permission.grant(coll, role, True, False)
        db.session.commit()
        update_collection(coll)
        for query in dict_list(spec, 'queries', 'query'):
            load_query(coll, query)
def __init__(self, mapper, name, data, schema):
    """Configure a mapped property from its spec dict."""
    self.mapper = mapper
    self.name = name
    self.data = data
    self.schema = schema
    self.join = data.get('join')
    self.literals = dict_list(data, 'literal', 'literals')
    self.refs = dict_list(data, 'column', 'columns')
    # HACK: derive additional column refs from the template string.
    self.template = data.get('template')
    if self.template is None:
        return
    self.formatter = Formatter(self.template)
    self.refs.extend(self.formatter.refs)
def __init__(self, dataset, data):
    """Set up a SQL-backed query: connection URI plus source tables."""
    super(DBQuery, self).__init__(dataset, data)
    # Environment variables in the URI (e.g. credentials) are expanded.
    self.database_uri = os.path.expandvars(data.get('database'))
    table_specs = dict_list(data, 'table', 'tables')
    self.tables = [QueryTable(self, spec) for spec in table_specs]
def __init__(self, dataset, data):
    """Set up a CSV-backed query from one or more source URLs."""
    super(CSVQuery, self).__init__(dataset, data)
    # Expand environment variables so URLs can carry e.g. auth tokens.
    self.csv_urls = {os.path.expandvars(u)
                     for u in dict_list(data, 'csv_url', 'csv_urls')}
    if not self.csv_urls:
        log.warning("[%s]: no CSV URLs specified", dataset.name)
def mapping_process(id):
    """Validate and queue bulk-load mapping queries for a collection.

    Expects a JSON body keyed by the collection's foreign_id; each query
    listed under 'queries'/'query' is validated via the model and then
    scheduled as an async bulk-load task. Requires admin plus write
    access on the collection. Returns ('', 204) on success; raises
    BadRequest for non-JSON bodies, missing payloads or invalid mappings.
    """
    collection = get_db_collection(id, request.authz.WRITE)
    require(request.authz.is_admin)
    if not request.is_json:
        raise BadRequest()
    data = request.get_json().get(collection.foreign_id)
    if data is None:
        # FIX: the payload did not address this collection; previously
        # this fell through into dict_list() and surfaced as a 500.
        raise BadRequest()
    for query in dict_list(data, 'queries', 'query'):
        try:
            model.make_mapping(query)
            bulk_load_query.apply_async([collection.id, query], priority=6)
        except InvalidMapping as invalid:
            raise BadRequest(invalid)
    return ('', 204)
def bulk_load(config):
    """Bulk load entities from a CSV file or SQL database.

    Rows from the source data are mapped to entities and links which can
    be understood by the entity index.
    """
    # Local import avoids a circular dependency at module load time.
    from aleph.logic.collections import create_collection
    for fid, spec in config.items():
        spec['foreign_id'] = fid
        spec.setdefault('label', fid)
        coll = create_collection(spec)
        for query in dict_list(spec, 'queries', 'query'):
            bulk_load_query.apply_async([coll.id, query], priority=6)
def __init__(self, name, data):
    """Build a dataset definition, resolving configured role grants.

    Raises ValueError when none of the configured roles resolve.
    """
    self.name = six.text_type(name)
    self.data = data
    self.label = data.get('label', name)
    self.info_url = data.get('info_url')
    self.category = data.get('category')
    self.entities_count = None
    self.public = False
    self.roles = []
    for role_fk in dict_list(data, 'roles', 'role'):
        role_id = Role.load_id(role_fk)
        if role_id is None:
            log.warning("Could not find role: %s", role_fk)
        else:
            self.roles.append(role_id)
        # A dataset is public if any of its roles is a public role.
        if role_id in get_public_roles():
            self.public = True
    if not self.roles:
        raise ValueError("No roles for dataset: %s" % self.name)
    self._queries = dict_list(data, 'queries', 'query')
def __init__(self, query, data):
    """Build a mapper from its spec, resolving and checking the schema.

    Raises TypeError when the named schema is unknown or belongs to a
    different section than this mapper handles.
    """
    self.query = query
    self.data = data
    self.keys = dict_list(data, 'keys', 'key')
    self.key_fingerprint = data.get('key_fingerprint', False)
    schema_name = data.get('schema')
    self.schema = schemata.get(schema_name)
    if self.schema is None or self.schema.section != self.section:
        raise TypeError("Invalid schema: %r" % schema_name)
    self.properties = [
        MapperProperty(self, prop_name, spec, self.schema.get(prop_name))
        for prop_name, spec in data.get('properties', {}).items()
    ]
def __init__(self, dataset, data):
    """Assemble a query: database URI, tables, entity and link mappers."""
    self.dataset = dataset
    self.data = data
    # Expand env vars so credentials can live outside the config file.
    self.database_uri = os.path.expandvars(data.get('database'))
    self.tables = [QueryTable(self, spec)
                   for spec in dict_list(data, 'table', 'tables')]
    # NOTE(review): 'entities' is effectively required — a missing key
    # raises here, unlike the optional 'links' below. Confirm intended.
    self.entities = [EntityMapper(self, ename, espec)
                     for ename, espec in data.get('entities').items()]
    self.links = [LinkMapper(self, lspec)
                  for lspec in data.get('links', [])]
def bulk_load(config):
    """Bulk load entities from a CSV file or SQL database.

    Rows in the source data are mapped to entities and links which can
    be understood by the entity index.
    """
    for fid, spec in config.items():
        coll = Collection.by_foreign_id(fid)
        if coll is None:
            # Create the collection on first sight of this foreign ID.
            spec['foreign_id'] = fid
            spec['label'] = spec.get('label', fid)
            coll = Collection.create(spec)
        db.session.commit()
        index_collection(coll)
        for query in dict_list(spec, 'queries', 'query'):
            bulk_load_query.apply_async([coll.id, query], priority=6)
def bulk_load(config):
    """Bulk load entities from a CSV file or SQL database.

    Rows in the source data are mapped to entities and links which the
    entity index can understand.
    """
    for fid, spec in config.items():
        coll = Collection.by_foreign_id(fid)
        if coll is None:
            # Unknown foreign IDs get a fresh managed collection.
            coll = Collection.create({
                'foreign_id': fid,
                'label': spec.get('label') or fid,
                'summary': spec.get('summary'),
                'category': spec.get('category'),
                'managed': True,
            })
        db.session.commit()
        index_collection(coll)
        for query in dict_list(spec, 'queries', 'query'):
            bulk_load_query.delay(coll.id, query)
def __init__(self, schemata, section, name, data):
    """Construct a schema definition from its spec dict."""
    assert section in self.SECTIONS, section
    self._schemata = schemata
    self.section = section
    self.name = name
    self.data = data
    self.label = data.get('label', name)
    self.plural = data.get('plural', self.label)
    self.icon = data.get('icon')
    # Hidden schemata are excluded from listings.
    self.hidden = data.get('hidden', False)
    # Fuzzy similarity search is pointless for schemata whose instances
    # share many near-identical names (land plots, assets, etc.).
    self.fuzzy = data.get('fuzzy', True)
    self._extends = dict_list(data, 'extends')
    # Distinct loop names avoid re-binding the `name` parameter.
    self._own_properties = [
        SchemaProperty(self, prop_name, prop_spec)
        for prop_name, prop_spec in data.get('properties', {}).items()
    ]
    self.forward = data.get('forward', self.label)
    self.reverse = data.get('reverse', self.label)