def parse_video_format(self, record):
    """
    Extract format info from the av entity and return VideoFormat props.

    Returns a dict with any of the keys 'gauge', 'aspect_ratio', 'sound',
    'colour' that could be decoded from <efg:avManifestation>/<efg:format>,
    or None when the record has no format element. Values that are missing,
    'n/a' or not in the corresponding codelist are skipped; invalid ones are
    reported via self.warnings.
    """
    node = record.find('./efg:avManifestation/efg:format', self.ns)
    if node is None:
        # original fell off the end implicitly; keep returning None
        return None

    video_format = {}

    def add_enum(tag, key, lookup, codelist, label):
        """Decode one optional enum child element into video_format[key]."""
        el = node.find(tag, self.ns)
        if el is None or el.text is None or el.text.lower() == 'n/a':
            return
        value = el.text.strip()
        code_el = lookup(value, codelist)
        if code_el is None:
            self.warnings.append('Invalid ' + label + ' for: ' + value)
        else:
            video_format[key] = code_el[0]

    # gauge (0..1) enum — matched by code
    add_enum('efg:gauge', 'gauge',
             codelists.fromCode, codelists.GAUGE, 'gauge')
    # aspectRatio (0..1) enum — matched by code
    add_enum('efg:aspectRatio', 'aspect_ratio',
             codelists.fromCode, codelists.ASPECT_RATIO, 'aspect ratio')
    # sound (0..1) enum — matched by description, not code
    add_enum('efg:sound', 'sound',
             codelists.fromDescription, codelists.VIDEO_SOUND, 'format sound')
    # colour (0..1) — matched by description, not code
    add_enum('efg:colour', 'colour',
             codelists.fromDescription, codelists.COLOUR, 'format colour')

    log.debug(video_format)
    return video_format
def parse_descriptions(self, record):
    """
    Extract all <efg:description> elements and return a list of dicts.

    Each dict may carry 'description_type', 'language', 'source_ref' and
    'text'. Invalid type/language codes are reported via self.warnings
    instead of failing the whole record.
    """
    descriptions = []
    for node in record.findall("efg:description", self.ns):
        description = {}
        dtype = node.get('type')
        if dtype is not None and dtype.lower() != 'n/a':
            code_el = codelists.fromDescription(
                dtype, codelists.DESCRIPTION_TYPES)
            if code_el is None:
                self.warnings.append('Invalid description type for: ' + dtype)
            else:
                description['description_type'] = code_el[0]
        lang = node.get('lang')
        if lang is not None and lang.lower() != 'n/a':
            lang_val = lang.lower()
            lang_code = codelists.fromCode(lang_val, codelists.LANGUAGE)
            if lang_code is None:
                self.warnings.append('Invalid description language for: ' +
                                     lang)
            else:
                description['language'] = lang_code[0]
        description['source_ref'] = node.get('source')
        # FIX: an empty <description/> element used to raise AttributeError
        # on node.text.strip(); store None for missing text instead
        description['text'] = (
            node.text.strip() if node.text is not None else None)
        log.debug('description: {}'.format(description))
        descriptions.append(description)
    # october 2018: change: description is optional, so an empty list is OK
    return descriptions
def get_non_av_type(self, record):
    """
    Return the decoded Non-AV content type of the record.

    Raises ValueError when the <efg:nonAVManifestation>/<efg:type> element
    is missing or empty, or when its value is not in NON_AV_TYPES.
    """
    node = record.find('./efg:nonAVManifestation/efg:type', self.ns)
    # FIX: an element with no text used to raise AttributeError on
    # node.text.strip(); treat it as missing as well
    if node is None or node.text is None:
        raise ValueError('Non AV type is missing')
    value = node.text.strip()
    code_el = codelists.fromCode(value, codelists.NON_AV_TYPES)
    if code_el is None:
        raise ValueError('Invalid Non-AV type for: ' + value)
    return code_el[0]
def parse_titles(self, record, avcreation=False):
    """
    Extract all <efg:title> elements and return a list of title dicts.

    Each dict carries 'text' and optionally 'language', 'relation' and,
    when avcreation is True, 'part_designations' built from the
    <efg:partDesignation> children. Invalid language/relation codes are
    reported via self.warnings; an invalid part designation unit raises
    ValueError.
    """
    titles = []
    for node in record.findall("efg:title", self.ns):
        title = {}
        lang = node.get('lang')
        if lang is not None and lang.lower() != 'n/a':
            lang_val = lang.lower()
            lang_code = codelists.fromCode(lang_val, codelists.LANGUAGE)
            if lang_code is None:
                # FIX: 'lang' is a plain attribute string; the original
                # referenced lang.text here, which raised AttributeError
                self.warnings.append('Invalid title language for: ' + lang)
            else:
                title['language'] = lang_code[0]
        if avcreation:
            parts = []
            for part in node.findall('efg:partDesignation', self.ns):
                part_unit = part.find('efg:unit', self.ns).text.strip()
                code_el = codelists.fromCode(part_unit,
                                             codelists.AV_TITLE_UNIT)
                if code_el is None:
                    raise ValueError(
                        'Invalid part designation unit for: ' + part_unit)
                parts.append("{} {}".format(
                    code_el[0],
                    part.find('efg:value', self.ns).text.strip()))
            title['part_designations'] = parts
        title['text'] = node.find('efg:text', self.ns).text.strip()
        title_rel = node.find('efg:relation', self.ns)
        # FIX: also guard against a <relation/> element with no text, which
        # used to raise AttributeError on title_rel.text.lower()
        if (title_rel is not None and title_rel.text is not None
                and title_rel.text.lower() != 'n/a'):
            code_el = codelists.fromCode(title_rel.text.strip(),
                                         codelists.AV_TITLE_TYPES)
            if code_el is None:
                self.warnings.append('Invalid title type for: ' +
                                     title_rel.text.strip())
            else:
                title['relation'] = code_el[0]
        log.debug('title: {}'.format(title))
        titles.append(title)
    return titles
def parse_production_contries(self, record):
    """
    Extract the production countries and, when present, their reference.

    Returns a list of [country_code, reference] pairs. Entries whose
    country code is not in the COUNTRY codelist are skipped and reported
    via self.warnings.
    """
    results = []
    for country_el in record.findall("./efg:countryOfReference", self.ns):
        raw_value = country_el.text.strip()
        code = raw_value.upper()
        if codelists.fromCode(code, codelists.COUNTRY) is None:
            self.warnings.append('Invalid country code for: ' + raw_value)
            continue
        pair = [code, country_el.get('reference')]
        log.debug("country: {}, reference: {}".format(pair[0], pair[1]))
        results.append(pair)
    return results
def parse_languages(self, record, audio_visual=False):
    """
    Extract language and usage if any.

    It returns an array of arrays as in the following example:
    [['fr','03'],['fr','25'],['ca','25']]
    The second nested element corresponds to the usage code in the
    controlled codelist (None when absent or invalid).
    """
    inpath = 'efg:avManifestation' if audio_visual else 'efg:nonAVManifestation'
    languages = []
    for node in record.findall("./" + inpath + "/efg:language", self.ns):
        # FIX: an empty <language/> element used to raise AttributeError
        # on node.text.lower(); skip it instead
        if node.text is None:
            continue
        lang = node.text.lower()
        if lang == 'n/a':
            continue
        lang_code = codelists.fromCode(lang, codelists.LANGUAGE)
        if lang_code is None:
            self.warnings.append('Invalid language for: ' + node.text)
            continue
        lang = lang_code[0]
        usage = node.get('usage')
        if usage is not None:
            if usage.lower() == 'n/a':
                usage = None
            else:
                # usage is matched by description, not by code
                code_el = codelists.fromDescription(
                    usage, codelists.LANGUAGE_USAGES)
                if code_el is None:
                    self.warnings.append('Invalid language usage for: ' +
                                         usage)
                    usage = None
                else:
                    usage = code_el[0]
        lang_usage = [lang, usage]
        log.debug("lang code: {}, usage code: {}".format(
            lang_usage[0], lang_usage[1]))
        languages.append(lang_usage)
    return languages
def update_meta_stage(self, resource_id, path, metadata_update):
    """
    Update data of MetaStage (that has resource_id) with data coming from
    xml file in path
    - If metadata_update=false and creation exists then nothing is updated
    - If Item and Creation don't exist then it creates them
    Return xml_resource (the updated MetaStage), group, source_id,
    item_type, item_node.
    """
    xml_resource = None
    # FIX: these were previously unbound when the MetaStage lookup returned
    # None (or an exception fired before assignment), so the final return
    # raised NameError; default them all to None
    group = None
    source_id = None
    item_type = None
    item_node = None
    try:
        xml_resource = self.graph.MetaStage.nodes.get(uuid=resource_id)
        if xml_resource is not None:
            group = GraphBaseOperations.getSingleLinkedNode(
                xml_resource.ownership)
            source_id = extract_creation_ref(self, path)
            if source_id is None:
                raise Exception(
                    "No source ID found importing metadata file %s" % path)
            item_type = extract_item_type(self, path)
            if codelists.fromCode(item_type,
                                  codelists.CONTENT_TYPES) is None:
                raise Exception("Invalid content type for: " + item_type)
            log.info("Content source ID: {0}; TYPE: {1}".format(
                source_id, item_type))
            # check for existing item; create and wire it up when absent
            item_node = xml_resource.item.single()
            if item_node is None:
                item_properties = {}
                item_properties['item_type'] = item_type
                item_node = self.graph.Item(**item_properties).save()
                item_node.ownership.connect(group)
                item_node.meta_source.connect(xml_resource)
                item_node.save()
                log.debug("Item resource created for resource_id=%s" %
                          resource_id)
            # check for existing creation
            creation = item_node.creation.single()
            if creation is not None and not metadata_update:
                log.info("Skip updating metadata for resource_id=%s" %
                         resource_id)
            else:
                # update metadata; warnings collected during extraction are
                # stored on the MetaStage node itself
                log.debug("Updating metadata for resource_id=%s" %
                          resource_id)
                xml_resource.warnings = extract_descriptive_metadata(
                    self, path, item_type, item_node)
                log.info("Metadata updated for resource_id=%s" % resource_id)
            xml_resource.status = 'COMPLETED'
            xml_resource.status_message = 'Nothing to declare'
            xml_resource.save()
        else:
            log.warning("Not found MetaStage for resource_id=%s" %
                        resource_id)
    except Exception as e:
        log.error("Failed update of resource_id %s, Error: %s" %
                  (resource_id, e))
        # persist the failure on the MetaStage node before re-raising
        if xml_resource is not None:
            xml_resource.status = 'ERROR'
            xml_resource.status_message = str(e)
            xml_resource.save()
        raise e
    return xml_resource, group, source_id, item_type, item_node
def parse_keywords(self, record):
    """
    Extract all <efg:term> children of every <efg:keywords> element.

    Returns a list of dicts carrying 'term', 'schemeID' and optionally
    'keyword_type', 'language' and 'termID'. Keywords typed 'Project' are
    filtered out; 'Form' terms must come from the controlled IMC FORM list.
    Invalid codes are reported via self.warnings.
    """
    keywords = []
    for node in record.findall("efg:keywords", self.ns):
        for term in node.findall('efg:term', self.ns):
            # FIX: an empty <term/> used to raise AttributeError on
            # term.text.lower()/strip(); skip it instead
            if term.text is None:
                continue
            keyword = {}
            ktype = node.get('type')
            if ktype is not None and ktype.lower() != 'n/a':
                # filter ktype with value 'Project'
                if ktype == 'Project':
                    continue
                code_el = codelists.fromDescription(
                    ktype, codelists.KEYWORD_TYPES)
                if code_el is None:
                    self.warnings.append('Invalid keyword type for: ' +
                                         ktype)
                else:
                    keyword['keyword_type'] = code_el[0]
                    log.debug('keyword [type]: %s' %
                              keyword['keyword_type'])
            lang = node.get('lang')
            if lang is not None and lang.lower() != 'n/a':
                lang_val = lang.lower()
                lang_code = codelists.fromCode(lang_val, codelists.LANGUAGE)
                if lang_code is None:
                    # FIX: 'lang' is a plain attribute string; the original
                    # referenced lang.text here, which raised AttributeError
                    self.warnings.append('Invalid keyword language for: ' +
                                         lang)
                else:
                    keyword['language'] = lang_code[0]
                    log.debug('language: {}'.format(keyword['language']))
            if ktype == 'Form':
                # check term from a controlled IMC list
                if term.text.lower() == 'n/a':
                    continue
                code_el = codelists.fromCode(term.text.strip(),
                                             codelists.FORM)
                if code_el is None:
                    self.warnings.append('Invalid form type for: ' +
                                         term.text.strip())
                    continue
                else:
                    keyword['term'] = code_el[0]
            else:
                keyword['term'] = term.text.strip()
            log.debug('keyword: {}'.format(keyword['term']))
            if term.get('id') is not None:
                # check keyword term id is integer (keyword term id is
                # optional); on failure only a warning is recorded and the
                # 'termID' key is left unset (original behavior)
                try:
                    int(term.get('id'))
                    keyword['termID'] = term.get('id')
                except Exception:
                    self.warnings.append('Invalid keyword term id for: ' +
                                         term.get('id') +
                                         '. Expected integer.')
            else:
                keyword['termID'] = None
            keyword['schemeID'] = node.get('scheme')
            keywords.append(keyword)
    return keywords
def post(self):
    """
    Search annotations (POST), paginated, and return the matching
    annotation bodies together with the creations they annotate.

    Builds a Cypher query incrementally from the request's 'filter'
    parameter: annotation type (required inside 'filter'), optional
    geo-distance constraint (TAG only), creation term matching, and
    creation filters (type, provider, IPR status, production year range,
    annotated terms).
    """
    self.initGraph()
    input_parameters = self.get_input()
    offset, limit = self.get_paging()
    # paging is 1-based in the API; make offset 0-based for SKIP math
    offset -= 1
    logger.debug("paging: offset {0}, limit {1}".format(offset, limit))
    if offset < 0:
        raise RestApiException('Page number cannot be a negative value',
                               status_code=hcodes.HTTP_BAD_REQUEST)
    if limit < 0:
        raise RestApiException('Page size cannot be a negative value',
                               status_code=hcodes.HTTP_BAD_REQUEST)
    # query fragments assembled below:
    # starters: leading WITH clauses; filters: MATCH/WHERE clauses;
    # projections: extra RETURN expressions; order_by: trailing ORDER BY
    filters = []
    starters = []
    projections = []
    order_by = ''
    filtering = input_parameters.get('filter')
    if filtering is not None:
        anno_type = filtering.get('type')
        if anno_type is None:
            raise RestApiException('Annotation type cannot be empty',
                                   status_code=hcodes.HTTP_BAD_REQUEST)
        if anno_type not in self.__class__.allowed_anno_types:
            raise RestApiException(
                "Bad annotation type parameter: expected one of %s" %
                (self.__class__.allowed_anno_types, ),
                status_code=hcodes.HTTP_BAD_REQUEST)
        filters.append("WHERE anno.annotation_type='{anno_type}'".format(
            anno_type=anno_type))
        # add filter for processed content with COMPLETE status
        filters.append(
            "MATCH (creation:Creation)<-[:CREATION]-(:Item)-[:CONTENT_SOURCE]->(content:ContentStage) "
            + "WHERE content.status = 'COMPLETED' ")
        filters.append(
            'MATCH (title:Title)<-[:HAS_TITLE]-(creation)<-[:CREATION]-(i:Item)<-[:SOURCE]-(anno)'
        )
        projections.append(
            'collect(distinct creation{.*, type:i.item_type, titles }) AS creations'
        )
        if anno_type == 'TAG':
            # look for geo distance filter
            geo_distance = filtering.get('geo_distance')
            if geo_distance is not None:
                distance = geo_distance['distance']
                location = geo_distance['location']
                starters.append(
                    "WITH point({{longitude: {lon}, latitude: {lat} }}) as cityPosition, "
                    "{dist} as distanceInMeters".format(
                        lon=location['long'],
                        lat=location['lat'],
                        dist=distance))
                filters.append(
                    "MATCH (anno)-[:HAS_BODY]-(body:ResourceBody) "
                    "WHERE body.spatial IS NOT NULL AND "
                    "distance(cityPosition, point({latitude:body.spatial[0], longitude:body.spatial[1]})) < distanceInMeters"
                )
                projections.append(
                    "distance(cityPosition, point({longitude:body.spatial[0],latitude:body.spatial[1]})) as distance"
                )
                order_by = "ORDER BY distance"
        creation = filtering.get('creation')
        if creation is not None:
            c_match = creation.get('match')
            if c_match is not None:
                term = c_match.get('term')
                if term is not None:
                    term = self.graph.sanitize_input(term)
                multi_match = []
                multi_match_where = []
                multi_match_query = ''
                fields = c_match.get('fields')
                if term is not None and (fields is None or len(fields) == 0):
                    raise RestApiException(
                        'Match term fields cannot be empty',
                        status_code=hcodes.HTTP_BAD_REQUEST)
                if fields is None:
                    fields = []
                multi_match_fields = []
                multi_optional_match = []
                for f in fields:
                    if f not in self.__class__.allowed_term_fields:
                        raise RestApiException(
                            "Bad field: expected one of %s" %
                            (self.__class__.allowed_term_fields, ),
                            status_code=hcodes.HTTP_BAD_REQUEST)
                    if not term:
                        # catch '*': fields validated but no match built
                        break
                    if f == 'title':
                        multi_match.append(
                            "MATCH (creation)-[:HAS_TITLE]->(t:Title)")
                        multi_match_fields.append('t')
                        multi_match_where.append(
                            "t.text =~ '(?i).*{term}.*'".format(term=term))
                    elif f == 'description':
                        multi_match.append(
                            "OPTIONAL MATCH (creation)-[:HAS_DESCRIPTION]->(d:Description)"
                        )
                        multi_match_fields.append('d')
                        multi_match_where.append(
                            "d.text =~ '(?i).*{term}.*'".format(term=term))
                    elif f == 'keyword':
                        multi_optional_match.append(
                            "OPTIONAL MATCH (creation)-[:HAS_KEYWORD]->(k:Keyword)"
                        )
                        multi_match_fields.append('k')
                        multi_match_where.append(
                            "k.term =~ '(?i){term}'".format(term=term))
                    elif f == 'contributor':
                        multi_optional_match.append(
                            "OPTIONAL MATCH (creation)-[:CONTRIBUTED_BY]->(a:Agent)"
                        )
                        multi_match_fields.append('a')
                        multi_match_where.append(
                            "ANY(item in a.names where item =~ '(?i).*{term}.*')"
                            .format(term=term))
                    else:
                        # should never be reached (fields validated above)
                        raise RestApiException(
                            'Unexpected field type',
                            status_code=hcodes.HTTP_SERVER_ERROR)
                if len(multi_match) > 0:
                    # NOTE(review): this WITH references cityPosition, which
                    # only exists when the geo_distance starter was added —
                    # term matching without geo filter likely fails; confirm
                    multi_match_query = ' '.join(multi_match) \
                        + " " + ' '.join(multi_optional_match) \
                        + " WITH creation, cityPosition, title, i, body, " + ', '.join(multi_match_fields) \
                        + " WHERE " + ' OR '.join(multi_match_where)
                    # logger.debug(multi_match_query)
                    filters.append(multi_match_query)
            # NOTE(review): c_filter may be None when 'filter' is absent
            # from the creation block; c_filter.get below would then raise
            # AttributeError — confirm against callers
            c_filter = creation.get('filter')
            # TYPE
            c_type = c_filter.get('type', 'all')
            c_type = c_type.strip().lower()
            if c_type not in self.__class__.allowed_item_types:
                raise RestApiException(
                    "Bad item type parameter: expected one of %s" %
                    (self.__class__.allowed_item_types, ),
                    status_code=hcodes.HTTP_BAD_REQUEST)
            if c_type != 'all':
                filters.append(
                    "MATCH (i) WHERE i.item_type =~ '(?i){c_type}'".format(
                        c_type=c_type))
            # PROVIDER
            c_provider = c_filter.get('provider')
            if c_provider is not None:
                filters.append(
                    "MATCH (creation)-[:RECORD_SOURCE]->(:RecordSource)-[:PROVIDED_BY]->(p:Provider)"
                    " WHERE p.identifier='{provider}'".format(
                        provider=c_provider.strip()))
            # IPR STATUS
            c_iprstatus = c_filter.get('iprstatus')
            if c_iprstatus is not None:
                c_iprstatus = c_iprstatus.strip()
                if codelists.fromCode(c_iprstatus,
                                      codelists.RIGHTS_STATUS) is None:
                    raise RestApiException(
                        'Invalid IPR status code for: ' + c_iprstatus)
                filters.append(
                    "MATCH (creation) WHERE creation.rights_status = '{iprstatus}'"
                    .format(iprstatus=c_iprstatus))
            # PRODUCTION YEAR
            c_year_from = c_filter.get('yearfrom')
            c_year_to = c_filter.get('yearto')
            if c_year_from is not None or c_year_to is not None:
                # set defaults if year is missing
                c_year_from = '1890' if c_year_from is None else str(
                    c_year_from)
                c_year_to = '1999' if c_year_to is None else str(c_year_to)
                date_clauses = []
                if c_type == 'video' or c_type == 'all':
                    # videos carry production_years
                    date_clauses.append(
                        "ANY(item IN creation.production_years WHERE item >= '{yfrom}') "
                        "AND ANY(item IN creation.production_years WHERE item <= '{yto}')"
                        .format(yfrom=c_year_from, yto=c_year_to))
                if c_type == 'image' or c_type == 'text' or c_type == 'all':
                    # images/texts carry date_created (ISO-like strings:
                    # compare on the year prefix)
                    date_clauses.append(
                        "ANY(item IN creation.date_created WHERE substring(item, 0, 4) >= '{yfrom}') "
                        "AND ANY(item IN creation.date_created WHERE substring(item, 0 , 4) <= '{yto}')"
                        .format(yfrom=c_year_from, yto=c_year_to))
                filters.append("MATCH (creation) WHERE {clauses}".format(
                    clauses=' or '.join(date_clauses)))
            # ANNOTATED TERMS
            terms = c_filter.get('terms')
            if terms:
                term_clauses = []
                iris = [term['iri'] for term in terms if 'iri' in term]
                if iris:
                    term_clauses.append(
                        'term.iri IN {iris}'.format(iris=iris))
                free_terms = [
                    term['label'] for term in terms
                    if 'iri' not in term and 'label' in term
                ]
                if free_terms:
                    term_clauses.append(
                        'term.value IN {free_terms}'.format(
                            free_terms=free_terms))
                if term_clauses:
                    filters.append(
                        "MATCH (i)<-[:SOURCE]-(anno2)-[:HAS_BODY]-(term) WHERE {clauses}"
                        .format(clauses=' or '.join(term_clauses)))
    # first request to get the number of elements to be returned
    countv = "{starters} MATCH (anno:Annotation)" \
        " {filters} " \
        " RETURN COUNT(DISTINCT body)".format(
            starters=' '.join(starters), filters=' '.join(filters))
    # get total number of elements
    numels = [row[0] for row in self.graph.cypher(countv)][0]
    logger.debug("Number of elements retrieved: {0}".format(numels))
    # NOTE(review): this query always references cityPosition and the
    # result loop always reads row[2] (distance) — both only exist when
    # the TAG geo_distance filter was supplied; confirm intended usage
    query = "{starters} MATCH (anno:Annotation)" \
        " {filters} " \
        "WITH body, i, cityPosition, creation, collect(distinct title) AS titles " \
        "RETURN DISTINCT body, {projections} {orderBy}".format(
            starters=' '.join(starters),
            filters=' '.join(filters),
            projections=', '.join(projections),
            orderBy=order_by)
    # logger.debug(query)
    data = []
    result = self.graph.cypher(query)
    for row in result:
        # AD-HOC implementation at the moment
        body = self.graph.ResourceBody.inflate(row[0])
        res = {'iri': body.iri, 'name': body.name, 'spatial': body.spatial}
        res['sources'] = []
        for source in row[1]:
            creation = {
                'uuid': source['uuid'],
                'external_ids': source['external_ids'],
                'rights_status': source['rights_status'],
                'type': source['type']
            }
            # PRODUCTION YEAR: get the first year in the array
            if 'production_years' in source:
                creation['year'] = source['production_years'][0]
            elif 'date_created' in source:
                creation['year'] = source['date_created'][0]
            # TITLE
            if 'identifying_title' in source:
                creation['title'] = source['identifying_title']
            elif 'titles' in source and len(source['titles']) > 0:
                # at the moment get the first always!
                title_node = self.graph.Title.inflate(source['titles'][0])
                creation['title'] = title_node.text
            res['sources'].append(creation)
        res['distance'] = row[2]
        # creator = self.graph.User.inflate(row[3])
        # res['creator'] = {
        #     'uuid': creator.uuid,
        #     'name': creator.surname + ', ' + creator.name
        # }
        data.append(res)
    meta_response = {"totalItems": numels}
    return self.force_response(data, meta=meta_response)
def post(self):
    """
    Search creations (POST), paginated.

    Builds a Cypher query from the request's 'filter' (type, provider,
    city, country, IPR status, production year range, annotated terms)
    and an optional 'match' term that is run against a fulltext index.
    Returns serialized video/image entities plus per-provider and
    per-decade counts in the response meta.
    """
    self.graph = self.get_service_instance('neo4j')
    input_parameters = self.get_input()
    offset, limit = self.get_paging()
    # paging is 1-based in the API; make offset 0-based for SKIP math
    offset -= 1
    logger.debug("paging: offset {0}, limit {1}".format(offset, limit))
    if offset < 0:
        raise RestApiException('Page number cannot be a negative value',
                               status_code=hcodes.HTTP_BAD_REQUEST)
    if limit < 0:
        raise RestApiException('Page size cannot be a negative value',
                               status_code=hcodes.HTTP_BAD_REQUEST)
    # check request for term matching
    provider = None  # TODO: no longer used, to be removed
    # NOTE: the legacy per-field multi-match filtering (title/description/
    # keyword/contributor regex matches) was disabled in favour of the
    # fulltext index query built below; only this empty placeholder
    # survives so the query templates still format. See VCS history for
    # the old implementation.
    multi_match_query = ''
    # check request for filtering
    filters = []
    # add filter for processed content with COMPLETE status
    filters.append(
        "MATCH (n)<-[:CREATION]-(:Item)-[:CONTENT_SOURCE]->(content:ContentStage) "
        + "WHERE content.status = 'COMPLETED'")
    entity = 'Creation'
    filtering = input_parameters.get('filter')
    if filtering is not None:
        # check item type; it selects which node label is matched
        item_type = filtering.get('type', 'all')
        if item_type is None:
            item_type = 'all'
        else:
            item_type = item_type.strip().lower()
        if item_type not in self.__class__.allowed_item_types:
            raise RestApiException(
                "Bad item type parameter: expected one of %s" %
                (self.__class__.allowed_item_types, ),
                status_code=hcodes.HTTP_BAD_REQUEST)
        if item_type == 'all':
            entity = 'Creation'
        elif item_type == 'video':
            entity = 'AVEntity'
        elif item_type == 'image':
            entity = 'NonAVEntity'
        else:
            # should never be reached (validated above)
            raise RestApiException('Unexpected item type',
                                   status_code=hcodes.HTTP_SERVER_ERROR)
        # PROVIDER (currently only logged; the filter clause is disabled)
        provider = filtering.get('provider')
        logger.info("provider {0}".format(provider))
        #if provider is not None:
        #    filters.append(
        #        "MATCH (n)-[:RECORD_SOURCE]->(:RecordSource)-[:PROVIDED_BY]->(p:Provider)" +
        #        " WHERE p.identifier='{provider}'".format(provider=provider.strip()))
        # CITY
        city = filtering.get('city')
        if city is not None:
            filters.append(
                "MATCH (n)-[:RECORD_SOURCE]->(:RecordSource)-[:PROVIDED_BY]->(p:Provider)"
                + " WHERE p.city='{city}'".format(city=city.strip()))
            logger.info("city {0}".format(city))
        # COUNTRY
        country = filtering.get('country')
        if country is not None:
            country = country.strip().upper()
            if codelists.fromCode(country, codelists.COUNTRY) is None:
                raise RestApiException('Invalid country code for: ' +
                                       country)
            filters.append(
                "MATCH (n)-[:COUNTRY_OF_REFERENCE]->(c:Country) WHERE c.code='{country_ref}'"
                .format(country_ref=country))
        # IPR STATUS
        iprstatus = filtering.get('iprstatus')
        if iprstatus is not None:
            iprstatus = iprstatus.strip()
            if codelists.fromCode(iprstatus,
                                  codelists.RIGHTS_STATUS) is None:
                raise RestApiException('Invalid IPR status code for: ' +
                                       iprstatus)
            filters.append(
                "MATCH (n) WHERE n.rights_status = '{iprstatus}'".format(
                    iprstatus=iprstatus))
        # PRODUCTION YEAR RANGE (skipped entirely when missingDate is set)
        missingDate = filtering.get('missingDate')
        # logger.debug("missingDate: {0}".format(missingDate))
        if not missingDate:
            year_from = filtering.get('yearfrom')
            year_to = filtering.get('yearto')
            if year_from is not None or year_to is not None:
                # set defaults if year is missing
                year_from = '1890' if year_from is None else str(year_from)
                year_to = '1999' if year_to is None else str(year_to)
                # FIXME: this DO NOT work with image
                date_clauses = []
                if item_type == 'video' or item_type == 'all':
                    date_clauses.append(
                        "ANY(item in n.production_years where item >= '{yfrom}') "
                        "and ANY(item in n.production_years where item <= '{yto}')"
                        .format(yfrom=year_from, yto=year_to))
                if item_type == 'image' or item_type == 'all':
                    # compare on the 4-char year prefix of date_created
                    date_clauses.append(
                        "ANY(item in n.date_created where substring(item, 0, 4) >= '{yfrom}') "
                        "and ANY(item in n.date_created where substring(item, 0 , 4) <= '{yto}')"
                        .format(yfrom=year_from, yto=year_to))
                filters.append("MATCH (n) WHERE {clauses}".format(
                    clauses=' or '.join(date_clauses)))
        # TERMS
        terms = filtering.get('terms')
        if terms:
            term_clauses = []
            iris = [
                term['iri'] for term in terms
                if 'iri' in term and term['iri'] is not None
            ]
            if iris:
                term_clauses.append('body.iri IN {iris}'.format(iris=iris))
            # NOTE(review): due to operator precedence this condition parses
            # as ('iri' not in term) or (term['iri'] is None and 'label' in
            # term) — term['label'] can KeyError when 'iri' and 'label' are
            # both absent; likely the intent was (no iri) and (has label).
            # Confirm before changing.
            free_terms = [
                term['label'] for term in terms
                if 'iri' not in term or term['iri'] is None
                and 'label' in term
            ]
            if free_terms:
                term_clauses.append('body.value IN {free_terms}'.format(
                    free_terms=free_terms))
            if term_clauses:
                filters.append(
                    "MATCH (n)<-[:CREATION]-(i:Item)<-[:SOURCE]-(tag:Annotation {{annotation_type:'TAG'}})-[:HAS_BODY]-(body) "
                    "WHERE {clauses}".format(
                        clauses=' or '.join(term_clauses)))
    match = input_parameters.get('match')
    fulltext = None
    if match is not None:
        term = match.get('term')
        if term is not None:
            term = self.graph.sanitize_input(term)
            term = self.graph.fuzzy_tokenize(term)
            # query the "titles" fulltext index, then anchor the hits back
            # to the entity being searched
            fulltext = """
                CALL db.index.fulltext.queryNodes("titles", '{term}') YIELD node, score
                WITH node, score
                MATCH (n:{entity})-[:HAS_TITLE|HAS_DESCRIPTION|HAS_KEYWORD]->(node)
            """.format(term=term, entity=entity)
            # RETURN node, n, score
    # first request to get the number of elements to be returned
    if fulltext is not None:
        countv = "{fulltext} {filters} RETURN COUNT(DISTINCT(n))".format(
            fulltext=fulltext, filters=' '.join(filters))
        query = "{fulltext} {filters} " \
            "RETURN DISTINCT(n) SKIP {offset} LIMIT {limit}".format(
                fulltext=fulltext,
                filters=' '.join(filters),
                offset=offset * limit,
                limit=limit)
    else:
        countv = "MATCH (n:{entity})" \
            " {filters} " \
            " {match} " \
            " RETURN COUNT(DISTINCT(n))".format(
                entity=entity,
                filters=' '.join(filters),
                match=multi_match_query)
        query = "MATCH (n:{entity})" \
            " {filters} " \
            " {match} " \
            "RETURN DISTINCT(n) SKIP {offset} LIMIT {limit}".format(
                entity=entity,
                filters=' '.join(filters),
                match=multi_match_query,
                offset=offset * limit,
                limit=limit)
    # logger.debug("QUERY to get number of elements: {0}".format(countv))
    # get total number of elements
    numels = [row[0] for row in self.graph.cypher(countv)][0]
    logger.debug("Number of elements retrieved: {0}".format(numels))
    # logger.debug(query)
    data = []
    result = self.graph.cypher(query)
    api_url = get_api_url(request, PRODUCTION)
    for row in result:
        # inflate each row to the concrete entity type via its node labels
        if 'AVEntity' in row[0].labels:
            v = self.graph.AVEntity.inflate(row[0])
        elif 'NonAVEntity' in row[0].labels:
            v = self.graph.NonAVEntity.inflate(row[0])
        else:
            # should never be reached
            raise RestApiException('Unexpected item type',
                                   status_code=hcodes.HTTP_SERVER_ERROR)
        item = v.item.single()
        if isinstance(v, self.graph.AVEntity):
            # video: serialize and attach content/thumbnail/summary links
            video_url = api_url + 'api/videos/' + v.uuid
            video = self.getJsonResponse(v,
                                         max_relationship_depth=1,
                                         relationships_expansion=[
                                             'record_sources.provider',
                                             'item.ownership',
                                             'item.revision'
                                         ])
            logger.debug("video links %s" % video['links'])
            video['links']['self'] = video_url
            video['links']['content'] = video_url + '/content?type=video'
            if item.thumbnail is not None:
                video['links']['thumbnail'] = video_url + \
                    '/content?type=thumbnail'
            video['links']['summary'] = video_url + '/content?type=summary'
            data.append(video)
        elif isinstance(v, self.graph.NonAVEntity):
            # image: serialize and attach content/thumbnail/summary links
            image_url = api_url + 'api/images/' + v.uuid
            image = self.getJsonResponse(
                v,
                max_relationship_depth=1,
                relationships_expansion=[
                    'record_sources.provider',
                    'item.ownership',
                    # 'titles.creation',
                    # 'keywords.creation',
                    # 'descriptions.creation',
                ])
            logger.debug("image links %s" % image['links'])
            image['links']['self'] = image_url
            image['links']['content'] = image_url + '/content?type=image'
            if item.thumbnail is not None:
                image['links']['thumbnail'] = image_url + \
                    '/content?type=thumbnail'
            image['links']['summary'] = image_url + '/content?type=summary'
            data.append(image)
    # return also the total number of elements
    meta_response = {"totalItems": numels}
    # count result by provider if provider == null
    if provider is None:
        count_by_provider_query = "MATCH (n:{entity})" \
            " {filters} " \
            " {match} " \
            "MATCH (n)-[:RECORD_SOURCE]->(r:RecordSource)-[:PROVIDED_BY]->(p:Provider) " \
            "WITH distinct p, count(distinct n) as numberOfCreations " \
            "RETURN p.identifier, numberOfCreations".format(
                entity=entity,
                filters=' '.join(filters),
                match=multi_match_query)
        # logger.debug(count_by_provider_query)
        result_p_count = self.graph.cypher(count_by_provider_query)
        group_by_providers = {}
        for row in result_p_count:
            group_by_providers[row[0]] = row[1]
        # logger.debug(group_by_providers)
        meta_response["countByProviders"] = group_by_providers
    # count result by year: bucket creations by decade using the first
    # production year / creation date (first 3 digits + '0')
    count_by_year_query = "MATCH (n:{entity})" \
        " {filters} " \
        " {match} " \
        "WITH distinct n WITH collect(substring(head(n.production_years), 0, 3)) + collect(substring(head(n.date_created), 0, 3)) as years " \
        "UNWIND years as row " \
        "RETURN row + '0' as decade, count(row) as count order by decade".format(
            entity=entity,
            filters=' '.join(filters),
            match=multi_match_query)
    # logger.debug(count_by_year_query)
    result_y_count = self.graph.cypher(count_by_year_query)
    group_by_years = {}
    for row in result_y_count:
        group_by_years[row[0]] = row[1]
    meta_response["countByYears"] = group_by_years
    return self.force_response(data, meta=meta_response)