def rdfStarLinkGenerator_fromCSV(link_predicate: str, result_batch, offset=0):
    """
    Generate RDF-star annotated links from the rows of a CSV result.

    :param link_predicate   : a turtle representation of the link predicate (e.g. owl:sameAs).
    :param result_batch     : an iterable of CSV rows. The first row is treated as the header; in the
                              other rows, columns 0 and 1 hold the source and target URIs and the
                              columns whose header is listed in CSV_HEADERS hold the annotations.
    :param offset           : an integer added to the row index when numbering the links.
    :return                 : Yields one string per link holding the triples of that link.
    """

    buffer = Buffer()

    # PREDICATE (turtle) => INDEX OF THE CSV COLUMN HOLDING ITS VALUE
    vars_dic = {}

    for count, row in enumerate(result_batch):

        # THE FIRST LINE IS ASSUMED TO BE THE HEADER (ROWS WITH LESS THAN 2 COLUMNS FOLLOW THE SAME PATH)
        if not (count > 0 and len(row) > 1):

            # MAPPING THE CSV HEADERS, STARTING AT POSITION 2 (AFTER THE SOURCE AND TARGET COLUMNS)
            for column in range(2, len(row)):
                if row[column] in CSV_HEADERS:
                    vars_dic[CSV_HEADERS[row[column]]] = column
            continue

        # GET THE SOURCE AND TARGET URIS
        src_data, trg_data = row[0], row[1]

        # NOTHING TO GENERATE WITHOUT BOTH ENDS OF THE LINK
        if not (src_data and trg_data):
            continue

        # THE LINK NUMBER IS WRITTEN WHETHER OR NOT THE LINK IS ANNOTATED
        buffer.write(F"{space}### LINK Nbr: {count + offset}\n")

        if vars_dic:
            # The RDFStar subject, to be followed by its annotations
            buffer.write(F"{space}<<<{src_data}> {link_predicate} <{trg_data}>>>\n")
        else:
            # NO ANNOTATION AVAILABLE: A PLAIN TRIPLE
            buffer.write(F"{space}<{src_data}> {link_predicate} <{trg_data}> .\n")

        # ANNOTATION OF THE LINK
        # THE LAST POSITION IS DERIVED FROM THE MAPPING ITSELF SO THAT THE
        # CLOSING "." IS WRITTEN EVEN WHEN SEVERAL HEADERS SHARE A PREDICATE
        last = len(vars_dic) - 1
        for counter, (predicate, index) in enumerate(vars_dic.items()):

            end = ".\n" if counter == last else ";"

            # APPENDING THE VALIDATION FLAG RESOURCE
            if predicate == VoidPlus.has_validation_ttl:
                # THE URIS ARE ORDERED SO THAT THE KEY DOES NOT DEPEND ON THE DIRECTION OF THE LINK
                small = src_data if src_data < trg_data else trg_data
                big = trg_data if small == src_data else src_data
                key = Grl.deterministicHash(F"{small}{big}{link_predicate}")
                triple_value = Rsc.validation_ttl(key)

            # APPENDING THE CLUSTER ID AS A RESOURCE
            elif predicate == VoidPlus.cluster_ID_ttl:
                triple_value = Rsc.cluster_ttl(int(row[index]))

            # APPENDING ANYTHING ELSE AS A LITERAL (DECIMAL-LIKE VALUES ARE ROUNDED TO 5 DIGITS)
            elif Grl.isDecimalLike(row[index]):
                triple_value = Literal(round(float(row[index]), 5)).n3(MANAGER)

            else:
                triple_value = Literal(row[index]).n3(MANAGER)

            buffer.write(F"{space * 2}{predicate:{Vars.PRED_SIZE}}{triple_value} {end}\n")

        yield buffer.getvalue()
        clearBuffer(buffer)
def standardLinkGenerator2(link_predicate: str, result_batch, namespace, clusters=None, offset=0):
    """
    Generate links and their standard reification from the rows of a CSV result.

    :param offset           : an integer added to the row index when numbering the links.
    :param link_predicate   : the URI of the link predicate (shortened with uri2ttl and the namespace).
    :param namespace        : a dictionary for namespace, passed to uri2ttl.
    :param result_batch     : an iterable of CSV rows. The first row is treated as the header; in the
                              other rows, columns 0 and 1 hold the source and target URIs.
    :param clusters         : an optional dictionary giving, for a cluster ID found in the CSV,
                              the size of that cluster.
    :return                 : Yields one string per link holding the triples of that link.
    """

    buffer = Buffer()

    # PREDICATE (turtle) => INDEX OF THE CSV COLUMN HOLDING ITS VALUE
    vars_dic = {}

    for count, row in enumerate(result_batch):

        try:

            # THE FIRST LINE IS ASSUMED TO BE THE HEADER
            if count > 0 and len(row) > 1:

                # GET THE SOURCE, TARGET AND PREDICATE IN THEIR SHORT TURTLE FORM
                src_data = uri2ttl(row[0], namespace)["short"]
                trg_data = uri2ttl(row[1], namespace)["short"]
                link_ttl = uri2ttl(link_predicate, namespace)["short"]

                # GENERATION OF THE LINK
                if src_data and trg_data:

                    # THE LINK TRIPLE, ALSO USED AS THE INPUT OF THE REIFICATION HASH
                    link = F"{space}{src_data} {Rsc.ga_resource_ttl(link_ttl)} {trg_data} .\n"
                    buffer.write(F"\n{space}### LINK Nbr: {count + offset}\n{link}")

                    # STANDARD REIFICATION (NUMBERED LIKE THE LINK IT DESCRIBES)
                    code = Rsc.ga_resource_ttl(F"Reification-{Grl.deterministicHash(link)}")
                    buffer.write(F"\n{space}### STANDARD REIFICATION Nbr: {count + offset}"
                                 F"\n{space}{code}\n"
                                 F"{space}{preVal('a', 'rdf:Statement')}"
                                 F"{space}{preVal('rdf:predicate', link_ttl)}"
                                 F"{space}{preVal('rdf:subject', F'{src_data}')}"
                                 F"{space}{preVal('rdf:object', F'{trg_data}')}")

                    # ANNOTATION OF THE LINK USING THE REIFIED CODE
                    # THE LAST POSITION IS DERIVED FROM THE MAPPING ITSELF SO THAT THE
                    # CLOSING "." IS WRITTEN EVEN WHEN SEVERAL HEADERS SHARE A PREDICATE
                    last = len(vars_dic) - 1
                    for counter, (annotation, index) in enumerate(vars_dic.items()):

                        end = ".\n" if counter == last else ";"

                        # APPENDING THE CLUSTER SIZE
                        if clusters and annotation == VoidPlus.cluster_ID_ttl and row[index] in clusters:
                            buffer.write(F"{space * 2}{VoidPlus.cluster_size_ttl:{Vars.PRED_SIZE}}"
                                         F"{Literal(clusters[row[index]]).n3(MANAGER)} ;\n")

                        # APPENDING THE VALIDATION FLAG
                        if annotation == VoidPlus.has_validation_status_ttl:
                            triple_value = validate.get_resource[row[index]]

                        # APPENDING ANYTHING ELSE AS A LITERAL (DECIMAL-LIKE VALUES ARE ROUNDED TO 5 DIGITS)
                        elif Grl.isDecimalLike(row[index]):
                            triple_value = Literal(round(float(row[index]), 5)).n3(MANAGER)

                        else:
                            triple_value = Literal(row[index]).n3(MANAGER)

                        buffer.write(F"{space * 2}{annotation:{Vars.PRED_SIZE}}{triple_value} {end}\n")

                    yield buffer.getvalue()
                    clearBuffer(buffer)

            else:

                # THE CSV HEADER
                # MAPPING THE CSV HEADERS, STARTING AT POSITION 2 (AFTER THE SOURCE AND TARGET COLUMNS)
                for column in range(2, len(row)):
                    if row[column] in CSV_HEADERS:
                        vars_dic[CSV_HEADERS[row[column]]] = column

        except Exception as err:
            # DROP WHATEVER WAS WRITTEN FOR THE FAILING ROW SO THAT IT DOES NOT LEAK INTO THE NEXT LINK
            clearBuffer(buffer)
            # REPORT THIS ERROR ONLY (NOT THE WHOLE HISTORY OF ERRORS) AND MOVE ON TO THE NEXT ROW
            print(F">>>> [ERROR FROM csv_2_linkset] {row}, {err}")
def rdfStarLinkGenerator(mappings: dict, link_predicate: str, result_batch, offset=0):
    """
    Generate RDF-star annotated links from link dictionaries.

    :param mappings         : dictionary of namespaces as keys and prefixes as values.
    :param link_predicate   : a turtle representation of the link predicate (e.g. owl:sameAs).
    :param result_batch     : an iterable of dictionaries, each with a 'source' and a 'target' URI.
                              The other entries are written as annotations when their key is
                              listed in JSON_HEADERS.
    :param offset           : an integer added to the link index when numbering the links.
    :return                 : Yields one string per link holding the triples of that link.
    """

    buffer = Buffer()

    def ns_modification(uri):
        """ Shorten the URI with the first matching namespace, otherwise wrap a full URI in <>. """
        for ns in mappings:
            if uri.startswith(ns):
                # ONLY THE LEADING NAMESPACE IS REPLACED BY ITS PREFIX
                uri = uri.replace(ns, F"{mappings[ns]}:", 1)
                break
        if "://" in uri:
            uri = F"<{uri}>"
        return uri

    for count, link in enumerate(result_batch):

        try:

            # GET THE SOURCE AND TARGET URIS
            src_data, trg_data = ns_modification(link['source']), ns_modification(link['target'])

            # NOTHING TO GENERATE WITHOUT BOTH ENDS OF THE LINK
            if not (src_data and trg_data):
                continue

            # COLLECTING THE ANNOTATIONS FIRST, SO THAT THE CLOSING "." LANDS ON THE LAST
            # ANNOTATION ACTUALLY WRITTEN AND NOT ON THE LAST ENTRY OF THE DICTIONARY
            annotations = []
            for feature, value in link.items():

                current_property = JSON_HEADERS.get(feature)
                if not current_property:
                    continue

                # APPENDING THE VALIDATION FLAG RESOURCE
                if current_property == VoidPlus.has_validation_ttl:
                    # THE URIS ARE ORDERED SO THAT THE KEY DOES NOT DEPEND ON THE DIRECTION OF THE LINK
                    small, big = sorted((link['source'], link['target']))
                    key = Grl.deterministicHash(F"{small}{big}{link_predicate}")
                    triple_value = Rsc.validation_ttl(key) if key is not None else key

                # APPENDING THE CLUSTER ID AS A RESOURCE
                elif current_property == VoidPlus.cluster_ID_ttl:
                    triple_value = Rsc.cluster_ttl(value) if value is not None else value

                # NOT APPENDING THE CLUSTER INT ID NOR A MISSING VALUE
                elif current_property == VoidPlus.cluster_Int_ID_ttl or value is None:
                    triple_value = None

                # APPENDING ANYTHING ELSE AS A LITERAL (DECIMAL-LIKE VALUES ARE ROUNDED TO 5 DIGITS)
                elif Grl.isDecimalLike(value):
                    triple_value = Literal(round(float(value), 5)).n3(MANAGER)

                else:
                    triple_value = Literal(value).n3(MANAGER)

                if triple_value is not None:
                    annotations.append((current_property, triple_value))

            # The RDFStar subject
            buffer.write(F"{space}### LINK Nbr: {count + offset}\n"
                         F"{space}<<{src_data} {link_predicate} {trg_data}>>\n")

            # ANNOTATION OF THE LINK
            last = len(annotations) - 1
            for position, (current_property, triple_value) in enumerate(annotations):
                end = ".\n" if position == last else ";"
                buffer.write(F"{space * 2}{current_property:{Vars.PRED_SIZE}}{triple_value} {end}\n")

            yield buffer.getvalue()
            clearBuffer(buffer)

        except Exception as err:
            # DROP WHATEVER WAS WRITTEN FOR THE FAILING LINK SO THAT IT DOES NOT LEAK INTO THE NEXT ONE
            clearBuffer(buffer)
            # THE ERROR IS REPORTED INSTEAD OF BEING SILENTLY DISCARDED, THEN THE NEXT LINK IS PROCESSED
            print(F">>>> [ERROR FROM AnnotatedLinkset_Generic/rdfStarLinkGenerator] {link}, {err}")
def standardLinkGenerator(mappings: dict, link_predicate: str, result_batch, offset=0):
    """
    Generate links and their standard reification from link dictionaries.

    :param mappings         : dictionary of namespaces as keys and prefixes as values.
    :param offset           : an integer added to the link index when numbering the links.
    :param link_predicate   : a turtle representation of a URI (e.g. owl:sameAs).
    :param result_batch     : an iterable of dictionaries, each with a 'source' and a 'target' URI.
                              The other entries are written as annotations when their key is
                              listed in JSON_HEADERS.
    :return                 : Yields one string per link holding the triples of that link.
    """

    buffer = Buffer()

    def ns_modification(uri):
        """ Shorten the URI with the first matching namespace, otherwise wrap a full URI in <>. """
        for ns in mappings:
            if uri.startswith(ns):
                # ONLY THE LEADING NAMESPACE IS REPLACED BY ITS PREFIX
                uri = uri.replace(ns, F"{mappings[ns]}:", 1)
                break
        if "://" in uri:
            uri = F"<{uri}>"
        return uri

    for count, link in enumerate(result_batch):

        # GET THE SOURCE AND TARGET URIS
        src_data, trg_data = ns_modification(link['source']), ns_modification(link['target'])

        # NOTHING TO GENERATE WITHOUT BOTH ENDS OF THE LINK
        if not (src_data and trg_data):
            continue

        # THE LINK TRIPLE, ALSO USED AS THE INPUT OF THE REIFICATION HASH
        reification = F"{space}{src_data} {Rsc.ga_resource_ttl(link_predicate)} {trg_data} .\n"
        buffer.write(F"\n{space}### LINK Nbr: {count + offset}\n{reification}")

        # STANDARD REIFICATION (NUMBERED LIKE THE LINK IT DESCRIBES)
        code = Rsc.ga_resource_ttl(F"Reification-{Grl.deterministicHash(reification)}")
        buffer.write(F"\n{space}### STANDARD REIFICATION Nbr: {count + offset}"
                     F"\n{space}{code}\n"
                     F"{space}{preVal('a', 'rdf:Statement')}"
                     F"{space}{preVal('rdf:predicate', link_predicate)}"
                     F"{space}{preVal('rdf:subject', F'{src_data}')}"
                     F"{space}{preVal('rdf:object', F'{trg_data}')}")

        # COLLECTING THE ANNOTATIONS FIRST, SO THAT THE CLOSING "." LANDS ON THE LAST
        # ANNOTATION ACTUALLY WRITTEN AND NOT ON THE LAST ENTRY OF THE DICTIONARY
        annotations = []
        for feature, value in link.items():

            cur_predicate = JSON_HEADERS.get(feature)
            if not cur_predicate:
                continue

            # APPENDING THE VALIDATION FLAG RESOURCE
            if cur_predicate == VoidPlus.has_validation_ttl:
                # THE URIS ARE ORDERED SO THAT THE KEY DOES NOT DEPEND ON THE DIRECTION OF THE LINK
                small, big = sorted((link['source'], link['target']))
                key = Grl.deterministicHash(F"{small}{big}{link_predicate}")
                triple_value = Rsc.validation_ttl(key) if key is not None else key

            # APPENDING THE CLUSTER ID AS A RESOURCE
            elif cur_predicate == VoidPlus.cluster_ID_ttl:
                triple_value = Rsc.cluster_ttl(value) if value is not None else value

            # APPENDING THE NETWORK ID AS IS (NO ROUNDING)
            elif cur_predicate == VoidPlus.network_ID_ttl:
                triple_value = Literal(value).n3(MANAGER) if value is not None else value

            # NOT APPENDING THE CLUSTER INT ID NOR A MISSING VALUE
            elif cur_predicate == VoidPlus.cluster_Int_ID_ttl or value is None:
                triple_value = None

            # APPENDING ANYTHING ELSE AS A LITERAL (DECIMAL-LIKE VALUES ARE ROUNDED TO 5 DIGITS)
            elif Grl.isDecimalLike(value):
                triple_value = Literal(round(float(value), 5)).n3(MANAGER)

            else:
                triple_value = Literal(value).n3(MANAGER)

            if triple_value is not None:
                annotations.append((cur_predicate, triple_value))

        # ANNOTATION OF THE LINK USING THE REIFIED CODE
        last = len(annotations) - 1
        for position, (cur_predicate, triple_value) in enumerate(annotations):
            end = ".\n" if position == last else ";"
            buffer.write(F"{space * 2}{cur_predicate:{Vars.PRED_SIZE}}{triple_value} {end}\n")

        yield buffer.getvalue()
        clearBuffer(buffer)
def standardLinkGenerator_fromCSV(link_predicate: str, result_batch, offset=0):
    """
    Generate links and their standard reification from the rows of a CSV result.

    :param offset           : an integer added to the row index when numbering the links.
    :param link_predicate   : a turtle representation of a URI (e.g. owl:sameAs).
    :param result_batch     : an iterable of CSV rows. The first row is treated as the header; in the
                              other rows, columns 0 and 1 hold the source and target URIs and the
                              columns whose header is listed in CSV_HEADERS hold the annotations.
    :return                 : Yields one string per link holding the triples of that link.
    """

    buffer = Buffer()

    # PREDICATE (turtle) => INDEX OF THE CSV COLUMN HOLDING ITS VALUE
    vars_dic = {}

    for count, row in enumerate(result_batch):

        try:

            # THE FIRST LINE IS ASSUMED TO BE THE HEADER
            if count > 0 and len(row) > 1:

                # GET THE SOURCE AND TARGET URIS
                src_data, trg_data = row[0], row[1]

                # GENERATION OF THE LINK
                if src_data and trg_data:

                    link_resource = Rsc.ga_resource_ttl(link_predicate)

                    # THE LINK TRIPLE
                    buffer.write(F"\n{space}### LINK Nbr: {count + offset}\n"
                                 F"{space}<{src_data}> {link_resource} <{trg_data}> .\n")

                    # STANDARD REIFICATION (NUMBERED LIKE THE LINK IT DESCRIBES)
                    # THE HASH INPUT HAS NO ANGLE BRACKETS, UNLIKE THE TRIPLE ABOVE: IT IS KEPT
                    # THAT WAY SO THAT THE REIFICATION IDENTIFIERS STAY THE SAME
                    link = F"{space}{src_data} {link_resource} {trg_data} .\n"
                    code = Rsc.ga_resource_ttl(F"Reification-{Grl.deterministicHash(link)}")
                    buffer.write(F"\n{space}### STANDARD REIFICATION Nbr: {count + offset}"
                                 F"\n{space}{code}\n"
                                 F"{space}{preVal('a', 'rdf:Statement')}"
                                 F"{space}{preVal('rdf:predicate', link_predicate)}"
                                 F"{space}{preVal('rdf:subject', F'<{src_data}>')}"
                                 F"{space}{preVal('rdf:object', F'<{trg_data}>')}")

                    # ANNOTATION OF THE LINK USING THE REIFIED CODE
                    # THE LAST POSITION IS DERIVED FROM THE MAPPING ITSELF SO THAT THE
                    # CLOSING "." IS WRITTEN EVEN WHEN SEVERAL HEADERS SHARE A PREDICATE
                    last = len(vars_dic) - 1
                    for counter, (predicate, index) in enumerate(vars_dic.items()):

                        end = ".\n" if counter == last else ";"

                        # APPENDING THE VALIDATION FLAG RESOURCE
                        if predicate == VoidPlus.has_validation_ttl:
                            # THE URIS ARE ORDERED SO THAT THE KEY DOES NOT DEPEND ON THE DIRECTION OF THE LINK
                            small = src_data if src_data < trg_data else trg_data
                            big = trg_data if small == src_data else src_data
                            key = Grl.deterministicHash(F"{small}{big}{link_predicate}")
                            triple_value = Rsc.validation_ttl(key)

                        # APPENDING THE CLUSTER ID AS A RESOURCE
                        elif predicate == VoidPlus.cluster_ID_ttl:
                            triple_value = Rsc.cluster_ttl(int(row[index]))

                        # APPENDING ANYTHING ELSE AS A LITERAL (DECIMAL-LIKE VALUES ARE ROUNDED TO 5 DIGITS)
                        elif Grl.isDecimalLike(row[index]):
                            triple_value = Literal(round(float(row[index]), 5)).n3(MANAGER)

                        else:
                            triple_value = Literal(row[index]).n3(MANAGER)

                        buffer.write(F"{space * 2}{predicate:{Vars.PRED_SIZE}}{triple_value} {end}\n")

                    yield buffer.getvalue()
                    clearBuffer(buffer)

            else:

                # THE CSV HEADER
                # MAPPING THE CSV HEADERS, STARTING AT POSITION 2 (AFTER THE SOURCE AND TARGET COLUMNS)
                for column in range(2, len(row)):
                    if row[column] in CSV_HEADERS:
                        vars_dic[CSV_HEADERS[row[column]]] = column

        except Exception as err:
            # DROP WHATEVER WAS WRITTEN FOR THE FAILING ROW SO THAT IT DOES NOT LEAK INTO THE NEXT LINK
            clearBuffer(buffer)
            # REPORT THIS ERROR ONLY (NOT THE WHOLE HISTORY OF ERRORS) AND MOVE ON TO THE NEXT ROW
            print(F">>>> [ERROR FROM AnnotatedLinkset_Generic/standardLinkGenerator] \n\t{row} \n\t{err}")