Exemplo n.º 1
0
def resource_ttl(operator):
    """Return the TTL resource for *operator*: its human label with spaces stripped.

    Unknown operators map to the label 'Unknown'.
    """
    label = labels.get(operator.lower(), 'Unknown')
    return Rsc.operator_ttl(label.replace(' ', ''))
Exemplo n.º 2
0
class Validate:
    """Turtle templates for link-validation terminology.

    Each validation flag (Accepted, Rejected, Uncertain, Unchecked, Disputed)
    gets a resource URI, label(s), a description, and a ready-made Turtle
    fragment (``*_doc``).  ``get_resource`` / ``get_triples`` map the internal
    flag keys to those artefacts, and ``terminology()`` concatenates all
    fragments under one header.
    """

    # RSC_SPACE is a module-level indentation width; TPL_SPACE indents the
    # description bodies two columns deeper than the resource blocks.
    # NOTE(review): reading a global in a class body does not require the
    # `global` statement — presumably kept for clarity; confirm before removing.
    global RSC_SPACE
    TPL_SPACE = 2 + RSC_SPACE
    # header = F"\n\n{'#' * 110}\n#{'VALIDATION TERMINOLOGY':^108}#\n{'#' * 110}\n\n"

    # -------------------------------------- #
    # ACCEPTED
    # -------------------------------------- #
    accepted = Rsc.ga_resource_ttl('Accepted')
    accepted_label = "Accepted"
    accepted_desc = F"""
{space * TPL_SPACE}A validation process that results in the link under scrutiny being flagged as ACCEPTED. 
{space * TPL_SPACE}This, with the intent of notifying that the link has undergone and PASSED a user-defined set 
{space * TPL_SPACE}of checks which gives ground to CONFIRM the rightful creation of the context dependent link."""
    accepted_doc = F"""
{space * RSC_SPACE}### VALIDATED AS ACCEPTED
{space * RSC_SPACE}{accepted}
    {space * RSC_SPACE}{preVal('a', VoidPlus.ValidationFlag_ttl, line=False)}
    {space * RSC_SPACE}{preVal(Sns.RDFS.label_ttl, Literal(accepted_label).n3(), line=False)}
    {space * RSC_SPACE}{preVal(Sns.DCterms.description_ttl, Literal(accepted_desc).n3(), line=False, end=True)}
    """

    # -------------------------------------- #
    # REJECTED
    # -------------------------------------- #
    rejected = Rsc.ga_resource_ttl('Rejected')
    rejected_label = ["Not Accepted", "Rejected"]
    rejected_desc = F"""
{space * TPL_SPACE}A validation process that results in the link under scrutiny being flagged as REJECTED. 
{space * TPL_SPACE}This, with the intent of notifying that the link has undergone and FAILED a user-defined 
{space * TPL_SPACE}set of checks which gives ground to REFUTE the creation of the context dependent link."""
    rejected_doc = F"""
{space * RSC_SPACE}### VALIDATED AS REJECTED
{space * RSC_SPACE}{rejected}
    {space * RSC_SPACE}{preVal('a', VoidPlus.ValidationFlag_ttl, line=False)}
    {space * RSC_SPACE}{preVal(Sns.RDFS.label_ttl, valueList(rejected_label, newLine=False), line=False)}
    {space * RSC_SPACE}{preVal(Sns.DCterms.description_ttl, Literal(rejected_desc).n3(), line=False, end=True)}
    """

    # -------------------------------------- #
    # UNCERTAIN - NOT SURE
    # -------------------------------------- #
    unsure = Rsc.ga_resource_ttl('Uncertain')
    unsure_label = ["Not Sure", "Unsure", "Uncertain"]
    unsure_desc = F"""
{space * TPL_SPACE}A validation process that results in the link under scrutiny being flagged as UNCERTAIN. 
{space * TPL_SPACE}This flag reveals the lack of confidence in confirming or refuting the creation of
{space * TPL_SPACE}the context dependent link."""
    unsure_doc = F"""
{space * RSC_SPACE}### VALIDATED AS UNCERTAIN
{space * RSC_SPACE}{unsure}
    {space * RSC_SPACE}{preVal('a', VoidPlus.ValidationFlag_ttl, line=False)}
    {space * RSC_SPACE}{preVal(Sns.RDFS.label_ttl, valueList(unsure_label, newLine=False), line=False)}
    {space * RSC_SPACE}{preVal(Sns.DCterms.description_ttl, Literal(unsure_desc).n3(), line=False, end=True)}
    """

    # -------------------------------------- #
    # UNCHECKED - NOT VALIDATED
    # -------------------------------------- #
    unchecked = Rsc.ga_resource_ttl('Unchecked')
    unchecked_label = ["Not Validated", "Not Checked", "Unchecked"]
    unchecked_desc = F"""
{space * TPL_SPACE}Flagging a link as UNCHECKED literally highlights that it has not undergone any user-defined scrutiny 
{space * TPL_SPACE}such that it could be flagged as ACCEPTED, REJECTED or UNCERTAIN"""
    unchecked_doc = F"""
{space * RSC_SPACE}### VALIDATED AS UNCHECKED
{space * RSC_SPACE}{unchecked}
    {space * RSC_SPACE}{preVal('a', VoidPlus.ValidationFlag_ttl, line=False)}
    {space * RSC_SPACE}{preVal(Sns.RDFS.label_ttl, valueList(unchecked_label, newLine=False), line=False)}
    {space * RSC_SPACE}{preVal(Sns.DCterms.description_ttl, Literal(unchecked_desc).n3(), line=False, end=True)}
    """

    # -------------------------------------- #
    # MIXED - DISPUTED - CONTRADICTION
    # -------------------------------------- #
    mixed = Rsc.ga_resource_ttl('Disputed')
    mixed_label = ["Contradictory", "Disputed"]
    # NOTE(review): the example list below reads "ACCEPTED, ACCEPTED, ..." —
    # presumably one of these should be REJECTED; confirm before changing the text.
    mixed_desc = F"""
{space * TPL_SPACE}A validation process that results in the link under scrutiny being flagged as DISPUTED.  
{space * TPL_SPACE}This, with the intent of notifying that the link has undergone MULTIPLE user-defined set of checks
{space * TPL_SPACE}which result in a contradiction. In other words, the same link has been flagged with contradicting
{space * TPL_SPACE}labels such as for example ACCEPTED, ACCEPTED, UNSURE AND UNCHECKED"""
    mixed_doc = F"""
{space * RSC_SPACE}### VALIDATED AS DISPUTED
{space * RSC_SPACE}{mixed}
    {space * RSC_SPACE}{preVal('a', VoidPlus.ValidationFlag_ttl, line=False)}
    {space * RSC_SPACE}{preVal(Sns.RDFS.label_ttl, valueList(mixed_label, newLine=False), line=False)}
    {space * RSC_SPACE}{preVal(Sns.DCterms.description_ttl, Literal(mixed_desc).n3(), line=False, end=True)}
        """

    # Generic description attached to a validationset as a whole.
    generic_desc = F"""
{space * TPL_SPACE}Unless explicitly stated in a specific validation resource, 
{space * TPL_SPACE}a. An accepted link entails that the validator agrees with the alignment (matching) in general and 
{space * TPL_SPACE}with the specific correspondence under scrutiny.
{space * TPL_SPACE}b. A rejected link entails that the validator agrees with the alignment (matching) in general but 
{space * TPL_SPACE}disagrees with the specific correspondence under scrutiny. Together, the values of the selected 
{space * TPL_SPACE}properties provided by the validator justify her disagreement.
{space * TPL_SPACE}c. Rejecting an alignment entails that the validator disagrees with the alignment in general and 
{space * TPL_SPACE}may have selected a set of properties for which the values justify her disagreement."""

    # Internal flag key -> flag resource URI (turtle form).
    get_resource = {
        'accepted': accepted,
        'rejected': rejected,
        'not_sure': unsure,
        'not_validated': unchecked,
        'mixed': mixed
    }

    # Internal flag key -> full Turtle fragment describing the flag.
    get_triples = {
        'accepted': accepted_doc,
        'rejected': rejected_doc,
        'not_sure': unsure_doc,
        'not_validated': unchecked_doc,
        'mixed': mixed_doc
    }

    def terminology(self):
        """Return all flag fragments concatenated under a terminology header."""

        return F"{header('LINK VALIDATION TERMINOLOGY')}" \
               F"{self.accepted_doc}" \
               F"{self.rejected_doc}" \
               F"{self.unsure_doc}" \
               F"{self.unchecked_doc}" \
               F"{self.mixed_doc}"
Exemplo n.º 3
0
def csv2Linkset(csv_linkset_file: str, link_type: str, auto_prefixes: dict):
    """
    Convert a CSV linkset file into a Turtle (.ttl) file with standard reification.

    :param csv_linkset_file : path to the CSV file. The first row is assumed to
                              be the header; column 0 is the source URI and
                              column 1 the target URI.
    :param link_type        : the linking predicate as a URI string.
    :param auto_prefixes    : dictionary of automatically collected prefixes.
    :return                 : the path of the generated Turtle file.
    """
    ttl_file = F"{csv_linkset_file}.ttl"

    # MAPS ANNOTATION COLUMNS (BEYOND SOURCE AND TARGET) TO RDF PREDICATES
    vars_dic = defaultdict(int)
    vars_size = 0

    # WRITING THE TURTLE FILE
    with open(ttl_file, "w") as writer:

        # Reading the input csv file
        with open(csv_linkset_file, "r") as csv_file:

            # Dissect the link-type for prefix - namespace - name and turtle
            predicate_data = uri2ttl(
                reconstructTurtle(link_type, auto_prefixes=auto_prefixes),
                auto_prefixes=auto_prefixes)

            for count, row in enumerate(csv_reader(csv_file)):

                # THE FIRST LINE IS ASSUMED TO BE THE HEADER
                if count > 0:

                    # GET THE SOURCE AND TARGET URIS
                    src_data = uri2ttl(uri=row[0], auto_prefixes=auto_prefixes)
                    trg_data = uri2ttl(row[1], auto_prefixes)

                    # GENERATION OF THE LINK
                    if src_data and trg_data and src_data[3] is not None and trg_data[3] is not None:

                        # The link triple; `link` is reused below (instead of
                        # re-spelling the f-string) and hashed for the
                        # deterministic reification id.
                        writer.write(F"\n\t### LINK Nbr: {count}\n")
                        link = F"\t{src_data[3]}    {predicate_data[3]}    {trg_data[3]} .\n"
                        writer.write(link)

                        # STANDARD REIFICATION
                        code = Rsc.ga_resource_ttl(
                            F"Reification-{Grl.deterministicHash(link)}")
                        writer.write(F"\n\t### STANDARD REIFICATION Nbr: {count}")
                        writer.write(F"\n\t{code}\n")
                        writer.write(F"\t{preVal('a', 'rdf:Statement')}")
                        writer.write(F"\t{preVal('rdf:predicate', predicate_data[3])}")
                        writer.write(F"\t{preVal('rdf:subject', src_data[3])}")
                        writer.write(F"\t{preVal('rdf:object', trg_data[3])}")

                        # ANNOTATION OF THE LINK USING THE REIFIED CODE
                        for counter, (predicate, index) in enumerate(vars_dic.items()):
                            # The last annotation ends the statement with a dot.
                            end = ".\n" if counter == vars_size - 1 else ";"
                            writer.write(
                                F"\t\t{predicate:{Vars.PRED_SIZE}}"
                                F" {Literal(row[index]).n3() if not Grl.isDecimalLike(row[index]) else round(float(row[index]), 5)} {end}\n"
                            )

                # THE MAPPING OF THE CSV HEADERS TO VOIDPLUS RDF PREDICATES
                else:

                    # THE CSV HEADER: annotation columns start at position 2
                    for column in range(2, len(row)):

                        if row[column] in CSV_HEADERS:
                            vars_dic[CSV_HEADERS[row[column]]] = column
                            vars_size += 1

    return ttl_file
def rdfStarLinkGenerator_fromCSV(link_predicate: str, result_batch, offset=0):
    """
    Yield links (one Turtle fragment per link) from CSV-like rows, using
    RDF-star syntax when the link carries annotations.

    :param link_predicate : a turtle representation of the linking predicate.
    :param result_batch   : an iterable of rows; the first row is the header,
                            column 0 is the source URI and column 1 the target.
    :param offset         : integer added to the printed link counter.
    :return               : yields one string (set of triples) per link.
    """
    vars_size = 0
    buffer = Buffer()
    # MAPS ANNOTATION COLUMNS (BEYOND SOURCE AND TARGET) TO RDF PREDICATES
    vars_dic = defaultdict(int)

    for count, row in enumerate(result_batch):

        # THE FIRST LINE IS ASSUMED TO BE THE HEADER
        if count > 0 and len(row) > 1:

            # GET THE SOURCE AND TARGET URIS
            src_data, trg_data = row[0], row[1]

            # GENERATION OF THE LINK
            if src_data and trg_data:

                # The RDF-star subject: quoted triple when annotated,
                # plain asserted triple otherwise.
                buffer.write(F"{space}### LINK Nbr: {count + offset}\n"
                             F"{space}<<<{src_data}>    {link_predicate}    <{trg_data}>>>\n"
                             if len(vars_dic) > 0
                             else F"{space}<{src_data}>    {link_predicate}    <{trg_data}> .\n")

                # ANNOTATION OF THE LINK
                # ll_val:has-link-validation               "not_validated" .
                for counter, (predicate, index) in enumerate(vars_dic.items()):
                    end = ".\n" if counter == vars_size - 1 else ";"

                    # APPENDING THE VALIDATION FLAG RESOURCE
                    if predicate == VoidPlus.has_validation_ttl:
                        # Order the pair so the hash is direction-independent.
                        small = src_data if src_data < trg_data else trg_data
                        big = trg_data if small == src_data else src_data
                        key = Grl.deterministicHash(F"{small}{big}{link_predicate}")
                        triple_value = Rsc.validation_ttl(key)

                    # APPENDING THE CLUSTER ID AS A RESOURCE
                    elif predicate == VoidPlus.cluster_ID_ttl:
                        cluster_id = int(row[index])
                        triple_value = Rsc.cluster_ttl(cluster_id)

                    # APPENDING ANYTHING ELSE
                    else:
                        triple_value = Literal(round(float(row[index]), 5)).n3(MANAGER) \
                            if Grl.isDecimalLike(row[index]) \
                            else Literal(row[index]).n3(MANAGER)

                    buffer.write(F"{space * 2}{predicate:{Vars.PRED_SIZE}}{triple_value} {end}\n")

                yield buffer.getvalue()
                clearBuffer(buffer)

        else:

            # THE CSV HEADER: annotation columns start at position 2
            for column in range(2, len(row)):

                if row[column] in CSV_HEADERS:
                    vars_dic[CSV_HEADERS[row[column]]] = column
                    vars_size += 1
def validationGraphGenerator(validationset, linksetStats, auto_prefixes, setGraph, set_id, created, isLinkset: bool):
    """
    Serialise a validationset (metadata + individual validations) as Turtle.

    :param validationset : dict with at least an 'items' mapping of
                           validation-key -> validation data; may also carry
                           'creator' and 'publisher'.
    :param linksetStats  : dict of validation statistics (accepted, rejected, ...).
    :param auto_prefixes : dictionary of automatically collected prefixes.
    :param setGraph      : the target (linkset or lens) graph resource.
    :param set_id        : id appended to the validationset graph name.
    :param created       : creation timestamp for dcterms:created.
    :param isLinkset     : False when the target is a lens (adds the lens prefix).
    :return              : the Turtle serialisation as a string, or None when
                           `validationset` is empty.
    """

    # THE LAST STATUS MUST ALWAYS HAVE A VALUE SO THAT IT DETERMINES THE LAST TRIPLE
    predicate_map = {
        "Motivation": VoidPlus.motivation_ttl,
        "Status": VoidPlus.has_validation_status_ttl
    }

    if isLinkset is False:
        auto_prefixes[Rsc.lens] = "lens"

    if validationset:

        validationset_graph = F"{Rsc.validationset_ttl(Grl.deterministicHash(validationset))}-{set_id}"
        writer = Buffer()

        # APPENDING ALL NAMESPACES
        writer.write(
            linksetNamespaces(
                auto_prefixes,
                isValidated=True,
                isClustered=Vars.clusters in linksetStats and linksetStats[Vars.clusters] > 0
            ))

        # VALIDATION METADATA
        writer.write(F'{header("LINK VALIDATION METADATA")}\n\n')
        writer.write(F"{validationset_graph}\n")
        writer.write(preVal('a', VoidPlus.Validationset_ttl))
        writer.write(preVal(VoidPlus.hasTarget_ttl, setGraph))
        if "creator" in validationset and len(validationset["creator"].strip()) > 0:
            writer.write(preVal(Sns.DCterms.creator_ttl, Literal(validationset["creator"]).n3()))
        if "publisher" in validationset and len(validationset["publisher"].strip()) > 0:
            writer.write(preVal(Sns.DCterms.publisher_ttl, Literal(validationset["publisher"]).n3()))

        # CREATED
        # FIX: was XSD.dateTi — a truncated (invalid) datatype attribute.
        writer.write(preVal(Sns.DCterms.created_ttl, Literal(created, datatype=XSD.dateTime).n3(MANAGER)))

        # EXPORT TIMESTAMP
        writer.write(preVal(VoidPlus.exportDate_ttl, Grl.getXSDTimestamp()))

        # VALIDATION STATS
        # THE TOTAL AMOUNT OF LINKS ACCEPTED
        writer.write(F"\n{space}### VOID+ VALIDATION STATS\n")
        if Vars.accepted in linksetStats and linksetStats[Vars.accepted] > -1:
            writer.write(preVal(VoidPlus.accepted_ttl, Rsc.literal_resource(linksetStats[Vars.accepted])))

        # THE TOTAL AMOUNT OF LINKS REJECTED
        if Vars.rejected in linksetStats and linksetStats[Vars.rejected] > -1:
            writer.write(preVal(VoidPlus.rejected_ttl, Rsc.literal_resource(linksetStats[Vars.rejected])))

        # THE TOTAL AMOUNT OF LINKS WITH AN UNCERTAIN VALIDATION FLAG
        if Vars.not_sure in linksetStats and linksetStats[Vars.not_sure] > -1:
            writer.write(preVal(VoidPlus.uncertain_ttl, Rsc.literal_resource(linksetStats[Vars.not_sure])))

        # THE TOTAL AMOUNT OF LINKS NOT VALIDATED
        if Vars.notValidated in linksetStats and linksetStats[Vars.notValidated] > -1:
            writer.write(
                preVal(VoidPlus.unchecked_ttl, Rsc.literal_resource(linksetStats[Vars.notValidated])))

        writer.write("\n")
        writer.write(preVal(Sns.DCterms.description_ttl, Rsc.literal_resource(validate.generic_desc), end=True))

        # VALIDATION TERMS
        writer.write(validate.terminology())

        # VALIDATIONSET
        writer.write(F'{header("VALIDATIONSET")}\n\n')
        writer.write(F"{validationset_graph}\n{{")

        # VALIDATIONS
        for key, validation in validationset['items'].items():
            writer.write(F'\n\t{Rsc.validation_ttl(key)}\n')
            writer.write(preVal('a', VoidPlus.Validation_ttl, position=2))

            for index, (val_header, value) in enumerate(predicate_map.items()):

                end = True if index == len(predicate_map) - 1 else False
                curr_feature = predicate_map.get(val_header, None)

                if curr_feature:

                    # ACCEPTED | REJECTED | NOT-VALIDATED | UNSURE | MIXED
                    if curr_feature == VoidPlus.has_validation_status_ttl:
                        writer.write(preVal(VoidPlus.has_validation_status_ttl, validate.get_resource[validation[val_header]], end=end, position=2))

                    elif validation[val_header]:
                        writer.write(preVal(curr_feature, Literal(validation[val_header]).n3(MANAGER), end=end, position=2))

        writer.write("}")
        return writer.getvalue()
def clusterGraphGenerator(clusters, stats, auto_prefixes, linksetGraph, created, linkset_id):
    """
    Serialise the partition of co-referent matched resources (clusters) as Turtle.

    :param clusters      : dict of cluster-id -> cluster data (expects keys such
                           as 'nodes', 'links', 'size', 'id', 'hash_id', ...).
    :param stats         : dict of clustering statistics ('largest_size',
                           'largest_count', validation counters, ...).
    :param auto_prefixes : dictionary of automatically collected prefixes.
    :param linksetGraph  : the target linkset graph resource.
    :param created       : creation timestamp for dcterms:created.
    :param linkset_id    : id appended to the clusterset graph name.
    :return              : the Turtle serialisation as a string, or None when
                           there are no clusters (mirrors validationGraphGenerator).
    """

    node_count = 0
    validated = 0
    clusterset_graph = F"{Rsc.clusterset_ttl(Grl.deterministicHash(clusters))}-{linkset_id}"

    # FIX: the original created `writer` and `predicate_map` inside
    # `if clusters:` but used them unconditionally afterwards, raising a
    # NameError when `clusters` was empty. Bail out early instead.
    if not clusters:
        return None

    writer = Buffer()
    # Cluster-data feature -> VoidPlus predicate.
    predicate_map = {
        # SET OF NODES
        "extended": VoidPlus.extended_ttl,
        "id": VoidPlus.intID_ttl,
        "hash_id": VoidPlus.hashID_ttl,
        # VALIDATIONS
        "reconciled": VoidPlus.reconciled_ttl,
        "size": VoidPlus.size_ttl,
        "accepted": VoidPlus.accepted_ttl,
        "rejected": VoidPlus.rejected_ttl,
        "not_sure": VoidPlus.uncertain_ttl,
        "mixed": VoidPlus.contradictions_ttl,
        "not_validated": VoidPlus.unchecked_ttl,
        'network_id': VoidPlus.network_ID_ttl
    }

    # APPENDING ALL NAMESPACES
    writer.write(
        linksetNamespaces(
            auto_prefixes, isClustered=clusters and len(clusters) > 0,
            isValidated=(Vars.notValidated in stats and stats[Vars.notValidated] < stats[Vars.triples]) is True
        ))

    # THE CLUSTER METADATA
    writer.write(F'{header("RESOURCE PARTITIONING METADATA")}\n\n')
    writer.write(F"{clusterset_graph}\n")
    writer.write(preVal('a', VoidPlus.Clusterset_ttl))
    writer.write(preVal(VoidPlus.clusters_ttl, Literal(len(clusters)).n3(MANAGER)))
    # '###NodeCounts' / '###VALIDATED' are placeholders patched with the
    # final totals once all clusters have been visited (see the end).
    writer.write(preVal(Sns.VoID.entities_ttl, "###NodeCounts"))
    writer.write(preVal(VoidPlus.validations_ttl, "###VALIDATED"))

    writer.write(preVal(VoidPlus.largestNodeCount_ttl, Rsc.literal_resource(stats['largest_size'])))
    writer.write(preVal(VoidPlus.largestLinkCount_ttl, Rsc.literal_resource(stats['largest_count'])))

    writer.write(preVal(VoidPlus.hasTarget_ttl, linksetGraph))
    writer.write(preVal(VoidPlus.method_ttl, Algorithm.simple_clustering_ttl))
    # EXPORT TIMESTAMP
    writer.write(preVal(VoidPlus.exportDate_ttl, Grl.getXSDTimestamp()))
    # CREATED TIMESTAMP
    # FIX: was XSD.dateTi — a truncated (invalid) datatype attribute.
    writer.write(preVal(Sns.DCterms.created_ttl, Literal(created, datatype=XSD.dateTime).n3(MANAGER), end=True))

    # DESCRIPTION OF THE CLUSTERING ALGORITHM
    writer.write(F'\n\n{Algorithm.simple_clustering_ttl}\n')
    writer.write(preVal('a', VoidPlus.ClusteringAlgorithm_ttl))
    writer.write(preVal(Sns.DCterms.description_ttl, Literal(Algorithm.simple_clustering_short_description).n3(MANAGER)))
    writer.write(preVal(Sns.RDFS.seeAlso_ttl, Rsc.ga_resource_ttl("https://doi.org/10.3233/SW-200410"), end=True))

    # THE PARTITION OF CO-REFERENT MATCHED RESOURCES
    writer.write(F'{header("ANNOTATED CO-REFERENT RESOURCES")}\n\n')
    writer.write(F"{clusterset_graph}\n{{\n")
    for cid, cluster_data in clusters.items():
        temp = Buffer()

        # A CLUSTER RESOURCE
        writer.write(F"\n\t{Rsc.cluster_ttl(cid)}\n")
        writer.write(preVal('a', VoidPlus.Cluster_ttl, position=2))

        for feature, value in cluster_data.items():

            # CLUSTERED RESOURCES
            if feature == 'nodes':

                if value:
                    nodes = set(value)
                    node_count += len(nodes)
                    temp.write(
                        preVal(
                            VoidPlus.hasItem_ttl,
                            F" ,\n{space*2}{' ' * Vars.PRED_SIZE}".join(Rsc.ga_resource_ttl(elt) for elt in nodes),
                            position=2
                        )
                    )

            # VALIDATION FLAGS
            elif feature == "links":

                # A cluster counts as validated once none of its links is
                # flagged 'not_validated'.
                if value and value['not_validated'] == 0:
                    validated += 1

                for flag, integer in value.items():
                    temp.write(
                        preVal(
                            predicate_map[flag],
                            Literal(integer).n3(MANAGER),
                            position=2
                        )
                    )

            elif feature in ["values"]:
                pass

            # ABOUT THE CLUSTER'S SIZE, Extension, Reconciliation, intID
            else:
                temp.write(preVal(predicate_map[feature], Literal(value).n3(MANAGER), position=2))

        # Replace the trailing ';\n' of the last predicate with a final dot.
        writer.write(F"{temp.getvalue()[:-2]}.\n")

    # Patch the placeholders with the totals and close the graph.
    result = writer.getvalue().replace('###NodeCounts', Literal(node_count).n3(MANAGER))
    return F"{result.replace('###VALIDATED', Literal(validated).n3(MANAGER))}}}"
def standardLinkGenerator2(link_predicate: str, result_batch, namespace, clusters=None, offset=0):

    """
    Yield links as asserted triples plus their standard (rdf:Statement) reification.

    :param offset           : an integer to increment the counting of the links
    :param link_predicate   : a turtle representation of a URI (e.g. owl:sameAs).
    :param namespace        : a dictionary for namespace
    :param result_batch     : an iterable object with link results.
    :param clusters         : a dictionary providing the size of the clusters links.
    :return                 : Yields a string as set of triples.
    """
    errors = ""
    vars_size = 0
    buffer = Buffer()
    # MAPS ANNOTATION COLUMNS (BEYOND SOURCE AND TARGET) TO RDF PREDICATES
    vars_dic = defaultdict(int)

    for count, row in enumerate(result_batch):

        try:

            # THE FIRST LINE IS ASSUMED TO BE THE HEADER
            if count > 0 and len(row) > 1:

                # GET THE SOURCE AND TARGET URIS
                src_data, trg_data, predicate = uri2ttl(row[0], namespace)["short"], \
                                                uri2ttl(row[1], namespace)["short"], \
                                                uri2ttl(link_predicate, namespace)["short"]

                # GENERATION OF THE LINK
                if src_data and trg_data:

                    # The asserted triple
                    buffer.write(F"\n{space}### LINK Nbr: {count + offset}\n"
                                 F"{space}{src_data}    {Rsc.ga_resource_ttl(predicate)}    {trg_data} .\n")

                    # STANDARD REIFICATION (the triple text is hashed for a
                    # deterministic reification id)
                    link = F"{space}{src_data}    {Rsc.ga_resource_ttl(predicate)}    {trg_data} .\n"
                    code = Rsc.ga_resource_ttl(F"Reification-{Grl.deterministicHash(link)}")
                    buffer.write(F"\n{space}### STANDARD REIFICATION Nbr: {count}"
                                 F"\n{space}{code}\n"
                                 F"{space}{preVal('a', 'rdf:Statement')}"
                                 F"{space}{preVal('rdf:predicate', predicate)}"
                                 F"{space}{preVal('rdf:subject', F'{src_data}')}"
                                 F"{space}{preVal('rdf:object', F'{trg_data}')}")

                    # ANNOTATION OF THE LINK USING THE REIFIED CODE
                    for counter, (predicate, index) in enumerate(vars_dic.items()):
                        end = ".\n" if counter == vars_size - 1 else ";"

                        # APPENDING THE CLUSTER SIZE (extra triple, written in
                        # addition to the cluster id handled below)
                        if clusters and predicate == VoidPlus.cluster_ID_ttl and row[index] in clusters:
                            buffer.write(F"{space * 2}{VoidPlus.cluster_size_ttl:{Vars.PRED_SIZE}}"
                                         F"{Literal(clusters[row[index]]).n3(MANAGER)} ;\n")

                        # APPENDING THE VALIDATION FLAG
                        if predicate == VoidPlus.has_validation_status_ttl:
                            triple_value = validate.get_resource[row[index]]

                        # APPENDING ANYTHING ELSE
                        else:
                            triple_value = Literal(round(float(row[index]), 5)).n3(MANAGER) \
                                if Grl.isDecimalLike(row[index]) \
                                else Literal(row[index]).n3(MANAGER)

                        buffer.write(F"{space * 2}{predicate:{Vars.PRED_SIZE}}{triple_value} {end}\n")

                    yield buffer.getvalue()
                    clearBuffer(buffer)

            else:

                # THE CSV HEADER: annotation columns start at position 2
                for column in range(2, len(row)):
                    if row[column] in CSV_HEADERS:
                        vars_dic[CSV_HEADERS[row[column]]] = column
                        vars_size += 1

        except Exception as err:
            errors += F">>>> [ERROR FROM csv_2_linkset] {row}, {err}"
            print(errors)
def standardLinkGenerator(mappings: dict, link_predicate: str, result_batch, offset=0):

    """
    Yield links (dicts with 'source'/'target' keys) as asserted triples plus
    their standard (rdf:Statement) reification and annotations.

    :param mappings         : dictionary of namespaces as keys and prefixes as values.
    :param offset           : an integer to increment the counting of the links
    :param link_predicate   : a turtle representation of a URI (e.g. owl:sameAs).
    :param result_batch     : an iterable object with link results.
    :return                 : Yields a string as set of triples.
    """

    buffer = Buffer()

    def ns_modification(uri):
        """Shorten *uri* with the first matching namespace prefix; wrap full URIs in <>."""

        for ns in mappings:
            if uri.startswith(ns):
                uri = uri.replace(ns, F"{mappings[ns]}:")
                break

        if "://" in uri:
            uri = F"<{uri}>"

        return uri

    for count, link in enumerate(result_batch):

        # GET THE SOURCE AND TARGET URIS
        src_data, trg_data = ns_modification(link['source']), ns_modification(link['target'])

        # GENERATION OF THE LINK
        if src_data and trg_data:

            # The asserted triple
            buffer.write(F"\n{space}### LINK Nbr: {count + offset}\n"
                         F"{space}{src_data}    {Rsc.ga_resource_ttl(link_predicate)}    {trg_data} .\n")

            # STANDARD REIFICATION (the triple text is hashed for a
            # deterministic reification id)
            reification = F"{space}{src_data}    {Rsc.ga_resource_ttl(link_predicate)}    {trg_data} .\n"
            code = Rsc.ga_resource_ttl(F"Reification-{Grl.deterministicHash(reification)}")
            buffer.write(F"\n{space}### STANDARD REIFICATION Nbr: {count}"
                         F"\n{space}{code}\n"
                         F"{space}{preVal('a', 'rdf:Statement')}"
                         F"{space}{preVal('rdf:predicate', link_predicate)}"
                         F"{space}{preVal('rdf:subject', F'{src_data}')}"
                         F"{space}{preVal('rdf:object', F'{trg_data}')}")

            # ANNOTATION OF THE LINK USING THE REIFIED CODE
            for counter, (feature, value) in enumerate(link.items()):

                end = ".\n" if counter == len(link) - 1 else ";"

                cur_predicate = JSON_HEADERS.get(feature, None)

                if cur_predicate:

                    # APPENDING THE VALIDATION FLAG RESOURCE
                    if cur_predicate == VoidPlus.has_validation_ttl:
                        # Order the pair so the hash is direction-independent.
                        small = link['source'] if link['source'] < link['target'] else link['target']
                        big = link['target'] if small == link['source'] else link['source']
                        key = Grl.deterministicHash(F"{small}{big}{link_predicate}")
                        triple_value = Rsc.validation_ttl(key) if key is not None else key

                    # APPENDING THE CLUSTER ID AS A RESOURCE
                    elif cur_predicate == VoidPlus.cluster_ID_ttl:
                        triple_value = Rsc.cluster_ttl(value) if value is not None else value

                    elif cur_predicate == VoidPlus.network_ID_ttl:
                        triple_value = Literal(value).n3(MANAGER) if value is not None else value

                    # APPENDING ANYTHING ELSE
                    else:
                        # The integer cluster id is skipped (the resource form
                        # is emitted above instead).
                        if cur_predicate == VoidPlus.cluster_Int_ID_ttl:
                            triple_value = None

                        elif value is not None:
                            triple_value = Literal(round(float(value), 5)).n3(MANAGER) \
                                if Grl.isDecimalLike(value) \
                                else Literal(value).n3(MANAGER)
                        else:
                            triple_value = None

                    if triple_value is not None:
                        buffer.write(F"{space * 2}{cur_predicate:{Vars.PRED_SIZE}}{triple_value} {end}\n")

            yield buffer.getvalue()
            clearBuffer(buffer)
def rdfStarLinkGenerator(mappings: dict, link_predicate: str, result_batch, offset=0):

    """
    Generates RDF-Star (RDF*) annotated links, yielding one serialised link at a time.

    :param mappings        : a dictionary mapping namespace URIs to their turtle prefixes.
    :param link_predicate  : a turtle representation of a URI (e.g. owl:sameAs).
    :param result_batch    : an iterable of link dictionaries, each holding at least
                             the keys 'source' and 'target'.
    :param offset          : an integer to increment the counting of the links.
    :return                : yields a string holding the RDF* subject (<< s p o >>)
                             followed by its annotation triples.
    """

    errors = ""
    buffer = Buffer()

    def ns_modification(uri):
        """Shorten a URI with its namespace prefix when one matches,
        otherwise wrap the (still full) URI in angle brackets."""

        for ns in mappings:
            if uri.startswith(ns):
                uri = uri.replace(ns, F"{mappings[ns]}:")
                break

        # No prefix matched: a full URI needs <...> delimiters in turtle.
        if "://" in uri:
            uri = F"<{uri}>"

        return uri

    for count, link in enumerate(result_batch):

        try:

            # GET THE SOURCE AND TARGET URIS
            src_data, trg_data = ns_modification(link['source']), ns_modification(link['target'])

            # GENERATION OF THE LINK
            if src_data and trg_data:

                # The RDFStar subject
                buffer.write(F"{space}### LINK Nbr: {count + offset}\n"
                             F"{space}<<{src_data}    {link_predicate}    {trg_data}>>\n")

                # ANNOTATION OF THE LINK
                # ll_val:has-link-validation               "not_validated" .
                for counter, (feature, value) in enumerate(link.items()):

                    # The last annotation closes the turtle statement with a dot.
                    end = ".\n" if counter == len(link) - 1 else ";"

                    current_property = JSON_HEADERS.get(feature, None)

                    if current_property:

                        # APPENDING THE VALIDATION FLAG RESOURCE.
                        # The hash key is order independent: the lexicographically
                        # smaller URI always comes first.
                        if current_property == VoidPlus.has_validation_ttl:
                            small = link['source'] if link['source'] < link['target'] else link['target']
                            big = link['target'] if small == link['source'] else link['source']
                            key = Grl.deterministicHash(F"{small}{big}{link_predicate}")
                            triple_value = Rsc.validation_ttl(key) if key is not None else key

                        # APPENDING THE CLUSTER ID AS A RESOURCE
                        elif current_property == VoidPlus.cluster_ID_ttl:
                            triple_value = Rsc.cluster_ttl(value) if value is not None else value

                        # APPENDING ANYTHING ELSE
                        else:

                            # NOT APPENDING THE CLUSTER INT ID
                            if current_property == VoidPlus.cluster_Int_ID_ttl:
                                triple_value = None

                            elif value is not None:
                                # Decimal-like values are rounded to five digits
                                # before being serialised as turtle literals.
                                triple_value = Literal(round(float(value), 5)).n3(MANAGER) \
                                    if Grl.isDecimalLike(value) \
                                    else Literal(value).n3(MANAGER)
                            else:
                                triple_value = value

                        if triple_value is not None:
                            buffer.write(F"{space * 2}{current_property:{Vars.PRED_SIZE}}{triple_value} {end}\n")

                yield buffer.getvalue()
                clearBuffer(buffer)

        except Exception as err:
            # Surface the problem instead of silently accumulating it,
            # consistent with standardLinkGenerator_fromCSV.
            errors += F">>>> [ERROR FROM AnnotatedLinkset_Generic/rdfStarLinkGenerator] {link}, {err}"
            print(errors)
def standardLinkGenerator_fromCSV(link_predicate: str, result_batch, offset=0):

    """
    Generates turtle triples for links read from a CSV batch, annotating each
    link through standard RDF reification (an rdf:Statement resource per link).

    :param offset           : an integer to increment the counting of the links.
    :param link_predicate   : a turtle representation of a URI (e.g: owl:sameAs).
    :param result_batch     : an iterable object with link results; the first row
                              is assumed to be the CSV header, subsequent rows hold
                              [source, target, annotation-columns...].
    :return                 : Yields a string as set of triples.
    """
    errors = ""
    vars_size = 0       # number of recognised annotation columns found in the header
    buffer = Buffer()
    # Maps a turtle predicate to the CSV column index holding its value.
    vars_dic = defaultdict(int)
    # print(clusters)
    for count, row in enumerate(result_batch):

        try:

            # THE FIRST LINE IS ASSUMED TO BE THE HEADER
            if count > 0 and len(row) > 1:

                # GET THE SOURCE AND TARGET URIS
                src_data, trg_data = row[0], row[1]

                # GENERATION OF THE LINK
                if src_data and trg_data:

                    # The RDFStar subject
                    buffer.write(F"\n{space}### LINK Nbr: {count + offset}\n"
                                 F"{space}<{src_data}>    {Rsc.ga_resource_ttl(link_predicate)}    <{trg_data}> .\n")

                    # STANDARD REIFICATION
                    # The reification resource name is a deterministic hash of the
                    # full link statement, so re-running yields the same identifier.
                    link = F"{space}{src_data}    {Rsc.ga_resource_ttl(link_predicate)}    {trg_data} .\n"
                    code = Rsc.ga_resource_ttl(F"Reification-{Grl.deterministicHash(link)}")
                    buffer.write(F"\n{space}### STANDARD REIFICATION Nbr: {count}"
                                 F"\n{space}{code}\n"
                                 F"{space}{preVal('a', 'rdf:Statement')}"
                                 F"{space}{preVal('rdf:predicate', link_predicate)}"
                                 F"{space}{preVal('rdf:subject', F'<{src_data}>')}"
                                 F"{space}{preVal('rdf:object', F'<{trg_data}>')}")

                    # ANNOTATION OF THE LINK USING THE REIFIED CODE
                    for counter, (predicate, index) in enumerate(vars_dic.items()):
                        # The last annotation closes the turtle statement with a dot.
                        end = ".\n" if counter == vars_size - 1 else ";"

                        # APPENDING THE CLUSTER SIZE
                        # if clusters and predicate == VoidPlus.cluster_ID_ttl and int(row[index]) in clusters:
                        #     buffer.write(F"{space * 2}{VoidPlus.cluster_size_ttl:{Vars.PRED_SIZE}}"
                        #                  F"{Literal(clusters[int(row[index])]['size']).n3(MANAGER)} ;\n")

                        # APPENDING THE VALIDATION FLAG
                        # if predicate == VoidPlus.has_validation_flag_ttl:
                        #     triple_value = validate.get_resource[row[index]]

                        # APPENDING THE VALIDATION FLAG RESOURCE
                        # The hash key is order independent: the lexicographically
                        # smaller URI always comes first.
                        if predicate == VoidPlus.has_validation_ttl:
                            small = src_data if src_data < trg_data else trg_data
                            big = trg_data if small == src_data else src_data
                            key = Grl.deterministicHash(F"{small}{big}{link_predicate}")
                            triple_value = Rsc.validation_ttl(key)
                            # buffer.write(F"{space * 2}{VoidPlus.has_validation_ttl:{Vars.PRED_SIZE}}{triple_value} {end}\n")

                        # APPENDING THE CLUSTER ID AS A RESOURCE
                        elif predicate == VoidPlus.cluster_ID_ttl:
                            cluster_id = int(row[index])
                            triple_value = Rsc.cluster_ttl(cluster_id)
                            # clusters[cluster_id]['item'].extend([src_data, trg_data])

                        # APPENDING ANYTHING ELSE
                        # NOTE(review): assumes row[index] is never None/empty for
                        # recognised columns — verify against the CSV producer.
                        else:
                            triple_value = Literal(round(float(row[index]), 5)).n3(MANAGER) \
                                if Grl.isDecimalLike(row[index]) \
                                else Literal(row[index]).n3(MANAGER)

                        buffer.write(F"{space * 2}{predicate:{Vars.PRED_SIZE}}{triple_value} {end}\n")

                    yield buffer.getvalue()
                    clearBuffer(buffer)

            else:

                # THE CSV HEADER
                # Star at position
                # MAPPING THE CSV HEADERS
                row_header = row
                # print(header, len(header))

                # Columns 0 and 1 are source/target; only columns from index 2
                # onwards are candidate annotation columns.
                for column in range(2, len(row_header)):

                    # NOTE(review): the membership test uses row[column] while the
                    # lookup uses row_header[column]; they alias the same list here,
                    # but the mixed usage is fragile — confirm intent.
                    if row[column] in CSV_HEADERS:
                        vars_dic[CSV_HEADERS[row_header[column]]] = column
                        # print('--->', CSV_HEADERS[header[column]], header[column], column)
                        vars_size += 1

        except Exception as err:
            errors += F">>>> [ERROR FROM AnnotatedLinkset_Generic/standardLinkGenerator] \n\t{row} \n\t{err}"
            print(errors)