Пример #1
0
    def __init__(self, predicate, reported, value, other_attributes=None):
        """Store the cultural-form data and derive the URI that names it.

        Args:
            predicate: Local name used to build the URI under the "cwrc"
                prefix; ``reported`` is appended to it when truthy.
            reported: Suffix concatenated onto ``predicate``; when falsy the
                predicate alone is used.
            value: Stored unchanged on ``self.value``.
            other_attributes: When truthy, taken as the URI instead of the
                one built from ``predicate`` and ``reported``.
        """
        super(CulturalForm, self).__init__()
        self.predicate = predicate
        self.reported = reported
        self.value = value

        if other_attributes:
            # An explicit override wins over anything derived.
            raw_uri = other_attributes
        else:
            if self.reported:
                local_name = self.predicate + self.reported
            else:
                local_name = self.predicate
            raw_uri = utilities.create_uri("cwrc", local_name)

        # Whatever the origin, expose the URI as an rdflib URIRef.
        self.uri = rdflib.term.URIRef(raw_uri)
Пример #2
0
    def __init__(self,
                 types,
                 value,
                 personName,
                 uri=None,
                 extraAttributes=None,
                 parentType=None,
                 otherTriples=None):
        """Initialise a name entity and, for birth names, its spare graph.

        Args:
            types: Iterable of type names; each is turned into a "cwrc" URI
                and collected in ``self.typeLabels``.
            value: The name text. It is used for ``self.id``, for the
                standard URI in ``self.value`` and, wrapped in a ``Literal``,
                for ``self.personName``.
            personName: Name of the person, used as the prefix of the
                standard URI stored in ``self.value``.
            uri: Optional URI for the entity; a falsy value leaves
                ``self.uri`` as ``None``.
            extraAttributes: Object exposing ``givenNames`` and ``surNames``.
                It is only read when "BirthName" is among ``types``.
            parentType: Accepted for interface compatibility; not used by
                this constructor.
            otherTriples: Stored unchanged on ``self.otherTriples``.

        Raises:
            AttributeError: If "BirthName" is in ``types`` and
                ``extraAttributes`` is ``None``.
        """
        self.id = utilities.remove_punctuation("NameEnt " + value)
        self.uri = uri if uri else None

        # predicate and value are used for the body in the context sections.
        self.predicate = utilities.NS_DICT["cwrc"].hasName
        self.value = utilities.make_standard_uri(personName + " NameEnt " +
                                                 value)

        self.typeLabels = [
            utilities.create_uri("cwrc", thisType) for thisType in types
        ]
        self.personName = Literal(value)
        self.otherTriples = otherTriples

        self.hasSpareGraph = False
        self.spareGraph = None

        if "BirthName" in types:
            # NOTE(review): extraAttributes must be supplied for birth names;
            # passing None fails on the attribute access below.
            self.spareGraph = self.makeBirthGraph(
                givenNameList=extraAttributes.givenNames,
                surNameList=extraAttributes.surNames)
            self.hasSpareGraph = True
Пример #3
0
    def __init__(self, predicate, place, other_attributes=None):
        """Store the location predicate, the place URI and the triple URI.

        Args:
            predicate: Local name used to build the default URI under the
                "cwrc" prefix.
            place: Passed to ``Place``; the resulting object's ``uri`` is
                stored on ``self.value``.
            other_attributes: When truthy, used as ``self.uri`` instead of
                the URI built from ``predicate``.
        """
        super(Location, self).__init__()
        self.predicate = predicate
        self.value = Place(place).uri

        # Only fall back to the predicate-derived URI when no override was
        # given; previously the override was assigned and then overwritten.
        if other_attributes:
            self.uri = other_attributes
        else:
            self.uri = utilities.create_uri("cwrc", self.predicate)
Пример #4
0
    def __init__(self,
                 id,
                 tag,
                 context_type="CULTURALFORMATION",
                 motivation="describing"):
        """Prepare a context annotation from a tag.

        Args:
            id: Identifier stored on ``self.id`` and used to create the
                "data" URI kept in ``self.uri``.
            tag: Element to annotate. Its BIBCITS, RESPONSIBILITIES and
                KEYWORDCLASSES descendants are removed in place before the
                text snippet is taken.
            context_type: Key looked up in ``self.context_map``; when it is
                not present the value itself is used as the type name.
            motivation: Local name of the "oa" motivation. The value
                "identifying" additionally fills ``self.subjects``.
        """
        super(Context, self).__init__()
        self.id = id
        self.triples = []
        self.event = None

        # Gather every unwanted element first, then remove them, so the
        # searches all run against the unmodified tag.
        discarded = [
            node
            for name in ("BIBCITS", "RESPONSIBILITIES", "KEYWORDCLASSES")
            for node in tag.find_all(name)
        ]
        for node in discarded:
            node.decompose()

        self.tag = tag
        self.heading = get_heading(tag)
        if self.heading:
            self.src = "http://orlando.cambridge.org/protected/svPeople?formname=r&people_tab=3&person_id="
        else:
            self.src = "http://orlando.cambridge.org"

        # TODO: Make snippet start where first triple is extracted from
        # The snippet is capped at MAX_WORD_COUNT words.
        full_text = str(tag.get_text())
        self.text = utilities.limit_words(full_text, MAX_WORD_COUNT)

        # Resolve the type name once; unmapped types are used verbatim.
        if context_type in self.context_map:
            type_name = self.context_map[context_type]
        else:
            type_name = context_type
        self.context_type = utilities.create_uri("cwrc", type_name)
        self.context_label = type_name.split("Context")[0] + " Context"

        self.motivation = utilities.create_uri("oa", motivation)
        if motivation == "identifying":
            self.subjects = identifying_motivation(self.tag)
        else:
            self.subjects = []
        self.uri = utilities.create_uri("data", id)
Пример #5
0
 def create_ttl_body(self, triple_str):
     """Return a graph attaching a Turtle textual body to ``self.uri``.

     The body is a blank node typed ``oa:TextualBody`` whose ``rdf:value``
     is ``triple_str`` and whose ``dcterms:format`` is "text/turtle", both
     as ``xsd:string`` literals.
     """
     oa = utilities.NS_DICT["oa"]
     xsd_string = rdflib.namespace.XSD.string

     body_node = rdflib.BNode()
     body_value = rdflib.term.Literal(triple_str, datatype=xsd_string)
     media_type = rdflib.term.Literal("text/turtle", datatype=xsd_string)
     format_predicate = utilities.create_uri("dcterms", "format")

     body_graph = rdflib.Graph()
     body_graph.add((self.uri, oa.hasBody, body_node))
     body_graph.add((body_node, RDF.type, oa.TextualBody))
     body_graph.add((body_node, RDF.value, body_value))
     body_graph.add((body_node, format_predicate, media_type))
     return body_graph
Пример #6
0
    def __init__(self, job_tag, predicate=None, other_attributes=None):
        """Derive the occupation predicate, value and URI from a job tag.

        Args:
            job_tag: Source of the occupation. With an explicit
                ``predicate`` it is passed straight to
                ``get_mapped_term``; otherwise the predicate is obtained
                from ``get_occupation_predicate`` and the value depends on
                that predicate.
            predicate: Optional predicate name that bypasses detection.
            other_attributes: When truthy, used as ``self.uri`` instead of
                the URI built from the predicate.
        """
        super(Occupation, self).__init__()
        if predicate:
            self.predicate = predicate
            self.value = self.get_mapped_term(job_tag)
        else:
            self.predicate = self.get_occupation_predicate(job_tag)
            if self.predicate == "hasEmployer":
                self.value = self.get_employer(job_tag)
            elif self.predicate == "hasOccupationIncome":
                self.value = Literal(self.get_value(job_tag))
            else:
                self.value = self.get_mapped_term(self.get_value(job_tag))

        # Only fall back to the predicate-derived URI when no override was
        # given; previously the override was assigned and then overwritten.
        if other_attributes:
            self.uri = other_attributes
        else:
            self.uri = utilities.create_uri("cwrc", self.predicate)
Пример #7
0
    def __init__(self, title, id, tag, other_attributes=None):
        """Collect an event's description, participants and dating.

        Args:
            title: Stored unchanged on ``self.title``.
            id: Used to create the "data" URI kept in ``self.uri``.
            tag: Event element; its ``CHRONPROSE`` child supplies the text
                snippet and the date tag is obtained via ``get_date_tag``.
            other_attributes: Accepted but not used by this constructor.

        For a DATERANGE date tag ``self.date`` is "FROM:TO" and
        ``self.predicate`` is ``None``. Otherwise the date is the formatted
        VALUE attribute and the predicate is chosen from the CERTAINTY
        attribute.
        """
        super(Event, self).__init__()
        self.title = title
        self.tag = tag
        self.uri = utilities.create_uri("data", id)
        self.place = utilities.get_places(tag)
        self.event_type = get_event_type(tag)
        self.actors = get_actors(tag)

        prose = str(tag.CHRONPROSE.get_text())
        self.text = utilities.limit_words(prose, MAX_WORD_COUNT)

        date_tag = get_date_tag(tag)
        self.date_tag = date_tag
        self.time_type = get_time_type(date_tag)
        self.precision = date_tag.get("CERTAINTY")
        self.time_certainty = get_time_certainty(date_tag)

        # Ranges carry both end points and use no sem predicate.
        if date_tag.name == "DATERANGE":
            range_start = date_tag.get("FROM")
            range_end = date_tag.get("TO")
            self.date = range_start + ":" + range_end
            self.predicate = None
            return

        # Single dates: the certainty decides which sem predicate applies;
        # any value not listed here falls back to hasTime.
        sem_term_for = {
            "CERT": "hasTimeStamp",
            "BY": "hasLatestBeginTimeStamp",
            "AFTER": "hasEarliestBeginTimeStamp",
            None: "hasTimeStamp",
        }
        raw_date = date_tag.get("VALUE")
        sem_term = sem_term_for.get(self.precision, "hasTime")
        self.predicate = getattr(utilities.NS_DICT["sem"], sem_term)
        self.date = format_date(raw_date)
Пример #8
0
    def to_triple(self, person=None):
        """Build the graph of annotations for this context.

        The graph always contains the snippet textual body and the
        identifying annotation. When ``self.motivation`` equals
        ``oa:describing`` a describing annotation is added as well, with one
        body per entry in ``self.triples``. Every NAME tag found in
        ``self.tag`` is also typed as a ``cwrc:NaturalPerson``.

        Side effects: ``self.subjects`` is extended and, for describing
        contexts, ``self.uri`` is replaced by the URI created from
        ``self.id + "_describing"``.

        Args:
            person: Optional object providing ``id``, ``name`` and ``uri``.
                Without it the labels omit the person's name, the source
                URL ends in "#FE" and no person body or target is attached.

        Returns:
            The graph obtained from ``utilities.create_graph()`` with all
            triples added.
        """
        oa = utilities.NS_DICT["oa"]
        cwrc = utilities.NS_DICT["cwrc"]
        xsd_string = rdflib.namespace.XSD.string

        g = utilities.create_graph()

        # Creating Textual body first
        snippet_uri = rdflib.term.URIRef(str(self.uri) + "_Snippet")

        if person:
            source_url = rdflib.term.URIRef(self.src + person.id + "#" +
                                            self.heading)
            label_prefix = person.name + " - "
            snippet_label = label_prefix + self.context_label + " snippet"
        else:
            source_url = rdflib.term.URIRef(self.src + "#FE")
            label_prefix = ""
            snippet_label = "FE" + " - " + self.context_label + " snippet"

        g.add((snippet_uri, RDF.type, oa.TextualBody))
        g.add((snippet_uri, RDFS.label, rdflib.term.Literal(snippet_label)))
        g.add((snippet_uri, oa.hasSource, source_url))
        g.add((snippet_uri, utilities.NS_DICT["dcterms"].description,
               rdflib.term.Literal(self.text, datatype=xsd_string)))

        # Creating identifying context first and always
        context_label = (label_prefix + self.context_label +
                         " identifying annotation")
        identifying_uri = utilities.create_uri("data",
                                               self.id + "_identifying")
        g.add((identifying_uri, RDF.type, self.context_type))
        g.add(
            (identifying_uri, RDFS.label, rdflib.term.Literal(context_label)))
        g.add((identifying_uri, oa.hasTarget, snippet_uri))
        g.add((identifying_uri, oa.motivatedBy, oa.identifying))

        self.subjects += identifying_motivation(self.tag)
        if self.triples and person:
            self.subjects += self.get_subjects(self.triples, person)
        for subject in self.subjects:
            g.add((identifying_uri, oa.hasBody, subject))

        if person:
            g.add((identifying_uri, oa.hasBody, person.uri))

        if self.event:
            g.add((identifying_uri, cwrc.hasEvent, self.event))

        # Creating describing context if applicable
        if self.motivation == oa.describing:
            self.uri = utilities.create_uri("data", self.id + "_describing")
            # The label and the person target follow the same "person is
            # optional" rule as the identifying annotation above instead of
            # dereferencing None.
            context_label = (label_prefix + self.context_label +
                             " describing annotation")
            g.add((self.uri, RDF.type, self.context_type))
            g.add((self.uri, RDFS.label, rdflib.term.Literal(context_label)))
            g.add((self.uri, cwrc.hasIDependencyOn, identifying_uri))
            if person:
                g.add((self.uri, oa.hasTarget, person.uri))
            g.add((self.uri, oa.hasTarget, snippet_uri))
            g.add((self.uri, oa.motivatedBy, self.motivation))

            for subject in self.subjects:
                g.add((self.uri, utilities.NS_DICT["dcterms"].subject,
                       subject))

            for component in self.triples:
                # Build and serialise each component graph exactly once.
                component_graph = component.to_triple(person)
                turtle = component_graph.serialize(format="ttl")
                if isinstance(turtle, bytes):
                    # Some rdflib versions return bytes, others str.
                    turtle = turtle.decode()
                statements = [
                    line for line in turtle.splitlines()
                    if line and "@prefix" not in line
                ]
                if len(statements) == 1:
                    # A single statement line becomes one Turtle body.
                    g += self.create_ttl_body(statements[0])
                else:
                    g += self.create_multiple_triples(component_graph)

            if self.event:
                g.add((self.uri, cwrc.hasEvent, self.event))
                g.add((self.event, cwrc.hasContext, self.uri))

        # Creating the mentioned people as natural person
        for name_tag in self.tag.find_all("NAME"):
            standard_name = name_tag.get("STANDARD")
            uri = utilities.make_standard_uri(standard_name)
            name_literal = Literal(standard_name, datatype=xsd_string)
            g.add((uri, RDF.type, cwrc.NaturalPerson))
            g.add((uri, RDFS.label, name_literal))
            g.add((uri, utilities.NS_DICT["foaf"].name, name_literal))

        return g