示例#1
0
def add_authorship_links():
    """
    Create people to authorship links.

    Runs, in batches of at most 500 solutions, a CONSTRUCT query that pairs
    every foaf:Person with each vivo:Authorship sharing the same wos:daisNg
    value and not yet related to that person. Each batch is handed to
    ``vstore.bulk_add`` together with PEOPLE_AUTHORSHIP.

    The loop stops when the store raises ResultException or when a batch
    comes back empty.

    Returns:
        True once no further batches are produced.
    """
    logger.info("Adding additional authorships.")
    vstore = backend.get_store()
    while True:
        logger.info("Fetching people to authorship batch.")
        q = rq_prefixes + """
        CONSTRUCT {
            ?aship vivo:relates ?p .
        }
        WHERE {
            ?p a foaf:Person ;
                wos:daisNg ?dais .
            ?aship a vivo:Authorship ;
                wos:daisNg ?dais .
            FILTER NOT EXISTS { ?aship vivo:relates ?p }
        }
        LIMIT 500
        """
        logger.info("Authorship query:\n" + q)
        try:
            g = vstore.query(q).graph
            # An empty batch means nothing is left to link. Without this
            # exit the same empty query would be repeated forever whenever
            # the store answers with an empty graph instead of raising.
            if not g:
                break
            vstore.bulk_add(PEOPLE_AUTHORSHIP, g)
        except ResultException:
            break
    return True
def process(named_graph, batch, dry=False, sleep=10):
    """
    Remove the triples of a named graph in batches.

    Each pass CONSTRUCTs up to ``batch`` triples from ``named_graph`` and
    passes them to ``vstore.bulk_remove``. The loop ends when the store
    raises ResultException or a pass finds no triples.

    Args:
        named_graph: Graph URI, substituted into ``GRAPH <...>``.
        batch: Maximum number of triples per pass. Accepts an int or a
            numeric string; it is converted with ``str`` before being
            placed in the query.
        dry: When true, log the size of the first batch and stop without
            removing anything.
        sleep: Seconds to pause after each removal; values <= 0 disable
            the pause.

    Returns:
        True when the loop ends.
    """
    while True:
        vstore = backend.get_store()
        logger.info("Querying {} for triples to remove.".format(named_graph))
        # str(batch): str.replace() rejects non-string replacements, so an
        # integer batch size used to raise TypeError here.
        q = """
        CONSTRUCT {
            ?s ?p ?o .
        }
        WHERE {
            GRAPH <?g> {
                ?s ?p ?o .
            }
        }
        LIMIT ?batch
        """.replace("?g", named_graph).replace("?batch", str(batch))
        logger.info(q)
        try:
            rsp = vstore.query(q)
            g = rsp.graph
            num_found = len(g)
        except ResultException:
            logger.info("No triples to remove.")
            break
        if num_found == 0:
            # Nothing left in the graph; stop instead of looping forever.
            logger.info("No triples to remove.")
            break
        if dry:
            # Nothing is removed in a dry run, so the next query would
            # return the same batch; report once and stop.
            logger.info("Dry run. {} triples found in {}. No changes made.".format(
                num_found, named_graph))
            break
        logger.info("Removing {} triples from {}.".format(
            num_found, named_graph))
        vstore.bulk_remove(named_graph, g)
        if sleep > 0:
            logger.info("Sleeping between batches.")
            time.sleep(sleep)

    return True
def process(clean_file, dry=False):
    """
    Apply a CSV of cleaned sub-organization names to the store.

    The CSV's first line is skipped as a header. In every other row,
    column 0 is the preferred name and column 1 an existing name variant;
    rows with an empty variant (or fewer than two columns) are ignored.
    The resulting variant -> preferred-name mapping is passed to
    ``index_orgs``, which returns the graphs of triples to add and remove.

    Args:
        clean_file: Path of the CSV file to read.
        dry: When true, build the add/remove graphs and log their sizes
            but do not modify the store.
    """
    vstore = backend.get_store()

    name_key = dict()
    # Map clean names
    with open(clean_file) as inf:
        # Skip the header row; the default keeps an empty file from
        # raising StopIteration. (The original `inf.next()` is Python 2 only.)
        next(inf, None)
        for row in csv.reader(inf):
            if len(row) < 2:
                continue
            pname, ename = row[0].strip(), row[1].strip()
            if ename == "":
                continue
            logger.info("Cleaning {} with variant {}.".format(pname, ename))
            # Always create variant for the preferred name too.
            name_key[pname] = pname
            name_key[ename] = pname

    addg, removeg = index_orgs(name_key)

    if dry:
        logger.info("Dry run. Would remove {} and add {} triples. No changes made.".format(
            len(removeg), len(addg)))
        return

    graphs = [ADDRESS_GRAPH, SUBORG_GRAPH]

    # Remove from these graphs
    for g in graphs:
        logger.info("Removing preferred name triples with {} triples from {} graph.".format(len(removeg), g))
        vstore.bulk_remove(g, removeg)

    logger.info("Adding preferred name triples with {} triples.".format(len(addg)))
    # NOTE(review): the original call was `vstore.bulk_add(, addg)` -- a
    # syntax error with the target graph missing. SUBORG_GRAPH is assumed
    # because the added triples describe wos:SubOrganization resources;
    # confirm the intended graph.
    vstore.bulk_add(SUBORG_GRAPH, addg)
示例#4
0
def build_dtu_people():
    """
    Type people as wos:DTUResearcher.

    CONSTRUCTs a ``?person a wos:DTUResearcher`` triple for every
    foaf:Person reached from d:org-technical-university-of-denmark through
    an address and an authorship, then hands the resulting graph to the
    store's ``bulk_add`` with AFFILIATION_NG.
    """
    logger.info("Adding DTUResearcher type.")
    store = backend.get_store()
    construct = """
    CONSTRUCT {
        ?person a wos:DTUResearcher
    }
    WHERE {
        d:org-technical-university-of-denmark a wos:UnifiedOrganization ;
               vivo:relatedBy ?address .
        ?address a wos:Address ;
               vivo:relatedBy ?authorship ;
               vivo:relates d:org-technical-university-of-denmark .
        ?authorship a vivo:Authorship ;
                vivo:relates ?person .
        ?person a foaf:Person .
    }
    """
    sparql = rq_prefixes + construct
    logger.info("DTU people query:\n" + sparql)

    result = store.query(sparql)
    store.bulk_add(AFFILIATION_NG, result.graph)
示例#5
0
def build_unified_affiliation():
    """
    Relate person entities to unified organizations.

    Runs, in batches of at most 500 solutions, a CONSTRUCT query that
    types each foaf:Person as wos:ExternalResearcher and links it via
    wos:hasAffiliation to every wos:UnifiedOrganization (other than
    d:org-technical-university-of-denmark) reached through an address and
    an authorship. People already typed wos:ExternalResearcher are
    excluded by the query. Each batch is passed to ``vstore.bulk_add``
    with AFFILIATION_NG.

    The loop stops when the store raises ResultException or when a batch
    comes back empty.
    """
    logger.info("Adding unified affiliation.")
    vstore = backend.get_store()
    while True:
        q = rq_prefixes + """
        CONSTRUCT {
            ?person a wos:ExternalResearcher .
            ?person wos:hasAffiliation ?unifOrg .
        }
        WHERE {
            ?unifOrg a wos:UnifiedOrganization ;
                   vivo:relatedBy ?address .
            ?address a wos:Address ;
                   vivo:relatedBy ?authorship ;
                   vivo:relates ?unifOrg .
            ?authorship a vivo:Authorship ;
                    vivo:relates ?person .
            ?person a foaf:Person .
            FILTER (?unifOrg !=  d:org-technical-university-of-denmark)
            FILTER NOT EXISTS { ?person a wos:ExternalResearcher }
        }
        LIMIT 500
        """
        logger.info("Affiliation query:\n" + q)
        try:
            g = vstore.query(q).graph
            # An empty batch means every matching person has been handled.
            # Without this exit the loop would never end if the store
            # returns an empty graph instead of raising.
            if not g:
                break
            vstore.bulk_add(AFFILIATION_NG, g)
        except ResultException:
            break
def get_unified_orgs():
    """
    List every wos:UnifiedOrganization with its rdfs:label.

    Returns:
        A list of ``(uri, label)`` tuples, both converted with
        ``toPython()``.
    """
    sparql = rq_prefixes + """
    select ?wosU ?org
    where {
        ?wosU a wos:UnifiedOrganization ;
            rdfs:label ?org .
    }
    """
    store = backend.get_store()
    return [
        (hit.wosU.toPython(), hit.org.toPython())
        for hit in store.query(sparql)
    ]
示例#7
0
def get_journals():
    """
    Index resources that carry a bibo:issn by that ISSN.

    Returns:
        A dict mapping each ISSN (converted with ``toPython()``) to the
        ``?j`` term bound alongside it. When an ISSN occurs more than
        once, the last row wins.
    """
    sparql = rq_prefixes + """

    select ?j ?issn
    where {
        ?j bibo:issn ?issn .
    }
    """
    store = backend.get_store()
    return {hit.issn.toPython(): hit.j for hit in store.query(sparql)}
def get_wos_pubs():
    """
    Index publications by their WoS identifier.

    Returns:
        A dict mapping each wos:wosId value, with every ``'WOS:'``
        occurrence stripped, to the ``?pub`` term bound alongside it.
    """
    store = backend.get_store()
    sparql = rq_prefixes + """
            select ?pub ?wosId
            where {
                ?pub wos:wosId ?wosId .
            }
    """
    logger.debug(sparql)
    pubs_by_id = {}
    for hit in store.query(sparql):
        logger.debug(hit)
        wos_id = hit.wosId.toPython().replace('WOS:', '')
        pubs_by_id[wos_id] = hit.pub
    return pubs_by_id
示例#9
0
def get_existing_people():
    """
    Fetch the terms of all foaf:Person resources in the store.

    Returns:
        A list with the ``?p`` binding of each result row.
    """
    logger.info("Getting existing profiles.")
    sparql = rq_prefixes + """
    select ?p
    where
    {
        ?p a foaf:Person
    }
    """
    store = backend.get_store()
    return [hit.p for hit in store.query(sparql)]
def get_existing_address(uri):
    """
    Find the wos:Address resources related to ``uri``.

    Args:
        uri: Term bound to ``?uri`` in the query via ``initBindings``.

    Returns:
        A tuple ``(addresses, graph)``: the list of address terms, and a
        Graph holding, for each address, the two triples
        ``address vivo:relates uri`` and ``uri vivo:relatedBy address``.
    """
    store = backend.get_store()
    sparql = rq_prefixes + """
    SELECT ?address
    WHERE {
        ?uri vivo:relatedBy ?address.
        ?address a wos:Address .
    }
    """
    hits = store.query(sparql, initBindings={'uri': uri})
    addresses = [hit.address for hit in hits]

    link_graph = Graph()
    for address in addresses:
        link_graph.add((address, VIVO.relates, uri))
        link_graph.add((uri, VIVO.relatedBy, address))
    return addresses, link_graph
示例#11
0
def build_email_profiles():
    """
    Builds profiles for researchers with emails and a minimum number of publications.

    Groups vivo:Authorship records by wos:email, ignoring authorships
    whose wos:daisNg already belongs to a foaf:Person, and keeps groups
    whose count is at least 3. A Researcher is built for each group from
    the result row and the group's DAIS ids; its RDF is collected unless
    its URI is already an existing person. The collected graph is passed
    to ``vstore.bulk_add`` with PEOPLE_EMAIL_GRAPH.
    """
    q = rq_prefixes + """
        select 
            (COUNT(?aship) as ?num)
            ?email
            (SAMPLE(?fullName) AS ?name) 
            (group_concat(distinct ?fullName ; separator = "|") AS ?full_names)
            (SAMPLE(?first) AS ?firstName) 
            (SAMPLE(?last) AS ?lastName) 
            (group_concat(distinct ?daisNg ; separator = "|") AS ?dais) 
        where {
            ?aship a vivo:Authorship ;
                wos:fullName ?fullName ;
                rdfs:label ?label ;
                wos:daisNg ?daisNg ;
                wos:email ?email ;
                wos:firstName ?first ;
                wos:lastName ?last .
            FILTER NOT EXISTS {
                ?p a foaf:Person ;
                    wos:daisNg ?daisNg .
            }
        }
        GROUP BY ?email
        HAVING (?num >= 3)
        ORDER BY DESC(?num)
    """
    logger.info("Email profiles query:\n" + q)
    vstore = backend.get_store()
    # A set gives constant-time membership checks; the list returned by
    # get_existing_people() was scanned once per result row.
    existing = set(get_existing_people())
    g = Graph()
    for person in vstore.query(q):
        name = person.name.toPython()
        email = person.email.toPython()
        # ?dais is a "|"-joined group_concat of the group's DAIS ids.
        dais_ids = person.dais.toPython().split("|")
        logger.info("Building profile for {} with {}.".format(name, email))
        vper = Researcher(person, dais_ids)
        if vper.uri in existing:
            logger.info("Profile exists for {}.".format(vper.uri))
            continue
        g += vper.to_rdf()
    vstore.bulk_add(PEOPLE_EMAIL_GRAPH, g)
示例#12
0
def remove_internal_external():
    """
    Remove the wos:ExternalResearcher class from those that
    are also DTU researchers.

    CONSTRUCTs ``?r a wos:ExternalResearcher`` for every resource typed
    both wos:ExternalResearcher and wos:DTUResearcher and passes the
    resulting graph to ``vstore.bulk_remove`` with AFFILIATION_NG.
    A ResultException from the store is treated as "nothing to remove".
    """
    logger.info("Removing external researcher from internal researchers.")
    vstore = backend.get_store()
    q = rq_prefixes + """
    CONSTRUCT {
        ?r a wos:ExternalResearcher .
    }
    WHERE {
        ?r a wos:ExternalResearcher, wos:DTUResearcher .
    }
    """
    try:
        g = vstore.query(q).graph
        vstore.bulk_remove(AFFILIATION_NG, g)
    except ResultException:
        # Still best-effort, but leave a trace instead of failing silently.
        logger.info("No external researcher types removed.")
def index_orgs(name_key):
    """
    Build the triples needed to re-home sub-organizations under clean names.

    Queries every wos:SubOrganization that is related, through a
    wos:Address, to d:org-technical-university-of-denmark. For each one
    whose wos:subOrganizationName is a key of ``name_key``, a new URI is
    minted from the preferred name with ``slug_uri(..., prefix="dtusuborg")``.

    Args:
        name_key: Dict mapping an existing sub-organization name to its
            preferred name.

    Returns:
        A tuple ``(addg, rmg)``. ``addg`` describes the new
        sub-organizations (type, label, name, the existing name as a
        variant) and relates each existing address to the new URI.
        ``rmg`` accumulates the address link triples returned by
        ``get_existing_address`` for the old organization URIs.
    """
    vstore = backend.get_store()

    rmg = Graph()
    addg = Graph()

    q = rq_prefixes + """
    SELECT DISTINCT ?org ?name
    WHERE {
        ?org a wos:SubOrganization ;
            rdfs:label ?label ;
            wos:subOrganizationName ?name ;
            vivo:relatedBy ?address .
        ?address a wos:Address ;
            vivo:relates ?pub, d:org-technical-university-of-denmark .
    }
    """
    for row in vstore.query(q):
        existing_name = row.name
        pname = name_key.get(existing_name.toPython())
        if pname is None:
            # No cleaned name supplied for this organization.
            continue
        logger.info("Processing existing name {} to clean name {}.".format(existing_name, pname))
        addr_uris, to_remove = get_existing_address(row.org)
        rmg += to_remove
        new_uri = slug_uri(pname, prefix="dtusuborg")
        addg.add((new_uri, RDF.type, WOS.SubOrganization))
        addg.add((new_uri, RDFS.label, Literal(pname)))
        addg.add((new_uri, WOS.subOrganizationName, Literal(pname)))
        # Keep the original spelling as a variant of the preferred name.
        addg.add((new_uri, WOS.subOrganizationNameVariant, existing_name))
        for auri in addr_uris:
            addg.add((auri, VIVO.relates, new_uri))

    return addg, rmg
示例#14
0
def process(triple_files, format="nt", dry=False, sync=False, sleep=10, size=DEFAULT_BATCH_SIZE):
    """
    Load triple files into named graphs derived from their file names.

    Args:
        triple_files: Iterable of file paths to parse.
        format: Serialization format passed to ``Graph.parse``.
        dry: When exactly True, only log what would be processed.
        sync: When exactly True, call ``backend.sync_updates``; otherwise
            post the triples with ``vstore.bulk_add``.
        sleep: Seconds to pause after a file that produced changes.
        size: Batch size forwarded to the add/sync call.

    Returns:
        True after all files have been handled.
    """
    store = backend.get_store()
    for path in triple_files:
        triples = Graph()
        triples.parse(source=path, format=format)
        # Graph name: NG_BASE plus the file name up to its first dot.
        file_name = path.split("/")[-1]
        named_graph = NG_BASE + file_name.split(".")[0]
        logger.info("Processing updates with {} triples to {} and batch size {}.".format(len(triples), named_graph, size))

        if dry is True:
            logger.info("Dry run. No changes made.")
            continue

        if sync is True:
            logger.info("Syncing graph to {}.".format(named_graph))
            added, removed = backend.sync_updates(named_graph, triples, size=size)
        else:
            logger.info("Posting graph as updates to {}.".format(named_graph))
            added = store.bulk_add(named_graph, triples, size=size)
            removed = 0

        changed = (added > 0) or (removed > 0)
        if not changed:
            logger.info("No changes made to {}.".format(named_graph))
            continue

        if sleep > 0:
            logger.info("Sleeping for {} seconds between files.".format(sleep))
            time.sleep(sleep)
    return True
    'RUSSIA': 'russian-federation',
    'VENEZUELA': 'venezuela-bolivarian-republic-of',
    'BOLIVIA': 'bolivia',
    'CZECH REPUBLIC': 'czech-republic',
    'IVORY COAST': 'ivory-cost',
    'TAIWAN': 'china',
    'SOUTH KOREA': 'south-korea',
    'VIETNAM': 'viet-nam',
    'IRAN': 'iran-islamic-republic-of',
    'REPUBLIC OF GEORGIA': 'georgia',
    'REUNION': 'reunion',
    'TANZANIA': 'united-republic-of-tanzania',
    'BOSNIA & HERZEGOVINA': 'bosnia-herzegovina',
}

# Store handle obtained from the backend when this top-level statement runs.
store = backend.get_store()


def mk_slug(raw):
    """Return ``slugify`` applied to ``raw`` after stripping whitespace and lower-casing."""
    return slugify(raw.strip().lower())


def fetch_vivo_countries():
    q = rq_prefixes + """
    SELECT ?uri ?label ?code
    WHERE {
      ?uri a vivo:Country ;
           rdfs:label ?label ;
           <http://aims.fao.org/aos/geopolitical.owl#codeISO3> ?code .
    }
示例#16
0
def build_dais_profiles():
    """
    Builds profiles for researchers by DAIS and a minimum number of publications.

    Groups vivo:Authorship records by wos:daisNg and keeps the groups
    whose COUNT(?aship) is at least 20. A Researcher is built for each
    group; an ORCID (from ORCID_FILE) or ResearcherID (from RID_FILE) is
    attached only when exactly one is listed for the DAIS id. The
    collected graph is passed to ``vstore.sync_named_graph`` with
    PEOPLE_GRAPH.
    """
    with open(ORCID_FILE) as inf:
        d_to_o = json.load(inf)
    with open(RID_FILE) as inf:
        d_to_r = json.load(inf)
    q = rq_prefixes + """
        select 
            (COUNT(?aship) as ?num)
            ?dais
            (group_concat(distinct ?fullName ; separator = "|") AS ?full_names)
            (SAMPLE(?fullName) AS ?name) 
            (SAMPLE(?first) AS ?firstName) 
            (SAMPLE(?last) AS ?lastName) 
        where {
            ?aship a vivo:Authorship ;
                vivo:relates ?addr ;
                wos:fullName ?fullName ;
                rdfs:label ?label ;
                wos:daisNg ?dais ;
                wos:firstName ?first ;
                wos:lastName ?last .
        }
        GROUP BY ?dais
        HAVING (?num >= 20)
        ORDER BY DESC(?num)
    """
    logger.info("DAIS profiles query:\n" + q)
    vstore = backend.get_store()
    g = Graph()
    for person in vstore.query(q):
        name = person.name.toPython()
        dais = person.dais.toPython()
        # Use an identifier only when it is unambiguous: exactly one
        # listed. Missing or empty lists yield None instead of IndexError.
        orcids = d_to_o.get(dais) or []
        rids = d_to_r.get(dais) or []
        orcid = orcids[0] if len(orcids) == 1 else None
        rid = rids[0] if len(rids) == 1 else None
        # NOTE(review): the original also loaded AU_ID_FILE and extended
        # dais_ids with the DAIS ids listed for the ORCID/RID, but then
        # reset dais_ids to [dais] before use, so only this single id ever
        # reached Researcher. That effective behaviour is kept; restore
        # the merge if it was intended.
        dais_ids = [dais]
        logger.info("Building profile for {} with {}.".format(name, dais))
        vper = Researcher(person, dais_ids)
        g += vper.to_rdf()
        if orcid is not None:
            g.add((vper.uri, WOS.orcid, Literal(orcid)))
        if rid is not None:
            # Previously this branch wrote WOS.orcid / Literal(orcid), so
            # the ResearcherID itself was never stored.
            g.add((vper.uri, VIVO.researcherId, Literal(rid)))

    vstore.sync_named_graph(PEOPLE_GRAPH, g)
示例#17
0
def build_orcid_rid_profiles():
    """
    Builds profiles for researchers with RIDs or ORCIDs.

    Groups vivo:Authorship records by wos:daisNg. For each group, the
    DAIS id is looked up in ORCID_FILE first and RID_FILE second; groups
    with no identifier, or with more than one identifier of the kind
    found, are skipped. The identifier is then mapped through AU_ID_FILE
    to the full list of DAIS ids used to build the Researcher. Profiles
    whose URI already exists are skipped. The collected graph is passed
    to ``vstore.bulk_add`` with PEOPLE_IDENTIFIERS_GRAPH.
    """
    q = rq_prefixes + """
    select 
        (COUNT(?aship) as ?num)
        ?dais 
        (group_concat(distinct ?fullName ; separator = "|") AS ?full_names)
        (SAMPLE(?fullName) AS ?name) 
        (SAMPLE(?first) AS ?firstName) 
        (SAMPLE(?last) AS ?lastName) 
    where {
        ?aship a vivo:Authorship ;
            wos:fullName ?fullName ;
            rdfs:label ?label ;
            wos:daisNg ?dais ;
            wos:firstName ?first ;
            wos:lastName ?last ;
            vivo:relates ?addr .
    }
    GROUP BY ?dais
    #HAVING (?num >= 3)
    """
    logger.info("Author ID profiles query:\n" + q)
    vstore = backend.get_store()
    with open(ORCID_FILE) as inf:
        d_to_o = json.load(inf)
    with open(RID_FILE) as inf:
        d_to_r = json.load(inf)
    with open(AU_ID_FILE) as inf:
        auid_to_dais = json.load(inf)
    # Sets give constant-time membership checks inside the loop.
    existing = set(get_existing_people())
    done = set()
    g = Graph()
    for person in vstore.query(q):
        orcid = None
        rid = None
        dais = person.dais.toPython()
        if dais in done:
            continue
        name = person.name.toPython()
        orcids = d_to_o.get(dais)
        rids = d_to_r.get(dais)
        if orcids is not None:
            if len(orcids) > 1:
                logger.info("Ignoring {}. Multiple ORCIDs found.".format(dais))
                continue
            orcid = orcids[0]
        elif rids is not None:
            if len(rids) > 1:
                logger.info("Ignoring {}. Multiple RIDs found.".format(dais))
                continue
            rid = rids[0]
        else:
            logger.info("Skipping {} - no RID or ORCID".format(dais))
            continue
        dais_ids = auid_to_dais[orcid or rid]
        # Skip the person when any DAIS id of this identifier was already
        # handled. The original inner `for ... continue` only continued
        # its own loop, so this check never took effect.
        if any(did in done for did in dais_ids):
            continue
        logger.info("Building profile for {} with {}.".format(
            name, orcid or rid))
        done.update(dais_ids)
        vper = Researcher(person, dais_ids)
        if vper.uri in existing:
            logger.info("Profile exists with URI {}.".format(vper.uri))
            continue
        if orcid is not None:
            g.add((vper.uri, WOS.orcid, Literal(orcid)))
        elif rid is not None:
            g.add((vper.uri, VIVO.researcherId, Literal(rid)))
        g += vper.to_rdf()
    vstore.bulk_add(PEOPLE_IDENTIFIERS_GRAPH, g)