Exemplo n.º 1
0
def generate_orgs_to_pubs():
    """
    Relate organisations to publications through publication cards.

    First collects the ids of the organisations matched by the
    internal-orgs query (``intorext`` equal to 12000 and at least one
    related card whose ``typeOfCard`` is 12006). For every such id except
    ``top_org`` it then queries the publications reachable through
    PUBL_has_CARD / CARD_has_ORGA, adds one ``VIVO.relates`` triple per
    organisation/publication pair, and finally passes the graph to
    ``backend.sync_updates`` for the org-pubs named graph.
    """
    # This organisation id is skipped below.
    # NOTE(review): presumably the top-level organisation, going by the
    # variable name - confirm.
    top_org = "638881"

    internal_orgs_query = """
    <data xmlns="http://converis/ns/webservice">
     <query>
      <filter for="Organisation" xmlns="http://converis/ns/filterengine" xmlns:sort="http://converis/ns/sortingengine">
        <attribute operator="equals" argument="12000" name="intorext"/>
        <relation minCount="1" name="CARD_has_ORGA">
             <attribute operator="equals" argument="12006" name="typeOfCard"/>
         </relation>
      </filter>
     </query>
    </data>
    """

    # The "{}" placeholder receives the organisation id via str.format.
    pubs_for_org_query = """
    <data xmlns="http://converis/ns/webservice">
    <query>
        <filter for="Publication" xmlns="http://converis/ns/filterengine" xmlns:ns2="http://converis/ns/sortingengine">
            <return>
                <attributes>
                </attributes>
            </return>
            <and>
            <relation name="PUBL_has_CARD">
                <relation relatedto="{}" name="CARD_has_ORGA">
                </relation>
            </relation>
            </and>
        </filter>
    </query>
    </data>
    """
    logger.info("Fetching orgs with pub cards:\n" + internal_orgs_query)
    # De-duplicate organisation ids while collecting them.
    org_ids = {org.cid for org in client.filter_query(internal_orgs_query)}

    logger.info("Relating {} orgs to pubs.".format(len(org_ids)))
    graph = Graph()
    for org_id in org_ids:
        if org_id == top_org:
            continue
        logger.info("Processing orgs to pubs for org {}.".format(org_id))
        for pub in client.filter_query(pubs_for_org_query.format(org_id)):
            triple = (models.org_uri(org_id), VIVO.relates, models.pub_uri(pub.cid))
            logger.debug("Orgs to pubs. Processing org {} pub {}.".format(org_id, pub.cid))
            graph.add(triple)
    backend.sync_updates("http://localhost/data/org-pubs", graph)
Exemplo n.º 2
0
def build_short_url_index():
	"""
	Build the cid -> shortURL index for people and organisations.

	Runs one Converis filter query per entity type (Person, Organisation),
	each restricted to records whose ``shortURL`` attribute is not empty.
	Entries for which ``validate_slug`` returns False are logged and
	skipped. The resulting dictionary is pickled to the path held in
	``SHORT_URLS``.
	"""
	people_query = """
	<data xmlns="http://converis/ns/webservice">
	<return>
	<attributes>
	<attribute name="shortURL"/>
	</attributes>
	</return>
	<query>
	<filter for="Person" xmlns="http://converis/ns/filterengine" xmlns:sort="http://converis/ns/sortingengine">
	<and>
	  <attribute operator="notequals" argument="" name="shortURL"/>
	</and>
	</filter>
	</query>
	</data>
	"""
	org_query = """
	<data xmlns="http://converis/ns/webservice">
	<return>
	<attributes>
	<attribute name="shortURL"/>
	</attributes>
	</return>
	<query>
	<filter for="Organisation" xmlns="http://converis/ns/filterengine" xmlns:sort="http://converis/ns/sortingengine">
	<and>
	  <attribute operator="notequals" argument="" name="shortURL"/>
	</and>
	</filter>
	</query>
	</data>
	"""
	index = {}
	# People are processed first, then orgs; both share one index, so an
	# org entry overwrites a person entry with the same cid.
	for label, entity_query in (("people", people_query), ("org", org_query)):
		logger.info("Building {} shortURL index.".format(label))
		for item in client.filter_query(entity_query):
			if validate_slug(item.shorturl) is False:
				logger.info("{} - {} is not a valid shortURL.".format(item.cid, item.shorturl))
				continue
			index[item.cid] = item.shorturl
	# Pickle output is binary data: a text-mode handle raises TypeError on
	# Python 3 and may mangle bytes on platforms that translate newlines.
	with open(SHORT_URLS, 'wb') as of:
		pickle.dump(index, of)
Exemplo n.º 3
0
def get_people(sample=False):
    """
    Return a graph of all persons whose ``typeOfPerson`` equals 12105.

    Each person returned by the filter query is converted with
    ``client.to_graph(person, models.Person)`` and merged into one graph.

    :param sample: when exactly ``True``, stop after the first 10 persons.
    :return: the combined graph.
    """
    person_query = """
    <data xmlns="http://converis/ns/webservice">
     <return>
      <attributes>
       <attribute name="Short description"/>
       <attribute name="cfFamilyNames"/>
       <attribute name="cfFirstNames"/>
       <attribute name="middleName"/>
       <attribute name="email"/>
       <attribute name="ORCID"/>
       <attribute name="academicTitle"/>
       <attribute name="cfResInt"/>
      </attributes>
     </return>
     <query>
      <filter for="Person" xmlns="http://converis/ns/filterengine" xmlns:sort="http://converis/ns/sortingengine">
       <and>
        <and>
         <attribute argument="12105" name="typeOfPerson" operator="equals"/>
        </and>
       </and>
      </filter>
     </query>
    </data>
    """
    people_graph = Graph()
    for count, person in enumerate(client.filter_query(person_query), 1):
        people_graph += client.to_graph(person, models.Person)
        # Sampling only cuts the harvest short; it does not change the query.
        if sample is True and count >= 10:
            break
    return people_graph
Exemplo n.º 4
0
def single_thread_harvest():
    """
    Harvest clinical trials into one graph and sync it to the backend.

    Every item returned for ``query`` is mapped with
    ``models.ClinicalTrial``; the merged graph is passed to
    ``backend.sync_updates`` together with ``ng``. Both ``query`` and
    ``ng`` are defined outside this function.
    """
    trials = Graph()
    for trial in client.filter_query(query):
        trials += client.to_graph(trial, models.ClinicalTrial)
    backend.sync_updates(ng, trials)
Exemplo n.º 5
0
def get_pub_cards():
    """
    Return a graph built from every card related to at least one publication.

    Each matching card id is passed to ``models.pub_to_card`` and the
    returned triples are merged into a single graph. Progress is logged
    after every 200 cards.
    """
    card_query = """
    <data xmlns="http://converis/ns/webservice">
      <return>
        <attributes>
        </attributes>
      </return>
     <query>
      <filter for="Card" xmlns="http://converis/ns/filterengine" xmlns:sort="http://converis/ns/sortingengine">
      <and>
        <and>
            <relation minCount="1" name="PUBL_has_CARD"/>
        </and>
        <!-- <and>
            <relation name="PUBL_has_CARD">
                <attribute argument="2014" name="publYear" operator="greaterequal"/>
            </relation>
        </and> -->
      </and>
      </filter>
     </query>
    </data>
    """
    cards_graph = Graph()
    for count, card in enumerate(client.filter_query(card_query), 1):
        cards_graph += models.pub_to_card(card.cid)
        if not count % 200:
            logging.info("Publications fetched: {}.".format(count))
    return cards_graph
Exemplo n.º 6
0
def harvest_journals():
    """
    Harvest every journal that has at least one related publication.

    The journals are mapped with ``models.Journal``, merged into one graph
    and handed to ``backend.sync_updates`` for the journals named graph.
    """
    logger.info("Harvesting journals.")
    journal_query = """
    <data xmlns="http://converis/ns/webservice">
     <query>
      <filter for="Journal" xmlns="http://converis/ns/filterengine" xmlns:sort="http://converis/ns/sortingengine">
      <and>
        <and>
         <relation minCount="1" name="PUBL_has_JOUR"/>
        </and>
      </and>
      </filter>
     </query>
    </data>
    """
    journals = Graph()
    for journal in client.filter_query(journal_query):
        journals += client.to_graph(journal, models.Journal)
    backend.sync_updates("http://localhost/data/journals", journals)
Exemplo n.º 7
0
def single_thread_harvest():
    """
    Harvest people whose ``fhpersontype`` cid is '6019159' and post them.

    For every hit of ``query`` (defined outside this function) the full
    Person entity is loaded via ``client.Entity``. Entities without an
    ``fhpersontype`` attribute, or with a different cid, are ignored. The
    merged graph is sent with ``backend.post_updates``.
    """
    people_graph = Graph()
    for hit in client.filter_query(query):
        person = client.Entity('Person', hit.cid)
        # FH people only
        if hasattr(person, 'fhpersontype') and person.fhpersontype['cid'] == '6019159':
            people_graph += client.to_graph(person, models.Person)
    backend.post_updates("http://localhost/data/people", people_graph)
Exemplo n.º 8
0
def single_thread_harvest_awards(sample=True):
    """
    Harvest all awards and sync them to the backend.

    Every item returned for ``query`` is mapped with ``Award`` and merged
    into one graph, which is passed to ``backend.sync_updates`` with ``NG``.

    :param sample: accepted for interface compatibility; this function
        does not read it.
    """
    logger.info("Harvesting Awards.")
    awards = Graph()
    for award in client.filter_query(query):
        awards += client.to_graph(award, Award)
    backend.sync_updates(NG, awards)
Exemplo n.º 9
0
def harvest():
    """
    Harvest all person pictures and sync them to the backend.

    The sync is skipped, and an error logged, when the harvested graph
    holds fewer than 200 triples.
    """
    logger.info("Harvesting all pictures.")
    pictures = Graph()
    for picture in client.filter_query(QUERY):
        pictures += client.to_graph(picture, PersonPicture)
    logger.info("Picture harvest complete")
    if len(pictures) >= 200:
        backend.sync_updates(NG, pictures)
        return
    # Too little data came back, so the backend is left untouched.
    logger.error("Picture data incomplete. Not updating")
Exemplo n.º 10
0
def get_areas():
    """
    Return a graph of every Area entity, mapped with ``models.Expertise``.
    """
    area_query = """
    <data xmlns="http://converis/ns/webservice">
     <query>
      <filter for="Area" xmlns="http://converis/ns/filterengine" xmlns:sort="http://converis/ns/sortingengine">
      </filter>
     </query>
    </data>
    """
    areas = Graph()
    for area in client.filter_query(area_query):
        areas += client.to_graph(area, models.Expertise)
    return areas
Exemplo n.º 11
0
def single_thread_harvest():
    """
    Harvest all teaching lectures and sync them to the backend.

    Every item returned for ``query`` is mapped with
    ``models.TeachingLecture`` and merged into one graph. The graph is
    printed as Turtle and then passed to ``backend.sync_updates`` with
    ``NG``. ``query`` and ``NG`` are defined outside this function.
    """
    logger.info("Harvesting Teaching.")
    g = Graph()
    for lecture in client.filter_query(query):
        g += client.to_graph(lecture, models.TeachingLecture)
    # Parenthesised form: behaves the same under the Python 2 print
    # statement and is valid syntax on Python 3.
    print(g.serialize(format='turtle'))
    backend.sync_updates(NG, g)
Exemplo n.º 12
0
def single_thread_harvest():
    """
    Harvest all positions and sync them to the backend.

    Every item returned for ``query`` is mapped with ``models.Position``
    and merged into one graph, which is passed to ``backend.sync_updates``
    with ``NG``. ``query`` and ``NG`` are defined outside this function.
    """
    logger.info("Harvesting Positions.")
    g = Graph()
    # The leftover ipdb breakpoint (triggered for every item past the
    # 100th) was removed: it stalled any non-interactive harvest run.
    for pos in client.filter_query(query):
        g += client.to_graph(pos, models.Position)
    backend.sync_updates(NG, g)
Exemplo n.º 13
0
def harvest_service(sample=False):
    """
    Harvest all service items and sync them to the backend.

    Every item returned for ``service_q`` is mapped with ``Service`` and
    merged into one graph. The graph is printed as n3 and then passed to
    ``backend.sync_updates`` with ``NG``.

    :param sample: when exactly ``True``, stop after 100 items.
    """
    g = Graph()
    done = 0
    for item in client.filter_query(service_q):
        # Per-item trace output, not a failure: logged at debug level so it
        # no longer floods the error log on every run.
        logger.debug(item.cid)
        g += client.to_graph(item, Service)
        done += 1
        if (sample is True) and (done >= 100):
            break
    # Parenthesised form: behaves the same under the Python 2 print
    # statement and is valid syntax on Python 3.
    print(g.serialize(format='n3'))
    backend.sync_updates(NG, g)
Exemplo n.º 14
0
def sample_harvest():
    """
    Harvest publications whose "Publication type" equals 10347 and sync
    them to the sample-books named graph.
    """
    sample_query = """
    <data xmlns="http://converis/ns/webservice">
     <query>
      <filter for="Publication" xmlns="http://converis/ns/filterengine" xmlns:sort="http://converis/ns/sortingengine">
        <attribute operator="equals" argument="10347" name="Publication type"/>
      </filter>
     </query>
    </data>
    """
    logger.info("Starting sample publications harvest.")
    publications = Graph()
    for publication in client.filter_query(sample_query):
        publications += client.to_graph(publication, models.Publication)
    # sync_updates vs. post_updates:
    #  - backend.sync_updates replaces the named graph with the incoming
    #    data, so anything in the system that is missing from the incoming
    #    data is deleted;
    #  - backend.post_updates only updates the entities present in the
    #    incoming data and leaves everything else as it is.
    backend.sync_updates("http://localhost/data/sample-books", publications)
Exemplo n.º 15
0
def get_orgs():
    """
    Return a graph of all organisations whose ``intOrExt`` equals 12000,
    mapped with ``models.Organization``.
    """
    internal = """
    <data xmlns="http://converis/ns/webservice">
     <query>
      <filter for="Organisation" xmlns="http://converis/ns/filterengine" xmlns:sort="http://converis/ns/sortingengine">
       <and>
        <attribute argument="12000" name="intOrExt" operator="equals"/>
       </and>
      </filter>
     </query>
    </data>
    """
    orgs_graph = Graph()
    for org in client.filter_query(internal):
        orgs_graph += client.to_graph(org, models.Organization)
    return orgs_graph
Exemplo n.º 16
0
def harvest_news(sample=False):
    """
    Harvest all News entities and sync them to the news named graph.

    :param sample: when exactly ``True``, stop after 20 items.
    """
    logger.info("Harvesting News.")
    news_query = """
    <data xmlns="http://converis/ns/webservice">
     <query>
      <filter for="News" xmlns="http://converis/ns/filterengine" xmlns:sort="http://converis/ns/sortingengine">
      </filter>
     </query>
    </data>
    """
    news_graph = Graph()
    for count, entry in enumerate(client.filter_query(news_query), 1):
        news_graph += client.to_graph(entry, models.News)
        if sample is True and count >= 20:
            break
    backend.sync_updates("http://localhost/data/news", news_graph)
Exemplo n.º 17
0
def harvest_updates(days=2, test=False):
    """
    Harvest recently updated pictures and post them to the backend.

    The date returned by ``days_ago(days)`` replaces the "2000-01-01"
    placeholder in ``QUERY``. Nothing is posted when the harvest yields an
    empty graph.

    :param days: passed to ``days_ago``; the default of 2 is intended to
        yield yesterday's date.
    :param test: when exactly ``True``, stop once more than 10 pictures
        have been processed.
    """
    since = days_ago(days)
    logger.info("Harvesting updated pictures since {}.".format(since))
    updated_query = QUERY.replace("2000-01-01", since)
    pictures = Graph()
    processed = 0
    for processed, picture in enumerate(client.filter_query(updated_query), 1):
        pictures += client.to_graph(picture, PersonPicture)
        if test is True and processed > 10:
            break
    if len(pictures) == 0:
        logger.info("No updated pictures found.")
        return
    backend.post_updates(NG, pictures)
    logger.info(
        "Updated picture harvest complete. Updated: {}".format(processed))
Exemplo n.º 18
0
def pub_harvest():
    """
    Harvest publications linked, via cards, to persons whose
    ``fhPersonType`` equals 6019159, and sync them to the publications
    named graph.
    """
    publication_query = """
    <data xmlns="http://converis/ns/webservice">
    <query>
    <filter for="Publication" xmlns="http://converis/ns/filterengine" xmlns:ns2="http://converis/ns/sortingengine">
    <and>
        <and>
            <relation direction="lefttoright" name="PUBL_has_CARD">
                <relation direction="righttoleft"  name="PERS_has_CARD">
                    <attribute argument="6019159" name="fhPersonType" operator="equals"/>
                </relation>
            </relation>
        </and>
    </and>
    </filter>
    </query>
    </data>
    """
    publications = Graph()
    for publication in client.filter_query(publication_query):
        publications += client.to_graph(publication, models.Publication)
    backend.sync_updates("http://localhost/data/publications", publications)
Exemplo n.º 19
0
def get_pubs():
    """
    Return a graph of publications that have at least one card and a
    ``publYear`` of 2009 or later, mapped with ``models.Publication``.
    """
    publication_query = """
    <data xmlns="http://converis/ns/webservice">
     <query>
      <filter for="Publication" xmlns="http://converis/ns/filterengine" xmlns:sort="http://converis/ns/sortingengine">
      <and>
        <and>
         <relation minCount="1" name="PUBL_has_CARD"/>
        </and>
        <and>
         <attribute argument="2009" name="publYear" operator="greaterequal"/>
        </and>
      </and>
      </filter>
     </query>
    </data>
    """
    publications = Graph()
    for publication in client.filter_query(publication_query):
        publications += client.to_graph(publication, models.Publication)
    return publications
Exemplo n.º 20
0
def single_thread_harvest():
    """
    Harvest organisations into one graph and sync it to the backend.

    Every item returned for ``internal_orgs_query`` is mapped with
    ``models.Organization``; the merged graph is passed to
    ``backend.sync_updates`` with ``NG``. Both names are defined outside
    this function.
    """
    organizations = Graph()
    for organization in client.filter_query(internal_orgs_query):
        organizations += client.to_graph(organization, models.Organization)
    backend.sync_updates(NG, organizations)
Exemplo n.º 21
0
def single_thread_harvest():
    """
    Harvest education/training records into one graph and sync it.

    Every item returned for ``query`` is mapped with
    ``models.EducationTraining``; the merged graph is passed to
    ``backend.sync_updates`` with ``named_graph``. Both names are defined
    outside this function.
    """
    records = Graph()
    for record in client.filter_query(query):
        records += client.to_graph(record, models.EducationTraining)
    backend.sync_updates(named_graph, records)