def construction_schema_objects(series):
    """Build the doctype query for one schema row.

    Parameters
    ==========
    series : row-like object exposing ``id``, ``label`` and ``comment``

    Returns the ``doctype`` query for ``series.id``. When ``series.id`` is a
    key of ``SIMPLE_TYPE_MAP``, a ``<id>Value`` property typed
    ``xsd:<mapped value>`` is chained onto it.
    """
    doc_query = WOQLQuery().doctype(series.id, label=series.label, description=series.comment)
    if series.id not in SIMPLE_TYPE_MAP:
        return doc_query
    # Simple types additionally carry a literal value property.
    value_type = "xsd:" + SIMPLE_TYPE_MAP[series.id]
    return doc_query.property(series.id + "Value", value_type)
def construct_prop_dr(series):
    """Create the helper doctypes for a property with several domains/ranges.

    Parameters
    ==========
    series : row-like object exposing ``id``, ``domainIncludes`` and
             ``rangeIncludes``

    Returns a list of queries: a ``<id>Domain`` doctype when
    ``domainIncludes`` is a string containing a comma, and a ``<id>Range``
    doctype when ``rangeIncludes`` is. Non-string values (e.g. NaN or None)
    and comma-less strings contribute nothing.
    """
    def _is_multi_valued(value):
        # isinstance (rather than an exact type comparison) also accepts
        # str subclasses.
        return isinstance(value, str) and ',' in value

    result = []
    if _is_multi_valued(series.domainIncludes):
        result.append(WOQLQuery().doctype(series.id + "Domain"))
    if _is_multi_valued(series.rangeIncludes):
        result.append(WOQLQuery().doctype(series.id + "Range"))
    return result
def construct_simple_type_relations():
    """Build the ``subClassOf`` quads for the entries of ``SIMPLE_TYPE_MAP``.

    Keys mapped to ``"string"`` are made subclasses of
    ``http://schema.org/Thing`` and keys mapped to ``"dateTime"`` subclasses
    of ``http://schema.org/DateTime``; a key equal to its would-be parent is
    skipped. All quads are added to the ``"schema"`` graph.

    Returns the list of ``add_quad`` queries.
    """
    thing = "http://schema.org/Thing"
    date_time = "http://schema.org/DateTime"

    relations = []
    for type_id, xsd_type in SIMPLE_TYPE_MAP.items():
        if xsd_type == "string":
            parent = thing
        elif xsd_type == "dateTime":
            parent = date_time
        else:
            continue
        # A class is never declared a subclass of itself.
        if type_id != parent:
            relations.append(WOQLQuery().add_quad(type_id, "subClassOf", parent, "schema"))
    return relations
def generateMatchClause(code, type, i):
    """Build the match clause for one code.

    Bug in python string conversion - have to do it automatically

    Parameters
    ==========
    code : value wrapped with ``WOQLQuery().string`` for both sub-clauses
    type : string appended to ``"doc:"`` to form the idgen prefix
    i : index used to suffix the ``v:ID_`` and ``v:Label_`` variables

    Returns the ``woql_and`` of an ``idgen`` clause (target ``v:ID_<i>``) and
    a ``cast`` to ``xsd:string`` (target ``v:Label_<i>``).
    """
    suffix = str(i)
    id_clause = WOQLQuery().idgen("doc:" + type, [WOQLQuery().string(code)], "v:ID_" + suffix)
    label_clause = WOQLQuery().cast(WOQLQuery().string(code), "xsd:string", "v:Label_" + suffix)
    return WOQLQuery().woql_and(id_clause, label_clause)
def create_schema_add_ons(client, queries):
    """Combine groups of queries into one conjunction and execute it.

    Parameters
    ==========
    client : passed to ``execute``
    queries : iterable of query lists

    Each list with more than one query is wrapped in its own ``woql_and``, a
    single-query list contributes that query directly, and empty lists are
    ignored. Returns whatever ``execute`` returns.
    """
    combined = [
        group[0] if len(group) == 1 else WOQLQuery().woql_and(*group)
        for group in queries
        if len(group) >= 1
    ]
    return WOQLQuery().woql_and(*combined).execute(client)
def generateMultiInsertQuery(codes, type):
    """Build one ``when`` query that inserts a document per code.

    Parameters
    ==========
    codes : iterable of codes
    type : forwarded to ``generateMatchClause`` and ``generateInsertClause``

    For the code at position ``n`` the match and insert clauses are both
    generated with index ``n``. Returns ``when(and(matches), and(inserts))``.
    """
    match_clauses, insert_clauses = [], []
    for position, code in enumerate(codes):
        match_clauses.append(generateMatchClause(code, type, position))
        insert_clauses.append(generateInsertClause(code, type, position))
    condition = WOQLQuery().woql_and(*match_clauses)
    action = WOQLQuery().woql_and(*insert_clauses)
    return WOQLQuery().when(condition, action)
def load_airline(series, countries_list):
    """Build the insert query for one airline row.

    Parameters
    ==========
    series : mapping with ``'Airline ID'``, ``'Clean ID'``, ``'Name'`` and
             ``'Country'`` entries
    countries_list : container of cleaned country ids

    Returns ``None`` when ``'Airline ID'`` is -1 or ``'Clean ID'`` is missing;
    otherwise the ``Airline`` insert query, with a ``registered_in`` property
    added when the cleaned country id is in ``countries_list``.
    """
    if series['Airline ID'] == -1:
        return None
    if pd.isna(series['Clean ID']):
        return None

    airline_query = WOQLQuery().insert(series['Clean ID'], 'Airline', label=series['Name'])
    country_id = _clean_id(series['Country'], prefix='country')
    if country_id in countries_list:
        airline_query.property('registered_in', "doc:" + country_id)
    return airline_query
def _split_type_ids(value):
    """Return the stripped comma-separated parts of value, or [] if not a string."""
    if not isinstance(value, str):
        return []
    return [part.strip() for part in value.split(',')]


def construction_schema_addon(series, type_list):
    """Build the ``subClassOf`` quads relating a type to its kids and parents.

    Parameters
    ==========
    series : row-like object exposing ``id``, ``subTypes`` and ``subTypeOf``;
             the latter two are comma-separated strings or non-strings
             (non-strings are ignored)
    type_list : iterable of known type ids; ids not in it are skipped

    Returns a list of ``add_quad`` queries on the ``"schema"`` graph: first
    ``kid subClassOf series.id`` for each known sub type, then
    ``series.id subClassOf parent`` for each known parent.
    """
    kids = _split_type_ids(series.subTypes)
    parents = _split_type_ids(series.subTypeOf)
    if not (kids or parents):
        return []

    # Build the lookup once instead of re-materialising type_list for every
    # candidate id.
    known_types = set(type_list)
    result = [
        WOQLQuery().add_quad(kid, "subClassOf", series.id, "schema")
        for kid in kids
        if kid in known_types
    ]
    result.extend(
        WOQLQuery().add_quad(series.id, "subClassOf", parent, "schema")
        for parent in parents
        if parent in known_types
    )
    return result
def load_csvs(client, csvs):
    """Load every CSV into the database, one query per CSV.

    Parameters
    ==========
    client : a WOQLClient() connection
    csvs : a dict of all csvs to be input (only the values, the URLs, are used)
    """
    for url in csvs.values():
        # Condition: the CSV bindings together with all wrangling steps.
        bindings = [get_csv_variables(url), *get_wrangles()]
        condition = WOQLQuery().woql_and(*bindings)
        consequence = get_inserts()
        WOQLQuery().when(condition, consequence).execute(client, f"Adding {url} into database")
def get_csv_variables(url):
    """Extracting the data from a CSV and binding it to variables

    Parameters
    ==========
    url : string, the URL of the CSV

    Returns the ``get(...).remote(url)`` query binding the columns
    ``councillor_a``, ``councillor_b``, ``party_a``, ``party_b`` and
    ``distance`` to ``v:Rep_A``, ``v:Rep_B``, ``v:Party_A``, ``v:Party_B``
    and ``v:Distance``.
    """
    column_bindings = (
        ("councillor_a", "v:Rep_A"),
        ("councillor_b", "v:Rep_B"),
        ("party_a", "v:Party_A"),
        ("party_b", "v:Party_B"),
        ("distance", "v:Distance"),
    )
    columns = WOQLQuery()
    for column, variable in column_bindings:
        columns = columns.woql_as(column, variable)
    return WOQLQuery().get(columns).remote(url)
def _include_quads(prop_id, includes, predicate, suffix, type_list):
    """Return the quads tying prop_id to the classes named in includes.

    ``predicate`` is ``"domain"`` or ``"range"`` and ``suffix`` the matching
    ``"Domain"``/``"Range"`` used to name the union class.
    """
    if not isinstance(includes, str):
        return []
    # Materialise the known ids once per call rather than once per candidate.
    known_types = set(type_list)

    if ',' not in includes:
        # Single class: link the property to it directly, if it is known.
        if includes in known_types:
            return [WOQLQuery().add_quad(prop_id, predicate, includes, "schema")]
        return []

    # Several classes: each known member becomes a subclass of the
    # "<id>Domain"/"<id>Range" class, and the property points at that class.
    union_class = prop_id + suffix
    quads = [
        WOQLQuery().add_quad(member, "subClassOf", union_class, "schema")
        for member in (part.strip() for part in includes.split(','))
        if member in known_types
    ]
    quads.append(WOQLQuery().add_quad(prop_id, predicate, union_class, "schema"))
    return quads


def construction_schema_addon_property(series, type_list):
    """Build the quads declaring a property together with its domain and range.

    Parameters
    ==========
    series : row-like object exposing ``id``, ``domainIncludes`` and
             ``rangeIncludes`` (comma-separated strings; non-strings are
             ignored)
    type_list : iterable of known type ids; unknown ids are skipped

    Returns a list starting with the ``owl:ObjectProperty`` declaration,
    followed by the domain quads and then the range quads. If fewer than
    three quads were produced in total, an empty list is returned instead.
    """
    result = [WOQLQuery().add_quad(series.id, "rdf:type", "owl:ObjectProperty", "schema")]
    result += _include_quads(series.id, series.domainIncludes, "domain", "Domain", type_list)
    result += _include_quads(series.id, series.rangeIncludes, "range", "Range", type_list)
    if len(result) < 3:
        return []
    return result
def create_schema(client):
    """Create the Region, Seats, House and Person doctypes and execute the query.

    Parameters
    ==========
    client : passed to ``execute``

    Returns whatever ``execute`` returns.
    """
    house_properties = (
        ("founder", "Person"),
        ("words", "string"),
        ("region", "Region"),
        ("heir", "Person"),
        ("overlord", "Person"),
        ("seats", "Seats"),
    )
    person_properties = (
        ("gender", "string"),
        ("father", "Person"),
        ("mother", "Person"),
        ("aliases", "string"),
        ("spouse", "Person"),
        ("children", "Person"),
    )

    region = WOQLQuery().doctype("Region", label="Region")
    seat = WOQLQuery().doctype("Seats", label="Seats")

    house = WOQLQuery().doctype("House", label="House")
    for prop_name, prop_type in house_properties:
        house = house.property(prop_name, prop_type)

    person = WOQLQuery().doctype("Person", label="Person")
    for prop_name, prop_type in person_properties:
        person = person.property(prop_name, prop_type)

    schema = WOQLQuery().woql_and(region, seat, house, person)
    return schema.execute(client, "Create schma for Game of Thrones.")
def _link_by_code(query_obj, prop, table, code, iata_length):
    """Add prop -> "doc:<Clean ID>" for the first row of table matching code.

    A code of ``iata_length`` characters is looked up in the ``'IATA'``
    column, one character longer in ``'ICAO'``; any other length, or no
    matching row, leaves ``query_obj`` untouched.
    """
    code_length = len(code)
    if code_length == iata_length:
        column = 'IATA'
    elif code_length == iata_length + 1:
        column = 'ICAO'
    else:
        return
    matches = table[column] == code
    if matches.any():
        query_obj.property(prop, "doc:" + table[matches]['Clean ID'].iloc[0])


def load_flight(series, airports, airlines):
    """Build the insert query for one route row.

    Parameters
    ==========
    series : mapping with ``'Airline Code'``, ``'Source Airport ID'`` and
             ``'Destination Airport ID'`` string entries
    airports : table with ``'IATA'``, ``'ICAO'`` and ``'Clean ID'`` columns
               (indexed like a pandas DataFrame)
    airlines : table with the same three columns

    Returns the ``Flight`` insert query. ``departs``, ``arrives`` and
    ``operated_by`` properties are added only when the corresponding code
    resolves to a row: airport codes of length 3/4 and airline codes of
    length 2/3 are matched against the IATA/ICAO columns respectively.
    """
    airline_code = series['Airline Code']
    source = series['Source Airport ID']
    destination = series['Destination Airport ID']

    query_obj = WOQLQuery().insert(
        f"{airline_code}_{source}_{destination}",
        'Flight',
        label=f"Flight by {airline_code} from {source} to {destination}"
    )
    _link_by_code(query_obj, 'departs', airports, source, 3)
    _link_by_code(query_obj, 'arrives', airports, destination, 3)
    _link_by_code(query_obj, 'operated_by', airlines, airline_code, 2)
    return query_obj
def get_wrangles():
    """Return the list of wrangling queries applied to the CSV variables.

    In order: four ``idgen`` queries (parties A/B, representatives A/B), a
    ``typecast`` of ``v:Distance`` to ``xsd:decimal`` as ``v:Similarity``,
    an ``idgen`` for the similarity document, and a ``concat`` producing
    ``v:Rel_Label``.
    """
    id_rules = (
        ("doc:Party", ["v:Party_A"], "v:Party_A_ID"),
        ("doc:Party", ["v:Party_B"], "v:Party_B_ID"),
        ("doc:Representative", ["v:Rep_A"], "v:Rep_A_ID"),
        ("doc:Representative", ["v:Rep_B"], "v:Rep_B_ID"),
    )
    steps = [WOQLQuery().idgen(prefix, keys, target) for prefix, keys, target in id_rules]
    steps.append(WOQLQuery().typecast("v:Distance", "xsd:decimal", "v:Similarity"))
    steps.append(WOQLQuery().idgen("doc:Similarity", ["v:Rep_A", "v:Rep_B"], "v:Rel_ID"))
    steps.append(WOQLQuery().concat("v:Rep_A similarity v:Distance to v:Rep_B", "v:Rel_Label"))
    return steps
def load_country(series):
    """Build the insert query for one country row.

    Parameters
    ==========
    series : mapping with ``'Country ID'``, ``'Name'``, ``'ISO Code'`` and
             ``'FIP Code'`` entries

    Returns ``None`` when ``'Country ID'`` is missing; otherwise the
    ``Country`` insert query, with ``iso_code`` / ``fip_code`` properties
    added for whichever codes are present.
    """
    country_id = series['Country ID']
    if pd.isna(country_id):
        return None

    country_query = WOQLQuery().insert(country_id, 'Country', label=series['Name'])
    for prop_name, column in (('iso_code', 'ISO Code'), ('fip_code', 'FIP Code')):
        value = series[column]
        if pd.notna(value):
            country_query.property(prop_name, value)
    return country_query
def get_inserts():
    """Return the conjunction of insert queries for one CSV row.

    It inserts both parties, both representatives (each with a ``member_of``
    property pointing at its party id) and the similarity document, which
    carries two ``similar_to`` properties and one ``similarity`` property.
    """
    party_a = WOQLQuery().insert("v:Party_A_ID", "Party", label="v:Party_A")
    party_b = WOQLQuery().insert("v:Party_B_ID", "Party", label="v:Party_B")

    rep_a = WOQLQuery().insert("v:Rep_A_ID", "Representative", label="v:Rep_A")
    rep_a = rep_a.property("member_of", "v:Party_A_ID")
    rep_b = WOQLQuery().insert("v:Rep_B_ID", "Representative", label="v:Rep_B")
    rep_b = rep_b.property("member_of", "v:Party_B_ID")

    similarity = WOQLQuery().insert("v:Rel_ID", "Similarity", label="v:Rel_Label")
    similarity = similarity.property("similar_to", "v:Rep_A_ID")
    similarity = similarity.property("similar_to", "v:Rep_B_ID")
    similarity = similarity.property("similarity", "v:Similarity")

    return WOQLQuery().woql_and(party_a, party_b, rep_a, rep_b, similarity)
def create_schema(client):
    """Create the Party, Representative and Similarity doctypes and execute.

    Parameters
    ==========
    client : a WOQLClient() connection

    Returns whatever ``execute`` returns.
    """
    party = WOQLQuery().doctype("Party", label="Party", description="Political Party")

    representative = WOQLQuery().doctype(
        "Representative",
        label="Representative",
        description="An elected member Dublin city council")
    representative = representative.property("member_of", "Party", label="Member of")
    representative = representative.cardinality(1)

    similarity = WOQLQuery().doctype("Similarity", label="Similarity")
    similarity = similarity.property("similarity", "decimal", label="Similarity")
    similarity = similarity.property("similar_to", "Representative", label="Similar To")
    similarity = similarity.cardinality(2)

    schema = WOQLQuery().woql_and(party, representative, similarity)
    return schema.execute(client, "Creating schema for Dublin voting data")
def extract_data(data, id='event/'):
    """Recursively crawl through the data and queue WOQLQuery insert objects.

    Parameters
    ==========
    data : either a dict with a ``'type'`` entry (plus ``'value'`` for
           DateTime or ``'properties'`` otherwise) or a plain string
    id : path-like id of the current node; children get ``id + prop + '/'``

    Every query built is appended to the module-level ``execution_queue``;
    nothing is returned.
    """
    if type(data) is not dict:
        # Leaf value: anything containing '://' is stored as a URL, else Text.
        leaf_type = 'http://schema.org/URL' if '://' in data else 'http://schema.org/Text'
        leaf = WOQLQuery().insert('doc:' + id, leaf_type)
        literal = {"@value": data, "@type": "xsd:string"}
        execution_queue.append(leaf.property(leaf_type + 'Value', literal))
        return

    data_type = data['type']
    node = WOQLQuery().insert('doc:' + id, data_type)

    if data_type == 'http://schema.org/DateTime':
        # DateTime nodes carry a typed literal and have no sub-properties.
        literal = {"@value": data['value'], "@type": "xsd:dateTime"}
        execution_queue.append(node.property(data_type + 'Value', literal))
        return

    for prop in data['properties']:
        child_id = id + prop + '/'
        # Children are queued before the parent that references them.
        extract_data(data['properties'][prop], child_id)
        node = node.property('http://schema.org/' + prop, 'doc:' + child_id)
    execution_queue.append(node)
data_type = data['type'] WOQLObj = WOQLQuery().insert('doc:'+id, data_type) if data_type == 'http://schema.org/DateTime': date_value = {"@value" : data['value'], "@type" : "xsd:dateTime"} execution_queue.append(WOQLObj.property(data_type+'Value', date_value)) return for prop in data['properties']: extract_data(data['properties'][prop], id+prop+'/') WOQLObj = WOQLObj.property('http://schema.org/'+prop, 'doc:'+id+prop+'/') execution_queue.append(WOQLObj) else: if '://' in data: data_type = 'http://schema.org/URL' else: data_type = 'http://schema.org/Text' WOQLObj = WOQLQuery().insert('doc:'+id, data_type) data_obj = {"@value" : data, "@type" : "xsd:string"} execution_queue.append(WOQLObj.property(data_type+'Value',data_obj)) extract_data(data['microdata'][0]) db_id = "schema_tutorial" client = WOQLClient(server_url = "http://localhost:6363") client.connect(key="root", account="admin", user="******") existing = client.get_metadata(db_id, client.uid()) if not existing: client.create_database(db_id, "admin", { "label": "Schema.org Graph", "comment": "Create a graph with Schema.org data"}) else: client.db(db_id) WOQLQuery().woql_and(*execution_queue).execute(client)
def create_schema_objects(client, queries):
    """Execute all given queries as one conjunction.

    Parameters
    ==========
    client : passed to ``execute``
    queries : iterable of queries, unpacked into ``woql_and``

    Returns whatever ``execute`` returns.
    """
    return WOQLQuery().woql_and(*queries).execute(client)
def generateInsertClause(code, type, i):
    """Build the insert clause matching ``generateMatchClause`` for index ``i``.

    Parameters
    ==========
    code : unused here
    type : document type passed to ``insert``
    i : index used to suffix the ``v:ID_`` and ``v:Label_`` variables

    Returns a ``woql_and`` wrapping the single insert of ``v:ID_<i>`` with
    label ``v:Label_<i>``.
    """
    suffix = str(i)
    insertion = WOQLQuery().insert("v:ID_" + suffix, type, label="v:Label_" + suffix)
    return WOQLQuery().woql_and(insertion)
def load_data(client, houses, characters):
    """Insert all characters, regions, seats and houses in one query.

    Parameters
    ==========
    client : passed to ``execute``
    houses : iterable of dicts with ``Id``, ``Name``, ``Region``, ``Seats``,
             ``Founder``, ``Words``, ``Heir`` and ``Overlord`` entries
    characters : iterable of dicts with ``Id``, ``Name``, ``IsFemale``,
                 ``Father``, ``Mother``, ``Spouse``, ``Children`` and
                 ``Aliases`` entries

    Optional references are skipped when their value is ``None``. Returns
    whatever ``execute`` returns.
    """
    queries = []

    for person in characters:
        person_query = WOQLQuery().insert("Person_" + str(person["Id"]), "Person")

        # Characters with an empty name are labelled "Unknown".
        name = person["Name"]
        person_query.label("Unknown" if len(name) == 0 else name)

        gender = "Female" if person["IsFemale"] else "Male"
        person_query.property("gender", WOQLQuery().string(gender))

        for relation, key in (("father", "Father"), ("mother", "Mother"), ("spouse", "Spouse")):
            relative = person[key]
            if relative is not None:
                person_query.property(relation, "doc:Person_" + str(relative))

        for child in person["Children"]:
            person_query.property("children", "doc:Person_" + str(child))
        for alias in person["Aliases"]:
            person_query.property("aliases", WOQLQuery().string(alias))

        queries.append(person_query)

    for house in houses:
        region = house["Region"]
        # Region and seat documents are queued ahead of the house that
        # references them.
        if region is not None:
            queries.append(WOQLQuery().insert("Region_" + region, "Region", label=region))
        for seat in house["Seats"]:
            queries.append(WOQLQuery().insert("Seats_" + seat, "Seats", label=seat))

        house_query = WOQLQuery().insert("hus" + str(house["Id"]), "House").label(house["Name"])
        if region is not None:
            house_query.property("region", "doc:Region_" + region)
        for seat in house["Seats"]:
            house_query.property("seats", "doc:Seats_" + seat)
        if house["Founder"] is not None:
            house_query.property("founder", "doc:Person_" + str(house["Founder"]))
        if house["Words"] is not None:
            house_query.property("words", {"@value": house["Words"], "@type": "xsd:string"})
        for relation, key in (("heir", "Heir"), ("overlord", "Overlord")):
            holder = house[key]
            if holder is not None:
                house_query.property(relation, "doc:Person_" + str(holder))

        queries.append(house_query)

    return WOQLQuery().woql_and(*queries).execute(client, "Adding data for Game of Thrones.")
def create_schema(client):
    """The query which creates the schema

    It uses variables rather than the fluent style as an example.

    Parameters
    ==========
    client : a WOQLClient() connection

    Builds the ``EphemeralEntity`` doctype and the ``Country``, ``Airline``,
    ``Airport`` and ``Flight`` classes (each with ``EphemeralEntity`` as
    parent), then executes them as one conjunction. Returns whatever
    ``execute`` returns.
    """
    base = WOQLQuery().doctype("EphemeralEntity", label="Ephemeral Entity",
                               description="An entity that has a lifespan")
    base.property("lifespan_start", "dateTime", label="Existed From")
    base.property("lifespan_end", "dateTime", label="Existed To")

    country = WOQLQuery().add_class("Country").label("Country").description(
        "A nation state").parent("EphemeralEntity")
    country.property("iso_code", "string", label="ISO Code")
    country.property("fip_code", "string", label="FIP Code")

    airline = WOQLQuery().add_class("Airline").label("Airline").description(
        "An operator of airplane flights").parent("EphemeralEntity")
    # No trailing comma here: it would rebind `airline` to a 1-tuple, and the
    # tuple (not the query) would then be handed to woql_and below.
    # NOTE(review): this is the only range written with the "scm:" prefix;
    # the other properties use bare class names - confirm which is intended.
    airline = airline.property("registered_in", "scm:Country", label="Registered In")

    airport = WOQLQuery().add_class("Airport").label("Airport").description(
        "An airport where flights terminate").parent("EphemeralEntity")
    airport.property("situated_in", "Country", label="Situated In")

    flight = WOQLQuery().add_class("Flight").label("Flight").description(
        "A flight between airports").parent("EphemeralEntity")
    flight.property("departs", "Airport", label="Departs")
    flight.property("arrives", "Airport", label="Arrives")
    flight.property("operated_by", "Airline", label="Operated By")

    schema = WOQLQuery().woql_and(base, country, airline, airport, flight)
    return schema.execute(client, "Creating schema for flight data")
if lookup is not None: filter = (airlines[lookup] == series['Airline Code']) if filter.any(): airline_id = airlines[filter]['Clean ID'].iloc[0] query_obj.property('operated_by', "doc:" + airline_id) return query_obj flights_query = routes.apply(load_flight, axis=1, airports=airports, airlines=airlines).dropna() db_id = "pyplane" client = WOQLClient(server_url="http://localhost:6363") client.connect(key="root", account="admin", user="******") existing = client.get_metadata(db_id, client.uid()) if not existing: client.create_database(db_id, "admin", label="Flight Graph", description="Create a graph with Open Flights data") else: client.db(db_id) WOQLQuery().woql_and(*countries_query).execute(client, "Insert countries data") WOQLQuery().woql_and(*airlines_query).execute(client, "Insert airlines data") WOQLQuery().woql_and(*airports_query).execute(client, "Insert airports data") WOQLQuery().woql_and(*flights_query).execute(client, "Insert flights data")