def query_location(self, article): article_url = '<http://' + language + '.wikipedia.org/wiki/' + article + '>' location = None # SPARQL query that namespaces = """ PREFIX dbo: <http://dbpedia.org/resource/classes#> PREFIX dbp: <http://dbpedia.org/property/> """ location_query = """ { SELECT * WHERE { ?match dbp:latitude ?lat . ?match dbp:longitude ?long } } UNION { SELECT * WHERE { ?match geo:lat ?lat . ?match geo:long ?long } } UNION { SELECT * WHERE { ?match dbp:latDeg ?latDeg . ?match dbp:latMin ?latMin . ?match dbp:latSec ?latSec . ?match dbp:lonDeg ?lonDeg . ?match dbp:lonMin ?lonMin . ?match dbp:lonSec ?lonSec } } UNION { SELECT * WHERE { ?match dbp:latDeg ?latDeg . ?match dbp:latMin ?latMin . ?match dbp:latSec ?latSec . ?match dbp:lonDeg ?lonDeg . ?match dbp:lonMin ?lonMin . ?match dbp:lonSec ?lonSec . ?match dbp:latDir ?latDir . ?match dbp:lonDir ?lonDir } } UNION { SELECT * WHERE { ?match dbp:latDegrees ?latDeg . ?match dbp:latMinutes ?latMin . ?match dbp:latSeconds ?latSec . ?match dbp:longDegrees ?lonDeg . ?match dbp:longMinutes ?lonMin . ?match dbp:longSeconds ?lonSec . ?match dbp:latDirection ?latDir . ?match dbp:longDirection ?lonDir } } UNION { SELECT * WHERE { ?match dbp:latd ?lat . ?match dbp:longd ?long } } """ query_string = namespaces + """ SELECT * WHERE { ?match foaf:isPrimaryTopicOf """ + article_url + """ . 
""" + location_query + """ } """ query_string_with_offset = query_string self.sparql.setQuery(query_string_with_offset) try: results = self.sparql.query().convert() except QueryBadFormed: print "SPARQL query bad formed: " + query_string_with_offset return None except urllib2.HTTPError: print "HTTP Error 502: " + article return None except urllib2.URLError: print "Network is unreachable while working on: " + article return None except socket.timeout: print "Query timed out for: " + article return None if len(results["results"]["bindings"]) > 0: coordinates_array = [] for result in results["results"]["bindings"]: if "lat" in result: latitude = result["lat"]["value"] longitude = result["long"]["value"] else: lat_dir = 0 if "latDir" in result: lat_dir = result["latDir"]["value"] latitude = self.dms2dd(result["latDeg"]["value"], result["latMin"]["value"], result["latSec"]["value"], lat_dir) longitude = self.dms2dd(result["lonDeg"]["value"], result["lonMin"]["value"], result["lonSec"]["value"], lat_dir) latitude = float(latitude) longitude = float(longitude) coordinates_array.append((latitude, longitude)) majority_vote = majority_voting.vote(coordinates_array, threshold) location = country_lookup.get_country(majority_vote[0], majority_vote[1]) return location
# NOTE(review): tail of a larger routine whose beginning is outside this
# chunk -- the leading `location_tuple` assignment sits inside a result
# loop, `offset += limit` / `else: break` belong to a pagination loop we
# cannot see, and the final section majority-votes per-URL coordinates
# and writes them to a JSON file.  Kept byte-identical.
# NOTE(review): Python 2 only (`dict.has_key`, `print` statement).
# NOTE(review): the second "skip web.archive.org links" comment actually
# guards webcitation.org -- stale copy/paste comment.
location_tuple = (float(result["lat"]["value"]), float(result["long"]["value"])) if url_location_dictionary.has_key(stripped_url): url_location_dictionary[stripped_url].append(location_tuple) else: locations = [] locations.append(location_tuple) url_location_dictionary[stripped_url] = locations offset += limit else: break # do a majority voting on the retrieved locations url_majority_location_dictionary = {} for url in url_location_dictionary: # skip web.archive.org links if "web.archive.org" in str(url): continue # skip web.archive.org links if "webcitation.org" in str(url): continue url_majority_location_dictionary[url] = majority_voting.vote( url_location_dictionary[url], absolute_threshold) # write results to a JSON file with open(outputfile_path, 'w') as f: json.dump(url_majority_location_dictionary, f, indent=4, sort_keys=True) print "File was stored successfully"
def query_location(self,article): article_url = '<http://'+ language + '.wikipedia.org/wiki/'+article + '>' location = None # SPARQL query that namespaces = """ PREFIX dbo: <http://dbpedia.org/resource/classes#> PREFIX dbp: <http://dbpedia.org/property/> """ location_query = """ { SELECT * WHERE { ?match dbp:latitude ?lat . ?match dbp:longitude ?long } } UNION { SELECT * WHERE { ?match geo:lat ?lat . ?match geo:long ?long } } UNION { SELECT * WHERE { ?match dbp:latDeg ?latDeg . ?match dbp:latMin ?latMin . ?match dbp:latSec ?latSec . ?match dbp:lonDeg ?lonDeg . ?match dbp:lonMin ?lonMin . ?match dbp:lonSec ?lonSec } } UNION { SELECT * WHERE { ?match dbp:latDeg ?latDeg . ?match dbp:latMin ?latMin . ?match dbp:latSec ?latSec . ?match dbp:lonDeg ?lonDeg . ?match dbp:lonMin ?lonMin . ?match dbp:lonSec ?lonSec . ?match dbp:latDir ?latDir . ?match dbp:lonDir ?lonDir } } UNION { SELECT * WHERE { ?match dbp:latDegrees ?latDeg . ?match dbp:latMinutes ?latMin . ?match dbp:latSeconds ?latSec . ?match dbp:longDegrees ?lonDeg . ?match dbp:longMinutes ?lonMin . ?match dbp:longSeconds ?lonSec . ?match dbp:latDirection ?latDir . ?match dbp:longDirection ?lonDir } } UNION { SELECT * WHERE { ?match dbp:latd ?lat . ?match dbp:longd ?long } } """ query_string = namespaces + """ SELECT * WHERE { ?match foaf:isPrimaryTopicOf """+article_url+""" . 
"""+location_query+""" } """ query_string_with_offset = query_string self.sparql.setQuery(query_string_with_offset) try: results = self.sparql.query().convert() except QueryBadFormed: print "SPARQL query bad formed: " + query_string_with_offset return None except urllib2.HTTPError: print "HTTP Error 502: " + article return None except urllib2.URLError: print "Network is unreachable while working on: " + article return None except socket.timeout: print "Query timed out for: " + article return None if len(results["results"]["bindings"]) > 0: coordinates_array = [] for result in results["results"]["bindings"]: if "lat" in result: latitude = result["lat"]["value"] longitude = result["long"]["value"] else: lat_dir=0 if "latDir" in result: lat_dir = result["latDir"]["value"] latitude = self.dms2dd(result["latDeg"]["value"], result["latMin"]["value"], result["latSec"]["value"], lat_dir) longitude = self.dms2dd(result["lonDeg"]["value"], result["lonMin"]["value"], result["lonSec"]["value"], lat_dir) latitude = float(latitude) longitude = float(longitude) coordinates_array.append((latitude,longitude)) majority_vote = majority_voting.vote(coordinates_array,threshold) location = country_lookup.get_country(majority_vote[0],majority_vote[1]) return location
# NOTE(review): tail of a larger routine whose beginning is outside this
# chunk -- `stripped_url` reduces a parsed URL to scheme://host/, the
# result loop accumulates (lat, long) tuples per stripped URL, and
# `offset += limit` / `else: break` belong to a pagination loop we cannot
# see.  The final section majority-votes per-URL coordinates and writes
# them to a JSON file.  Kept byte-identical.
# NOTE(review): Python 2 only (`dict.has_key`, `print` statement).
# NOTE(review): the second "skip web.archive.org links" comment actually
# guards webcitation.org -- stale copy/paste comment.
stripped_url = '{uri.scheme}://{uri.netloc}/'.format(uri=parsed_url) location_tuple=(float(result["lat"]["value"]),float(result["long"]["value"])) if url_location_dictionary.has_key(stripped_url) : url_location_dictionary[stripped_url].append(location_tuple) else: locations = [] locations.append(location_tuple) url_location_dictionary[stripped_url] = locations offset += limit else: break # do a majority voting on the retrieved locations url_majority_location_dictionary = {} for url in url_location_dictionary: # skip web.archive.org links if "web.archive.org" in str(url): continue # skip web.archive.org links if "webcitation.org" in str(url): continue url_majority_location_dictionary[url]=majority_voting.vote(url_location_dictionary[url],absolute_threshold) # write results to a JSON file with open(outputfile_path, 'w') as f: json.dump(url_majority_location_dictionary, f, indent=4, sort_keys=True) print "File was stored successfully"