Пример #1
0
    def get_context(self):
        """Fetch every submission's article and enrich the submission in place.

        For each entry of ``self._submissions`` the article found at
        ``value["submission.url"]`` is downloaded, parsed and run through
        ``nlp()``.  Its authors, text, top image, summary and keywords are
        written back onto the entry under ``article.*`` keys.  The article
        text is geoparsed; the distinct ``country_predicted`` values are
        stored under ``article.countries`` and the entry's
        ``calculated_score`` is added to ``self._countries`` once per
        distinct country.

        Any exception raised while handling one submission is logged and the
        loop continues with the next one, so a failing entry may be left only
        partially enriched.
        """
        logging.info("Ongoing...")

        # One geoparser is shared by all submissions instead of being rebuilt
        # for each article.  It is created lazily inside the ``try`` so that a
        # construction failure is still logged per submission and never
        # escapes this method.
        geo = None

        for value in self._submissions.values():
            try:
                article = Article(value["submission.url"])
                article.download()
                article.parse()
                article.nlp()
                value["article.authors"] = article.authors
                value["article.text"] = article.text

                if geo is None:
                    geo = Geoparser()
                places = geo.geoparse(article.text)

                # A set, so each country is scored once per article no matter
                # how many places in the text resolve to it.
                countries_in_article = {
                    place["country_predicted"] for place in places
                }

                for country in countries_in_article:
                    key = str(country)
                    if key not in self._countries:
                        self._countries[key] = value["calculated_score"]
                    else:
                        self._countries[key] += value["calculated_score"]

                value["article.top_image"] = article.top_image
                value["article.summary"] = article.summary
                value["article.keywords"] = article.keywords
                value["article.countries"] = list(countries_in_article)
            except Exception as e:
                # Best effort: one bad article must not stop the whole run.
                logging.info("Error: %s", e)
Пример #2
0
class VCGeotagger:
    """Turn free text or "City, ST" strings into a ``(city, state)`` pair.

    Missing parts are reported as the string ``"None"`` (not the ``None``
    object), matching what the methods below return.
    """

    def __init__(self):
        """Load the geoparser and build the state-abbreviation lookup.

        Relies on a module-level ``states`` string (defined outside this
        class) that is split on newlines and then on ``'-'``; the first part
        of each entry is used as the long name and the second as the key.
        """
        self.geo = Geoparser()
        long_short_state = [ent.split('-') for ent in states.split('\n')]

        # Entry 9 is overridden so that 'GA' expands to
        # 'Georgia United States'; presumably this keeps the state apart
        # from the country Georgia -- TODO confirm.
        long_short_state[9] = ['Georgia United States', 'GA']

        # Maps the second field (abbreviation) to the first (long name).
        self.state_dict = {a[1]: a[0] for a in long_short_state}

    def remove_non_ascii(self, text):
        """Return ``text`` with every character of code point >= 128 dropped."""
        return ''.join(i for i in text if ord(i) < 128)

    def geotag(self, text):
        """Geoparse ``text`` and return ``(city, state)`` for the first US hit.

        Only results whose ``country_predicted`` is ``'USA'`` and that carry
        a ``'geo'`` entry are considered.  When the place name equals the
        ``admin1`` value the city is reported as ``"None"``.

        Returns:
            A 2-tuple of strings.  ``("None", "None")`` is returned when the
            geoparser yields nothing *or* when none of its results qualifies,
            so callers can always unpack the result.
        """
        text = self.remove_non_ascii(text)
        result = self.geo.geoparse(text)
        if not result:
            return "None", "None"
        for r in result:
            if r['country_predicted'] == 'USA' and 'geo' in r:
                state = r['geo']['admin1']
                city = r['geo']['place_name']
                if state != city:
                    return city, state
                return "None", state
        # No usable US result: previously this fell through and returned the
        # ``None`` object, which broke ``city, state = tagger.geotag(...)``.
        return "None", "None"

    def placetag(self, text):
        """Split a ``"City, State"`` string into ``(city, state)``.

        The state part is expanded through ``self.state_dict`` when it is a
        known key and kept verbatim otherwise.  Input that does not split
        into exactly two parts on ``', '`` yields ``('None', 'None')``.
        """
        tmp_list = text.split(', ')
        if len(tmp_list) != 2:
            return 'None', 'None'
        city, state = tmp_list
        return city, self.state_dict.get(state, state)
        
def search_geolocation(message):
    """Report the first latitude/longitude pair found in ``message``.

    A ``Paste`` is built from ``message`` and ``message`` is geoparsed.  The
    text of ``message`` is then scanned for ``'lat': '<number>',`` and
    ``'lon': '<number>',`` fragments.  When both are present the values are
    printed and a warning naming the paste is published.

    When either fragment is missing the function returns without printing
    or publishing anything (previously this raised ``AttributeError`` from
    calling ``.group`` on ``None``).

    Args:
        message: value handed to ``Paste.Paste`` and scanned by the regexes.

    Returns:
        None.
    """
    paste = Paste.Paste(message)
    content = paste.get_p_content()

    # Load Geoparser
    geo = Geoparser()
    geolocation = geo.geoparse(message)

    # NOTE(review): ``content`` and ``geolocation`` are never read below; the
    # regexes scan ``message`` itself.  The patterns look like they target
    # the textual form of the geoparser output -- confirm which text was
    # meant to be searched before changing this.

    # Regexes to find latitude and longitude.
    reg_lat = re.compile(r'(\'lat\': \'([-\d.]+)\',)')
    reg_lon = re.compile(r'(\'lon\': \'([-\d.]+)\',)')

    lat_match = reg_lat.search(message)
    lon_match = reg_lon.search(message)
    if lat_match is None or lon_match is None:
        # ``search`` returns None when nothing matches; there is no
        # coordinate pair to report in that case.
        return

    lat = lat_match.group(2)
    lon = lon_match.group(2)

    print('latitude: {}'.format(lat))
    print('longitude: {}'.format(lon))

    print('{} text geolocation'.format(paste.p_name))
    publisher.warning('{} contains geolocation'.format(paste.p_name))
Пример #4
0
# Matches any string that contains at least one ASCII capital letter.
# The earlier pattern '.*[A-Z]*.*' accepted every string, because `[A-Z]*`
# is also satisfied by zero capitals, so the filter below rejected nothing.
capital = re.compile('[A-Z]')

# Built on first use and then shared by every file, so the geoparser is not
# loaded at all when there is nothing to process and not reloaded per file.
geo = None

# Geoparse every not-yet-processed XML file from ../processed_files and write
# one line per detected place to ../geoparser_output/<name>_output.txt.
for inputfile in os.listdir("../processed_files"):
    name, extension = os.path.splitext(inputfile)
    outfilename = name + "_output.txt"
    inputfile = "../processed_files/" + inputfile
    print("Outfile name: " + outfilename)
    if xmlfile.match(inputfile) and outfilename not in os.listdir(
            "../geoparser_output"):  # Only process XML files
        if geo is None:
            geo = Geoparser()

        # The `with` statement closes the file; no explicit close() needed.
        with open(inputfile, "r") as infile:
            print("Processing data from " + inputfile + "...")
            data = infile.readlines()

        # NOTE(review): str(data) hands the geoparser the repr of a list of
        # lines (brackets, quotes and escaped newlines included) rather than
        # the raw file text -- confirm whether infile.read() was intended.
        output = geo.geoparse(str(data))
        outfilename = "../geoparser_output/" + outfilename

        with open(outfilename, "a") as outfile:
            for word in output:
                # Keep only place names that contain a capital letter
                # (remove this condition to disable the filter).
                if capital.search(word['word']):
                    outfile.write(str(word))
                    outfile.write("\n")
Пример #5
0
from mordecai import Geoparser
geo = Geoparser()
print(geo.geoparse("Retencions a la B-23, Barcelona."))
from functools import reduce
import json

geo = Geoparser()
# Accumulates the location records collected by this script.
ALL_Location = []


def run_function(x, y):
    """Return ``x`` unchanged if it already contains ``y``, else ``x + [y]``.

    The input list is never mutated; a new list is built when ``y`` is
    appended.
    """
    if y in x:
        return x
    return x + [y]

with open('data/covid_19.csv', errors="ignore") as f:
    Reader = csv.DictReader(f)
    for row in Reader:
        a = row["abstract"]
        t = row["title"]
        j = row["journal"]
        url = row["url"]
        try:
            geoINF = geo.geoparse(a)
        except:
            continue
        for i in range(len(geoINF)):
            location = {
                'word': '',
                'place_name': '',
                'country': '',
                'lat': '',
                'lon': '',
                'title': t,
                'journal': j,
                'url': url
            }
            try:
                a = geoINF[i]['geo']