Пример #1
0
 def __init__(self, name, email, invalid_matches, amazon, country):
     """Store the participant's details and derive their continent.

     The continent is looked up from ``country``; when that lookup raises
     ``KeyError`` the name is first passed through the private
     ``__handle_country_name`` helper and the lookup is retried once.
     A ``KeyError`` from the retry propagates to the caller.
     """
     self.name, self.email = name, email
     self.invalid_matches = invalid_matches
     self.amazon = amazon
     try:
         continent = transformations.cn_to_ctn(country)
     except KeyError:
         # Raw spelling is unknown to the lookup: normalise it and retry.
         normalized = self.__handle_country_name(country)
         continent = transformations.cn_to_ctn(normalized)
     self.continent = continent
Пример #2
0
    def construct_santa_recipient(cls, raw_string):
        """Build an instance from one raw text record.

        ``raw_string`` is parsed with a regular expression into four parts:
        the text before ``<`` (name), the text between ``<`` and ``>``
        (email), a token starting with ``http`` (stored as ``amazon``) and
        the text following it (country).

        Returns:
            A new ``cls`` instance with ``name``, ``email``,
            ``invalid_matches`` (empty list), ``amazon``, ``continent`` and
            ``country`` set.

        Raises:
            AttributeError: if ``raw_string`` does not match the pattern
                (``re.match`` returns ``None``).
            KeyError: if the country is unknown even after normalisation.
        """
        obj = cls()
        name, email, amazon, country = re.match(r'([^<]*)<([^>]*)>.*(http[^ ]*) ([^<]+)', raw_string).groups()
        obj.name = name.strip()
        obj.email = email
        obj.invalid_matches = []
        obj.amazon = amazon
        try:
            obj.continent = transformations.cn_to_ctn(country)
        except KeyError:
            # Raw spelling is unknown to the lookup: normalise it and retry.
            # The normalised name is kept so that ``obj.country`` is defined
            # on this path too (it used to be left unset here).
            country = obj.__handle_country_name(country)
            obj.continent = transformations.cn_to_ctn(country)
        obj.country = country

        return obj
Пример #3
0
    def construct_santa_recipient(cls, raw_string):
        """Build an instance from one raw text record.

        ``raw_string`` is parsed with a regular expression into four parts:
        the text before ``<`` (name), the text between ``<`` and ``>``
        (email), a token starting with ``http`` (stored as ``amazon``) and
        the text following it (country).

        Returns:
            A new ``cls`` instance with ``name``, ``email``,
            ``invalid_matches`` (empty list), ``amazon``, ``continent`` and
            ``country`` set.

        Raises:
            AttributeError: if ``raw_string`` does not match the pattern
                (``re.match`` returns ``None``).
            KeyError: if the country is unknown even after normalisation.
        """
        obj = cls()
        name, email, amazon, country = re.match(
            r'([^<]*)<([^>]*)>.*(http[^ ]*) ([^<]+)', raw_string).groups()
        obj.name = name.strip()
        obj.email = email
        obj.invalid_matches = []
        obj.amazon = amazon
        try:
            obj.continent = transformations.cn_to_ctn(country)
        except KeyError:
            # Raw spelling is unknown to the lookup: normalise it and retry.
            # The normalised name is kept so that ``obj.country`` is defined
            # on this path too (it used to be left unset here).
            country = obj.__handle_country_name(country)
            obj.continent = transformations.cn_to_ctn(country)
        obj.country = country

        return obj
Пример #4
0
def isafrica(location):
    """Return True when *location* names an African country.

    Any name whose capitalised form starts with ``'Congo'`` is accepted
    without consulting the lookup table.  Otherwise the continent is looked
    up with ``transformations.cn_to_ctn``; names the table does not know
    yield ``False``.

    Args:
        location: country name as a string.

    Returns:
        bool: True if the continent resolves to ``'Africa'``.
    """
    capitalized = location.capitalize()
    if capitalized.startswith('Congo'):
        return True
    # str.capitalize() lower-cases everything after the first character, so
    # multi-word names are mangled ("South Africa" -> "South africa").  Keep
    # the capitalised form as the first attempt (previous behaviour) and fall
    # back to the caller's own spelling when the table does not know it.
    for candidate in (capitalized, location):
        try:
            return transformations.cn_to_ctn(candidate) == 'Africa'
        except KeyError:
            continue
    return False
Пример #5
0
def countries():
    """Populate the ``continent`` and ``country`` tables from pycountry.

    Three passes are made:
      1. collect the continent of every pycountry country (plus the
         placeholder ``"Unspecified"``);
      2. insert each continent name that is not yet in ``db.continent``;
      3. insert each country name that is not yet in ``db.country``
         together with its continent name.

    Countries whose name ``transformations.cn_to_ctn`` does not know raise
    ``KeyError``; these are reported with ``print`` and skipped.

    Returns:
        dict: ``locals()`` of this function (local names are therefore part
        of the result and are kept unchanged).
    """
    continents = {"Unspecified"}
    for country in pycountry.countries:
        try:
            continents.add(transformations.cn_to_ctn(country.name))
        except KeyError as e:
            print ('KeyError - reason "%s"' % str(e))

    for x in continents:
        if db(db.continent.continent_name == x).isempty():
            db.continent.insert(continent_name=x)

    for country in pycountry.countries:
        try:  # some country names are unknown to cn_to_ctn
            continent = transformations.cn_to_ctn(country.name)
            # Compare against the name string, not the pycountry object:
            # the object never equals a stored name, so the existence
            # check could not prevent duplicate inserts.
            if db(db.country.country_name == country.name).isempty():
                db.country.insert(country_name=country.name, continent=continent)
        except KeyError as e:
            print ('IKeyError - reason "%s"' % str(e))

    return locals()
Пример #6
0
def find_continent(country):
    """Return the continent for *country*, passing ``'Unknown'`` through.

    The sentinel ``'Unknown'`` is returned unchanged; every other value is
    handed to ``transformations.cn_to_ctn`` and its result (or exception)
    is returned to the caller.
    """
    return 'Unknown' if country == 'Unknown' else transformations.cn_to_ctn(country)
Пример #7
0
def cn_to_ctn(country):
    """Map a prefixed CamelCase country identifier to its continent.

    The first four characters of *country* are dropped, the remainder is
    split in front of every capital letter and the pieces are joined with
    single spaces before being looked up.  Returns ``"unk"`` when the
    lookup raises ``KeyError``.
    """
    try:
        words = re.findall('[A-Z][^A-Z]*', country[4:])
        spaced_name = ' '.join(words)
        return transformations.cn_to_ctn(spaced_name)
    except KeyError:
        return "unk"
# Minimal usage example for incf.countryutils: resolve a country name to
# the name of its continent and print it.
__author__ = 'Gaurav-PC'

from incf.countryutils import transformations

# cn_to_ctn takes a country name and returns the continent name.
result = transformations.cn_to_ctn('Vietnam')
print(result)

Пример #9
0
    def assignCodeSEC(self,secModifier):
        print "assigning code to " + secModifier + "....."
        code_train = pd.read_csv(
            "C:/Users/niharika.kumar/Desktop/scrubbing tool/sec.modifier datasets/secondary.modif_code.csv", header=0, \
            delimiter=",", quoting=2)

        code_train = code_train.loc[code_train['Modifier'] == secModifier]
        code = []
        rms_code = []
        max_country = ''
        max_state = ''
        max_longitude=''
        max_latitude=''
        max_city=''
        if not confidence['country'] == []:
            max_country = max(confidence['country'][0].iteritems(), key=operator.itemgetter(1))[0]
        if not confidence['state'] == []:
            max_state = max(confidence['state'][0].iteritems(), key=operator.itemgetter(1))[0]
        if not confidence['city'] == []:
            max_city = max(confidence['city'][0].iteritems(), key=operator.itemgetter(1))[0]
        if not confidence['county'] == []:
            max_county = max(confidence['county'][0].iteritems(), key=operator.itemgetter(1))[0]
        if not confidence['latitude'] == [] and not confidence['longitude'] == []:
            max_latitude = max(confidence['latitude'][0].iteritems(), key=operator.itemgetter(1))[0]
            max_longitude = max(confidence['longitude'][0].iteritems(), key=operator.itemgetter(1))[0]
        region = ''
        columns = ['Text', 'Description', 'Rms', 'Cosine', 'Code', 'Count']
        df_ = pd.DataFrame(columns=columns)
        df_ = df_.fillna(0)
        i = -1
        for text in Scrub.bifercated_data[secModifier]:
            i = i + 1
            t = ''
            count = 0
            df_ = df_[0:0]
            if text == '':
                code.append('')
                rms_code.append('')
            else:
                if not max_latitude=='' and not max_longitude=='':
                    t = str(Scrub.sov[max_latitude][i])+','+str(Scrub.sov[max_longitude][i])
                elif not max_country == '':
                    t = output_detail['country'][max_country][i]
                    continent = transformations.cn_to_ctn(t)
                elif not max_state == '':
                    t = str(Scrub.sov[max_state][i])
                elif not max_city =='':
                    t = str(Scrub.sov[max_city][i])
                elif not max_county =='':
                    t = str(Scrub.sov[max_county][i])
                try:
                    x = "https://maps.googleapis.com/maps/api/geocode/json?address=" + t.strip() + "&key=AIzaSyBOR3PXZKEKREeOtZptN3K2Wb3D85PTB18"
                    html_page = urllib2.urlopen(x)
                    r = json.load(html_page)
                    if len(r['results']) != 0:
                        formatted_address = r['results'][0]["formatted_address"]
                        if 'USA' in formatted_address or 'Canada' in formatted_address:
                            country = 'USA'
                        elif 'Japan' in formatted_address:
                            country = 'Japan'
                        elif continent == 'Europe':
                            country = 'Europe'
                        else:
                            country = 'Rest'
                    print text
                    if 1:
                        if country == 'USA' or country == 'Canada':
                            region = 'u.s.a ,canada'
                            region_data = code_train.loc[code_train['Region'] == 'u.s.a ,canada']
                        elif country == 'Japan':
                            region_data = code_train.loc[code_train['Region'] == 'Japan']
                            region = 'Japan'
                        elif country == 'Europe':
                            region_data = code_train.loc[code_train['Region'] == 'europe']
                            region = 'europe'
                        else:
                            region_data = code_train.loc[code_train['Region'] == 'Rest']
                            region = 'Rest'
                        for index, row in region_data.iterrows():
                            vector1 = self.text_to_vector(str(row['Description']))
                            vector2 = self.text_to_vector(str(text))
                            cosine = self.get_cosine(vector1, vector2)
                            if cosine >= 0.3:
                                df_.loc[count] = [text, row['Description'], row['Rms Description'], cosine, row['Code'],
                                                  0]
                                count = count + 1
                        print "finding the most probable code...."
                        # group by description,cosine and code

                        df_frequency = pd.DataFrame(
                            {'Count': df_.groupby(['Description', 'Cosine', 'Code', 'Rms']).size()}).reset_index()
                        max_count = df_frequency['Count'].max()
                        print max_count
                        if not np.isnan(max_count):
                            df_frequency = df_frequency.loc[df_frequency['Count'] == max_count]
                            freq = len(df_frequency.index)
                            if freq >= 1:
                                max_cosine = df_frequency['Cosine'].max()
                                df_frequency = df_frequency.loc[df_frequency['Cosine'] == max_cosine]
                                freq_code = df_frequency['Code'].iloc[0]
                                code.append(freq_code)
                                rms_code.append(df_frequency['Rms'].iloc[0])
                            del df_frequency
                        else:
                            code.append(0)
                            rms_code.append('UNKNOWN')
                           

                except Exception,ex:
                    print str(ex)
Пример #10
0
def cn_to_ctn(country):
    """Return the continent for a country identifier, or ``"unk"``.

    *country* is first translated through ``id_to_name_map`` and the
    resulting name is looked up with ``transformations.cn_to_ctn``.  A
    ``KeyError`` from either step yields ``"unk"``.
    """
    try:
        country_name = id_to_name_map[country]
        continent = transformations.cn_to_ctn(country_name)
    except KeyError:
        return "unk"
    return continent
Пример #11
0
def get_location(query, format, api_key):
    """Get geographic data of a lab in a coherent way for all labs.

    Args:
        query: what to geocode -- coordinates for ``"reverse"``, an address
            for ``"direct"``.  ``None`` or anything with ``len(query) < 3``
            skips the geocoder call.
        format: ``"reverse"`` (coordinates -> address) or ``"direct"``
            (address -> coordinates); any other value skips geocoding.
        api_key: key passed to the OpenCage geocoder.

    Returns:
        dict with the keys ``city``, ``address_1``, ``postal_code``,
        ``country``, ``county``, ``state``, ``country_code`` (converted to
        the three-letter form), ``latitude``, ``longitude`` and
        ``continent``.  Every value that could not be determined is
        ``None``.
    """

    # Play nice with the API...
    sleep(1)
    geolocator = OpenCage(api_key=api_key, timeout=10)

    # Variables for storing the data
    data = {
        "city": None,
        "address_1": None,
        "postal_code": None,
        "country": None,
        "county": None,
        "state": None,
        "country_code": None,
        "latitude": None,
        "longitude": None,
        "continent": None
    }
    road = ""
    number = ""
    # Default None values, used when nothing is geocoded
    location_data = {
        "city": None,
        "road": None,
        "house_number": None,
        "postcode": None,
        "country": None,
        "county": None,
        "state": None,
        "ISO_3166-1_alpha-2": None,
        "country_code": None,
        "lat": None,
        "lng": None
    }

    # Only call the geocoder for a known mode and a non-trivial query
    if format in ("reverse", "direct") and query is not None and len(query) >= 3:
        if format == "reverse":
            # Reverse geocoding ... from coordinates to address
            # (the first element of the returned sequence is used)
            location = geolocator.reverse(query)
            hit = location[0] if location is not None else None
        else:
            # Direct geocoding ... from address to coordinates and full address
            hit = geolocator.geocode(query)
        if hit is not None:
            raw = hit.raw
            location_data = raw[u'components']
            location_data["lat"] = raw[u'geometry']["lat"]
            location_data["lng"] = raw[u'geometry']["lng"]

    # Extract the meaningful data: geocoder component -> output field
    direct_fields = {
        "town": "city",
        "city": "city",
        "postcode": "postal_code",
        "country": "country",
        "county": "county",
        "state": "state",
        "ISO_3166-1_alpha-2": "country_code",
    }
    for component, value in location_data.items():
        if component in direct_fields:
            data[direct_fields[component]] = value
        elif component == "road":
            road = value
        elif component == "house_number":
            number = value

    # The address needs to be reconstructed.  Missing parts are left out so
    # an empty result stays None instead of becoming the text "None None".
    address_parts = [unicode(part) for part in (road, number)
                     if part is not None and part != ""]
    data["address_1"] = u" ".join(address_parts) if address_parts else None
    data["latitude"] = location_data["lat"]
    data["longitude"] = location_data["lng"]

    # Format the country code to three letters.  Best effort: any lookup
    # failure (e.g. no code at all) leaves the field as None.
    try:
        country_data = transformations.cca2_to_ccn(data["country_code"])
        data["country_code"] = transformations.ccn_to_cca3(country_data)
    except Exception:
        data["country_code"] = None
    # Get the continent, again best effort
    try:
        country_data = transformations.cc_to_cn(data["country_code"])
        data["continent"] = transformations.cn_to_ctn(country_data)
    except Exception:
        data["continent"] = None

    # Return the final data
    return data