示例#1
0
class AddressParserTest(unittest.TestCase):
    """Checks the lookup tables exposed by an AddressParser instance.

    Each test reads one table (suffixes, cities, states, zips) and compares a
    known entry. assertEqual / assertIn are used instead of
    assertTrue(a == b) so that a failure reports both compared values.
    """

    ap = None

    def setUp(self):
        # A new parser per test, so load_zips() in one test cannot leak into
        # another.
        self.ap = AddressParser()

    def test_load_suffixes(self):
        self.assertEqual(self.ap.suffixes["ALLEY"], "ALY")

    def test_load_cities(self):
        self.assertIn("wisconsin rapids", self.ap.cities)

    def test_load_states(self):
        self.assertEqual(self.ap.states["Wisconsin"], "WI")

    def test_load_zips(self):
        # `cwd` is a module-level name defined outside this class.
        self.ap.load_zips(os.path.join(cwd, "zipcodes.csv"))
        last = self.ap.zips["99950"]
        self.assertEqual(last["zip"], "99950")
        self.assertEqual(last["city"], "Ketchikan")
        self.assertEqual(last["state"], "AK")
        self.assertEqual(last["lat"], "55.875767")
        self.assertEqual(last["lng"], "-131.46633")
        self.assertEqual(last["timezone"], "-9")
        # Equality (not identity) on purpose: mirrors the original `== True`.
        self.assertEqual(last["dst"], True)
示例#2
0
def address_parts_probabilities(values, num_rows=100):
    """Estimate how often sampled address strings yield a city, state and zip.

    A sample of at most ``num_rows`` values is parsed with AddressParser and,
    for each of 'city', 'state' and 'zip', the fraction of sampled values for
    which the parser produced a truthy attribute is returned.

    Args:
        values: Sequence of strings to sample from.
        num_rows: Upper bound on the number of values sampled.

    Returns:
        dict with keys 'city', 'state', 'zip'. All zeros when the sample is
        empty or when column_probability_for_type() does not rate the sample
        above 0.5 as 'address'; otherwise floats.
    """
    probs = {'city': 0, 'state': 0, 'zip': 0}

    # The sample size is capped at len(values) - 1, exactly as before, so the
    # final element is never sampled.
    # NOTE(review): confirm whether skipping the last element is intentional.
    max_rows = min(num_rows, len(values) - 1)

    # Guard: with an empty or one-element input (or num_rows <= 0) the sample
    # is empty; dividing by max_rows below would fail or give negative values.
    if max_rows <= 0:
        return probs

    sample = values[:max_rows]
    if not column_probability_for_type(sample, 'address') > .5:
        return probs

    ap = AddressParser()
    has = {'city': 0, 'state': 0, 'zip': 0}

    for v in sample:
        if ',' not in v:
            # No comma present: put one after the first token recognised as a
            # city before handing the string to the parser.
            tokens = v.split(' ')
            for i, token in enumerate(tokens):
                if len(token) > 1 and is_a_city(token):
                    tokens[i] = token + ','
                    break

            v = ' '.join(tokens)

        addr = ap.parse_address(v)
        for k in has:
            if getattr(addr, k, None):
                has[k] += 1

    for k in probs:
        probs[k] = float(has[k]) / max_rows

    return probs
class AddressParserTest(unittest.TestCase):
    """Verifies known entries in the tables an AddressParser exposes."""

    ap = None

    def setUp(self):
        self.ap = AddressParser()

    def test_load_suffixes(self):
        abbreviation = self.ap.suffixes["ALLEY"]
        self.assertEqual(abbreviation, "ALY")

    def test_load_cities(self):
        known_cities = self.ap.cities
        self.assertTrue("wisconsin rapids" in known_cities)

    def test_load_states(self):
        abbreviation = self.ap.states["Wisconsin"]
        self.assertEqual(abbreviation, "WI")

    def test_load_zips(self):
        zip_file = os.path.join(cwd, "zipcodes.csv")
        self.ap.load_zips(zip_file)
        record = self.ap.zips["99950"]

        # Checked in this order; the first mismatch fails the test.
        expected_fields = (
            ("zip", "99950"),
            ("city", "Ketchikan"),
            ("state", "AK"),
            ("lat", "55.875767"),
            ("lng", "-131.46633"),
            ("timezone", "-9"),
            ("dst", True),
        )
        for field, expected in expected_fields:
            self.assertEqual(record[field], expected)
def addressParse(inputAddress):                                                         ## Probably better to pass this a list instead of running the library for individual addresses.
####################################################################################
## Parses an input string using the python address parsing library from SwoopSearch.
## https://github.com/SwoopSearch/pyaddress
####################################################################################
ap = AddressParser()                                                                    ## Initialize the address parser libary.
address = ap.parse_address(inputAddress) ## Pass an address to the address parser library.
## Since each parsed value needs to be modified in the same format, there may be a way to iterate through
## each returned value. However, the below method in comments does not work and will require further research.
##
## parsefieldList = ['house_number','street_prefix','address.street','street_suffix', 'apartment']
##
## for item in parsefieldList:
## strHouseNumber
## currentItem = parsefieldList.index(item)
## currentItem = str(address.currentItem).strip().upper()
## currentItem = re.sub('[^A-Za-z0-9 ]+', '', currentItem)
if address.house_number is not None: ##Check if house_number returns a value.
    strHouseNumber = str(address.house_number).strip().upper()                          ## Strip whitespace and change to uppercase.
    strHouseNumber = re.sub('[^A-Za-z0-9 ]+', '', strHouseNumber)                       ## Remove all special characters.
    
else: strHouseNumber = ""                                                               ## If house_number is None, then change to a blank string.
if address.street_prefix is not None:
    strStreetPrefix = str(address.street_prefix).strip().upper()
strStreetPrefix = re.sub('[^A-Za-z0-9 ]+', '', strStreetPrefix)
else :
示例#5
0
def address_parts_probabilities(values, num_rows=100):
    """Return the share of sampled addresses that contain a city, state, zip.

    Up to ``num_rows`` entries of ``values`` are parsed with AddressParser.
    For each of 'city', 'state' and 'zip' the result holds the fraction of
    sampled entries whose parsed address has a truthy attribute of that name.

    Args:
        values: Sequence of strings to sample from.
        num_rows: Maximum number of entries to sample.

    Returns:
        dict keyed by 'city', 'state', 'zip'. The values are all 0 when
        nothing can be sampled or when column_probability_for_type() does not
        score the sample above 0.5 for 'address'.
    """
    probs = {'city': 0, 'state': 0, 'zip': 0}

    # Same cap as before: never more than len(values) - 1 entries, which means
    # the last entry is left out of the sample.
    # NOTE(review): verify that excluding the last entry is deliberate.
    max_rows = min(num_rows, len(values) - 1)

    # An empty sample used to reach the division below with max_rows == 0 (or
    # a negative divisor); there is nothing to measure, so report zeros.
    if max_rows <= 0:
        return probs

    sample = values[:max_rows]
    if not column_probability_for_type(sample, 'address') > .5:
        return probs

    ap = AddressParser()
    has = {'city': 0, 'state': 0, 'zip': 0}

    for v in sample:
        if ',' not in v:
            # Insert a comma after the first token that is_a_city() accepts.
            tokens = v.split(' ')
            for i, token in enumerate(tokens):
                if len(token) > 1 and is_a_city(token):
                    tokens[i] = token + ','
                    break

            v = ' '.join(tokens)

        addr = ap.parse_address(v)
        for k in has:
            if getattr(addr, k, None):
                has[k] += 1

    for k in probs:
        probs[k] = float(has[k]) / max_rows

    return probs
示例#6
0
def is_a_address(value, key=None, pos=None):
    """Tell whether ``value`` parses into at least two address components.

    Values rejected by is_a_str(), and values longer than 80 characters after
    stripping, are not addresses. Otherwise the value is parsed and counts as
    an address when two or more of house_number, street, city, zip and state
    come back truthy. ``key`` and ``pos`` are accepted but not used.
    """
    if not is_a_str(value):
        return False

    text = str(value).strip()
    if len(text) > 80:
        return False

    parsed = AddressParser().parse_address(text)

    found = 0
    for component in ('house_number', 'street', 'city', 'zip', 'state'):
        if getattr(parsed, component, None):
            found += 1

    return found >= 2
示例#7
0
def format_address(address):
    """Return the parser's full address for ``address`` with every '.' removed.

    The input is parsed with AddressParser and rendered via full_address();
    periods are then dropped with str.replace instead of rebuilding the string
    one character at a time.
    """
    ap = AddressParser()
    full = ap.parse_address(address).full_address()
    return full.replace('.', '')
示例#8
0
def format_address(address):
    """Parse ``address`` and return its full form without any '.' characters.

    full_address() of the parsed result is used as the base string; the
    periods are stripped in one str.replace call rather than by appending
    characters in a loop.
    """
    parsed = AddressParser().parse_address(address)
    return parsed.full_address().replace('.', '')
示例#9
0
文件: views.py 项目: imAlan/Scout
def index():
    """Render index.html; on a valid submission store the address coordinates.

    When the form validates, the submitted address is parsed, its full form is
    passed to get_latlng(), and the resulting pair is saved in the session
    under 'lat' and 'lng'.
    """
    form = scoutForm()
    if not form.validate_on_submit():
        return render_template('index.html', form=form)

    submitted = form.address.data
    parsed = AddressParser().parse_address(submitted)
    session['lat'], session['lng'] = get_latlng(parsed.full_address())
    return render_template('index.html', form=form)
示例#10
0
文件: views.py 项目: imAlan/Scout
def index():
    """Show the scout form and remember the coordinates of a submitted address.

    A validated submission is parsed with AddressParser; get_latlng() is
    called with the parsed full address and its two results are written to
    session['lat'] and session['lng']. index.html is rendered in every case.
    """
    form = scoutForm()
    if form.validate_on_submit():
        raw_address = form.address.data
        parser = AddressParser()
        full = parser.parse_address(raw_address).full_address()
        coords = get_latlng(full)
        latitude, longitude = coords
        session['lat'] = latitude
        session['lng'] = longitude
    return render_template('index.html', form=form)
示例#11
0
def get_address():
    """Prompt for an address until the user confirms it, then return it.

    The user is asked for address, city, state and zip code; the four answers
    are joined with ', ' and parsed. The parsed result is printed for
    verification and the prompts repeat until the confirmation answer is
    'yes' (case-insensitive).

    Returns:
        The object returned by AddressParser.parse_address() for the
        confirmed input.
    """
    # The parser does not depend on the answers, so build it once instead of
    # on every retry.
    ap = AddressParser()

    while True:
        address = input('Enter Address: ')
        city = input('Enter City: ')
        state = input('Enter State: ')
        zip_code = input('Enter Zip Code: ')
        full_address = ap.parse_address(
            '{}, {}, {}, {}'.format(address, city, state, zip_code))
        print('\n-------- Verify Address -------\n{}'.format(full_address))
        if input("Type 'yes' to confirm: ").lower() == 'yes':
            return full_address
示例#12
0
def get_location_db(location, name):
    """
    Return the LocationDetails row for a location, creating it when missing.

    The free-text location is parsed into street, city, state and zip; those
    four values form the lookup key. If no row matches, the coordinates are
    fetched through get_lat_lng(), a new row is inserted, and the stored row
    is returned.

    :param location: actual location (free-text address)
    :param name: name of the location, stored on a newly created row
    :return: the matching or newly created LocationDetails record
    """
    loc_address = AddressParser().parse_address(location)

    # The street parts are concatenated with no separator. This is kept as-is
    # because the resulting string is the value matched against (and stored
    # in) LocationDetails.address.
    street = "".join(
        part
        for part in (
            loc_address.house_number,
            loc_address.street_prefix,
            loc_address.street,
            loc_address.street_suffix,
        )
        if part is not None
    )
    loc_city = loc_address.city if loc_address.city is not None else ""
    loc_state = loc_address.state if loc_address.state is not None else ""
    loc_zip = loc_address.zip if loc_address.zip is not None else ""

    def matching_rows():
        # One place for the four-column lookup used before and after insert.
        return LocationDetails.query.filter(
            LocationDetails.address == street,
            LocationDetails.city == loc_city,
            LocationDetails.state == loc_state,
            LocationDetails.zip == loc_zip,
        )

    # A single fetch replaces the former count() + first_or_404() pair, which
    # ran the same query twice and could abort with 404 if the row vanished
    # between the two calls.
    record = matching_rows().first()
    if record is not None:
        return record

    # get the geolocation of the address
    # NOTE(review): `location` is placed in the URL unescaped; confirm whether
    # get_lat_lng() or the caller URL-encodes it.
    geo_location = get_lat_lng(
        "http://maps.google.com/maps/api/geocode/json?address="
        + location + "&sensor=false")
    lat = geo_location["lat"]
    lng = geo_location["lng"]
    createdOn = datetime.now()
    updatedOn = datetime.now()

    # insert the address details into the database
    record = LocationDetails(name, street, loc_city, loc_state, loc_zip,
                             createdOn, updatedOn, lat, lng)
    db.session.add(record)
    db.session.commit()

    # Re-read the stored row, as the original did after committing.
    return matching_rows().first_or_404()
示例#13
0
def _geocode_compute_key(address):
    """Generate a key out of <address>."""
    if address == '' or re.match('^[0-9]{5}(-[0-9]{4})?$', str(address)):
        return address
    try:
        ap = AddressParser()
        addr = ap.parse_address(address)
        street = '%s %s %s %s' % (addr.house_number, addr.street_prefix, addr.street, addr.street_suffix)
        citystate = '%s,%s' % (addr.city, addr.state)
        # address lib doesn't handle xxxxx-xxxx zip codes
        search = re.search('([0-9]{5})-[0-9]{4}$', address)
        if search:
            addr.zip = search.groups()[0]
        return '%s|%s|%s' % (street.upper(), citystate.upper(), addr.zip)
    except:
        return ''
示例#14
0
def _geocode_compute_key(address):
    """Generate a key out of <address>."""
    if address == '' or re.match('^[0-9]{5}(-[0-9]{4})?$', str(address)):
        return address
    try:
        ap = AddressParser()
        addr = ap.parse_address(address)
        street = '%s %s %s %s' % (addr.house_number, addr.street_prefix, addr.street, addr.street_suffix)
        citystate = '%s,%s' % (addr.city, addr.state)
        # address lib doesn't handle xxxxx-xxxx zip codes
        search = re.search('([0-9]{5})-[0-9]{4}$', address)
        if search:
            addr.zip = search.groups()[0]
        return '%s|%s|%s' % (street.upper(), citystate.upper(), addr.zip)
    except:
        return ''
示例#15
0
def _parse_address_string(address_str):
    """
    Parse an address string and attach a combined ``address_1`` attribute.

    A trailing '-NNNN' (the +4 part of a 9-digit zipcode) is removed before
    parsing. ``address_1`` is the space-joined sequence of the truthy values
    among house number, street prefix, street and street suffix.
    """
    ap = AddressParser()
    without_plus4 = sub('-[0-9]{4}$', '', address_str)
    parsed_address = ap.parse_address(without_plus4)

    street_line = []
    for attr_name in ('house_number', 'street_prefix', 'street',
                      'street_suffix'):
        if getattr(parsed_address, attr_name):
            street_line.append(getattr(parsed_address, attr_name))

    parsed_address.address_1 = ' '.join(street_line)
    return parsed_address
def _parse_address_string(address_str):
    """
    Thin wrapper over AddressParser that also builds ``address_1``.

    The input has any trailing '-NNNN' stripped (9-digit zipcodes are not
    supported by the parser) and is then parsed. The truthy street components
    are joined with single spaces and stored on the result as ``address_1``.
    """
    ap = AddressParser()
    result = ap.parse_address(sub('-[0-9]{4}$', '', address_str))
    components = [
        result.house_number,
        result.street_prefix,
        result.street,
        result.street_suffix,
    ]
    result.address_1 = ' '.join(part for part in components if part)
    return result
示例#17
0
    def increment_dangerous_streets(self, input_address):
        """Add one to the counter of the street named in ``input_address``.

        The street key is built from the parsed prefix and suffix (each with
        its last character removed) around the street name, and is used as
        the key into ``self.dangerous_streets``.
        """
        parsed = AddressParser().parse_address(input_address)

        pieces = []
        if parsed.street_prefix is not None:
            # Drop the final character of the prefix, then separate with a
            # space.
            pieces.append(str(parsed.street_prefix)[:-1] + " ")
        if parsed.street is not None:
            pieces.append(str(parsed.street) + " ")
        if parsed.street_suffix is not None:
            # The suffix loses its final character as well.
            pieces.append(str(parsed.street_suffix)[:-1])
        street = "".join(pieces)

        # An alternative noted by the original author: strip leading digits
        # from input_address instead of parsing it.
        tally = self.dangerous_streets
        tally[street] = tally.get(street, 0) + 1
示例#18
0
def run_cmd(inputfile, method, column=None, header=False):
    """-c <int>, --column=<int>
    -m <str>, --method=<str>
    """
    # The docstring above is left untouched: it reads like option metadata
    # for a command-line wrapper, so it may be consumed at runtime.
    #
    # Reads `inputfile` as tab-separated text, rewrites the column at index
    # `column` with the chosen `method` ('standardize_address', 'titlecase'
    # or 'validate_address') and prints every line. With `header` set, the
    # first line is echoed unchanged before the first data line. A line that
    # fails to process is printed unmodified unless DEBUG is set.

    # `with` guarantees the file is closed, including on the exit() path and
    # when an exception is re-raised in DEBUG mode.
    with open(inputfile, 'r') as f:
        # int(None) raises TypeError, so `column` is required in practice
        # despite its default.
        edit_ix = int(column)

        if header:
            header_line = f.readline()
        firstline = True

        ap = AddressParser()

        for l in f:

            cols = l.split('\t')
            if firstline and header:
                print(header_line.strip())
                firstline = False

            try:
                entity = cols[edit_ix].strip()
                if method == 'standardize_address':
                    entity = standardizeAddress(entity, ap)
                elif method == 'titlecase':
                    entity = fixTitlecase(entity)
                elif method == 'validate_address':

                    entity = removeExtraAddressElements(entity)

                    ret = validateAddress(entity)

                    # The verdict is appended as an extra tab-separated field.
                    if ret is None:
                        entity = entity + '\tINVALID'
                    else:
                        entity = ret + '\tVALID'

                else:
                    print('Error: method not found')
                    exit()

                cols[edit_ix] = removeExtraSpaces(entity)

                ledited = '\t'.join(cols)
                print(ledited.strip())
            except Exception:
                if DEBUG:
                    # Bare raise keeps the original traceback.
                    raise
                print('\t'.join(cols).strip())
示例#19
0
def format_address(address_unformatted):
    """Normalise a street address and check it against the count data.

    The input is upper-cased with '.' and ',' removed, given a dummy
    ', BOULDER, CO 80301' tail when 'BOULDER' is absent, and stripped of
    apartment-style words before parsing. The parsed house number, prefix,
    street, suffix and apartment are re-joined with single spaces.

    Args:
        address_unformatted: Address text entered by the user.

    Returns:
        The normalised address when it is a key of get_count_data(),
        otherwise the string "error" (also returned on any processing
        failure).
    """
    import re  # local import: this block cannot rely on a module-level one

    try:
        addr_no_period_upper = address_unformatted.replace('.', '').replace(
            ',', '').upper()
        # The parser gets confused if there isn't a city, state, and zip, so
        # append a dummy one if the user doesn't enter it
        if 'BOULDER' not in addr_no_period_upper:
            addr_no_period_upper = addr_no_period_upper + ', BOULDER, CO 80301'
        # It can't seem to parse addresses that have an apartment-like word.
        # Only whole words (optionally followed directly by digits, e.g.
        # "APT5") are removed; plain substring replacement used to damage
        # street names such as WESTERN (-> WERN) or UNITED (-> ED).
        addr_no_apt = re.sub(
            r'\b(?:APARTMENT|APT|UNIT|SUITE|STE|NUMBER|NUM)(?=\d|\b)',
            '',
            addr_no_period_upper)
        address_parsed = AddressParser().parse_address(addr_no_apt)

        street_prefix = address_parsed.street_prefix
        if street_prefix is None:
            street_prefix = ''
        apartment = address_parsed.apartment
        if apartment is None:
            apartment = ''
        street_suffix = address_parsed.street_suffix
        if street_suffix == 'Ave.':
            street_suffix = 'AV'

        # A missing house number, street or suffix (None) makes this
        # concatenation raise, which the handler below maps to "error".
        address = (address_parsed.house_number + ' ' + street_prefix + ' '
                   + address_parsed.street + ' ' + street_suffix + ' '
                   + apartment)
        address = address.replace('.', '').replace('  ', ' ').upper()
        # Drop one trailing space, as before.
        if address.endswith(' '):
            address = address[:-1]

        # Only the keys of the count data matter here.
        count_dict = get_count_data()
        if address not in count_dict:
            return "error"
        return address
    except Exception:
        # Narrowed from a bare except so KeyboardInterrupt / SystemExit are
        # not converted into "error".
        return "error"
示例#20
0
    def checkcrime(lat, lon, radius):
        """Yield a summary of the SpotCrime incidents reported around a point.

        Args:
            lat: Value sent as the ``lat`` query parameter.
            lon: Value sent as the ``lon`` query parameter.
            radius: Value sent as the ``radius`` query parameter.

        Yields:
            A single dict. On success it has 'total_crime',
            'the_most_dangerous_streets' (up to three street names, most
            incidents first), 'crime_type_count' and 'event_time_count'.
            If the request fails it has 'status' and 'error code' instead.
        """
        payload = {'lat': lat, 'lon': lon, 'radius': radius, 'key': '.'}

        try:
            r = requests.get('https://api.spotcrime.com/crimes.json',
                             params=payload)
            print(r.url)
            responseText = r.text
            # Records are pulled out of the raw response text with a regex
            # rather than a JSON parser; kept as in the original.
            crimes = re.findall(
                r'"cdid":\w*,"\w*":"\w*","\w*":"\d*/\d*/\d* \w*:\w*\s*\w*","\w*":"[\w*\s*]*"',
                responseText)
            totalCrimes = 0
            crimeType = {}
            eventTime = {
                '12:01am-3am': 0,
                '3:01am-6am': 0,
                '6:01am-9am': 0,
                '9:01am-12noon': 0,
                '12:01pm-3pm': 0,
                '3:01pm-6pm': 0,
                '6:01pm-9pm': 0,
                '9:01pm-12midnight': 0
            }

            def _clock(text):
                # Time-of-day for strings shaped like ' 3:00 AM'.
                return datetime.strptime(text, ' %I:%M %p').time()

            midnight = _clock(' 12:00 AM')
            # Inclusive upper bound of every bucket except the last one.
            bucket_bounds = [
                (_clock(' 3:00 AM'), '12:01am-3am'),
                (_clock(' 6:00 AM'), '3:01am-6am'),
                (_clock(' 9:00 AM'), '6:01am-9am'),
                (_clock(' 12:00 PM'), '9:01am-12noon'),
                (_clock(' 3:00 PM'), '12:01pm-3pm'),
                (_clock(' 6:00 PM'), '3:01pm-6pm'),
                (_clock(' 9:00 PM'), '6:01pm-9pm'),
            ]

            streets = {}
            # The parser does not depend on the record; build it once.
            ap = AddressParser()
            for crime in crimes:
                totalCrimes += 1

                # The first quoted value of the record is counted as the
                # crime type.
                typeExacct = re.findall(r':"\w*"', crime)
                tm = re.findall(r"['\"](.*?)['\"]", typeExacct[0])
                crimeType[tm[0]] = crimeType.get(tm[0], 0) + 1

                addr = re.findall(r'"address":"[\s*\w*]*"', crime)
                address = re.findall(r':"[\s*\w*]*"', addr[0])
                street = re.findall(r"['\"](.*?)['\"]", address[0])
                streetName = ap.parse_address(street[0])
                # Join only the parts the parser found. The original tested
                # `street` where it meant `street_suffix` (TypeError on a
                # missing suffix) and always inserted a leading space.
                streetKey = " ".join(
                    part
                    for part in (streetName.street_prefix,
                                 streetName.street,
                                 streetName.street_suffix)
                    if part is not None
                )
                streets[streetKey] = streets.get(streetKey, 0) + 1

                timeDate = re.findall(r'"date":"\d*/\d*/\d* \w*:\w*\s*\w*"',
                                      crime)
                crmTime = re.findall(r'\s\w*:\w*\s\w*', timeDate[0])
                when = _clock(crmTime[0])
                # Exactly 12:00 AM, and anything after 9:00 PM, falls into
                # the last bucket.
                label = '9:01pm-12midnight'
                if when > midnight:
                    for upper, name in bucket_bounds:
                        if when <= upper:
                            label = name
                            break
                eventTime[label] += 1

            # Sort once after the loop; slicing copes with fewer than three
            # distinct streets (the original indexed [0], [1], [2] directly).
            sorted_streets = sorted(streets.items(),
                                    key=operator.itemgetter(1),
                                    reverse=True)
            dangerours_streets = [name for name, _ in sorted_streets[:3]]

            data = {}
            data['total_crime'] = totalCrimes
            data['the_most_dangerous_streets'] = dangerours_streets
            data['crime_type_count'] = crimeType
            data['event_time_count'] = eventTime
            yield data

        except (URLError, requests.exceptions.RequestException) as e:
            # requests reports failures as RequestException, not URLError, so
            # the original handler could not catch a failed request.
            data = {}
            data['status'] = "No data available. Got an error."
            data['error code'] = e
            yield data
示例#21
0
    def checkcrime(ctx, lat, lon, radius, key):
        """Yield a summary of the SpotCrime incidents reported around a point.

        Args:
            ctx: Accepted but not used by this function.
            lat: Latitude placed in the request URL.
            lon: Longitude placed in the request URL.
            radius: Search radius placed in the request URL.
            key: Accepted but not used; the request always sends ``key=.``.

        Yields:
            One dict with 'total_crime', 'the_most_dangerous_streets' (up to
            three street names, most incidents first), 'crime_type_count'
            and 'event_time_count'.
        """
        # Was "crimes.jason?", a typo for the JSON endpoint.
        baseurl = "http://api.spotcrime.com/crimes.json?"
        response = urllib2.urlopen("%slat=%s&lon=%s&radius=%s&key=." %
                                   (baseurl, lat, lon, radius))
        jsondata = json.load(response)

        # Collect the per-crime fields --------------------------------------
        totalcrimes = 0
        crime_type_count = {}
        addresslist = []
        clock_times = []  # strings such as "09:15PM"
        for crime in jsondata["crimes"]:
            totalcrimes += 1
            crime_type = str(crime["type"])
            crime_type_count[crime_type] = (
                crime_type_count.get(crime_type, 0) + 1)
            addresslist.append(crime['address'])
            # The date is split on spaces and fields 1 and 2 (clock time and
            # AM/PM marker) are glued together.
            date_fields = crime['date'].split(" ")
            clock_times.append(date_fields[1] + date_fields[2])

        # Count crimes per time-of-day range --------------------------------
        last_bucket = "9:01pm-12midnight"
        # (inclusive upper bound in minutes since midnight, label)
        bucket_limits = [
            (180, "12:01am-3am"),
            (360, "3:01am-6am"),
            (540, "6:01am-9am"),
            (720, "9:01am-12noon"),
            (900, "12:01pm-3pm"),
            (1080, "3:01pm-6pm"),
            (1260, "6:01pm-9pm"),
        ]
        event_time_count = {last_bucket: 0}
        for _limit, label in bucket_limits:
            event_time_count[label] = 0

        for clock in clock_times:
            parsed_time = time.strptime(clock, '%I:%M%p')
            minutes = parsed_time.tm_hour * 60 + parsed_time.tm_min
            # Minute 0 (exactly midnight) and everything past 21:00 belong
            # to the last bucket.
            label = last_bucket
            if minutes >= 1:
                for limit, name in bucket_limits:
                    if minutes <= limit:
                        label = name
                        break
            event_time_count[label] += 1

        # Split "A & B" intersections into their two streets ------------------
        intersection_re = re.compile(r"[\w.\s]{1,20}&[\w.\s]{1,20}")
        split_streets = []
        for raw_address in addresslist:
            joined = ''.join(intersection_re.findall(raw_address))
            # The part after '&' comes first, then the part before it, as in
            # the original.
            for pattern in (r"&(.*)", r"(.*)&"):
                pieces = re.findall(pattern, joined)
                if pieces:
                    split_streets.append(''.join(pieces))

        # Merge with the raw addresses and drop the intersection entries,
        # whose streets were added above. A filter replaces the original
        # remove-while-iterating loops, which could skip adjacent
        # intersections.
        unsplit_re = re.compile(r"[\w.\s*]{1,20}&[\w.\s*]{1,20}")
        mergelists = [entry for entry in split_streets + addresslist
                      if not unsplit_re.search(entry)]

        # Reduce every address to "prefix street suffix" ----------------------
        ap = AddressParser()
        allstreets = []
        for entry in mergelists:
            address = ap.parse_address(entry)
            # Skip parts the parser did not find; formatting them directly
            # used to put the literal text "None" into the street name.
            found_parts = [str(part)
                           for part in (address.street_prefix,
                                        address.street,
                                        address.street_suffix)
                           if part is not None]
            allstreets.append(' '.join(found_parts))

        # The three streets with the most incidents ----------------------------
        mostdangerous = [street
                         for street, _count
                         in Counter(allstreets).most_common(3)]

        final_dict = {
            "total_crime": totalcrimes,
            "the_most_dangerous_streets": mostdangerous,
            "crime_type_count": crime_type_count,
            "event_time_count": event_time_count
        }
        yield final_dict
def pars_zip(full_address_string):
    """Return the ``zip`` attribute parsed out of ``full_address_string``."""
    from address import AddressParser, Address
    parsed = AddressParser().parse_address(full_address_string)
    return parsed.zip
示例#23
0
 def setUp(self):
     """Store a new AddressParser on ``self.parser``."""
     self.parser = AddressParser()
def pars_house_number(full_address_string):
    """Return the ``house_number`` parsed out of ``full_address_string``."""
    from address import AddressParser, Address
    parser = AddressParser()
    return parser.parse_address(full_address_string).house_number
示例#25
0
     row['zip'] = ''
 if 'salutation' not in row:
     row['salutation'] = ''
 if 'firstName' not in row:
     row['firstName'] = ''
 if 'middleName' not in row:
     row['middleName'] = ''
 if 'lastName' not in row:
     row['lastName'] = ''
 if 'nameSuffix' not in row:
     row['nameSuffix'] = ''
 if 'nickName' not in row:
     row['nickName'] = ''
 #parse the full text name and full text address into their components and add them to the row.  For each row, we are checking the destination value to ensure it is empty, as users my import component values instead of full text values.
 parsedName = HumanName(row['fullName'])
 ap = AddressParser()
 parsedAddress = ap.parse_address(row['fullTextAddress'])
 if not row['salutation']:
     row['salutation'] = parsedName.title
 if not row['firstName']:
     row['firstName'] = parsedName.first
 if not row['middleName']:
     row['middleName'] = parsedName.middle
 if not row['lastName']:
     row['lastName'] = parsedName.last
 if not row['nameSuffix']:
     row['nameSuffix'] = parsedName.suffix
 if not row['nickName']:
     row['nickName'] = parsedName.nickname
 if not row['streetPrefix']:
     row['streetPrefix'] = parsedAddress.street_prefix
示例#26
0
 def setUp(self):
     """Store a new AddressParser on ``self.ap``."""
     self.ap = AddressParser()
示例#27
0
 def setUp(self):
     """Store a new AddressParser on ``self.ap``."""
     self.ap = AddressParser()
示例#28
0
def same_path(similar_1, similar_2, name):
    print 'path similarity for  ', name
    route_a_2_route_b = []
    for i, j in map(None, similar_1, similar_2):
        a_dict, b_dict = defaultdict(int), defaultdict(int)
        ab = []
        ap = AddressParser()

        if i and j:

            if i[0][1] is not None:
                for x in i:
                    if "I 10" in x[1] or "Interstate " in x[1] or "I-" in x[
                            1] or "US-" in x[1] or "TX-" in x[
                                1] or "State Highway" in x[
                                    1] or ' ' in x[1] and x[2] > .7:
                        ad1 = 'I-10'
                        if ad1 in a_dict:
                            a_dict[ad1] += x[2]
                        else:
                            a_dict[ad1] = x[2]
                    else:
                        address = ap.parse_address(x[1])
                        ad1 = address.street
                        if ad1 in a_dict:
                            a_dict[ad1] += x[2]
                        else:
                            a_dict[ad1] = x[2]
            else:
                a_dict['None'] = 0

            if j and j[0][1] is not None:
                for y in j:
                    if "I 10" in y[1] or "Interstate " in y[1] or "I-" in y[
                            1] or "US-" in y[1] or "TX-" in y[
                                1] or "State Highway" in y[
                                    1] or ' ' in y[1] and y[2] > .7:
                        ad2 = 'I-10'
                        if ad2 in b_dict:
                            b_dict[ad2] += y[2]
                        else:
                            b_dict[ad2] = y[2]
                    else:
                        address = ap.parse_address(y[1])
                        ad2 = address.street
                        if ad2 in b_dict:
                            b_dict[ad2] += y[2]
                        else:
                            b_dict[ad2] = y[2]
            else:
                b_dict['None'] = 0

            route_id = i[0][0]

            for key, a_d in a_dict.items():
                if key in b_dict:
                    b_d = b_dict[key]
                    same = min(a_d, b_d)
                else:
                    same = 0
                ab.append(same)
            sim = [route_id, round(sum(ab), 2)]
            route_a_2_route_b.append(sim)

        else:
            route_id = int(route_id) + 1
            sim = [str(route_id), 'na']
            route_a_2_route_b.append(sim)

    return route_a_2_route_b
    '''
示例#29
0
def index(request):
    """Render the ride-offer page, saving the offer first when one is posted.

    On a POST with a valid ``rideForm`` this stores one ``Rides`` row for the
    outbound trip and, unless ``returnDate`` is the empty string, a second row
    for the return trip with the two ZIP codes swapped. An address without a
    ZIP of its own gets the first ZIP that ``zcdb`` finds for its parsed city
    and state. An invalid form only has its errors printed. Every request ends
    by rendering ``rideSubmit.html``.
    """
    ap = AddressParser()

    def zip_for(field_name):
        # Prefer the ZIP parsed from the posted address; otherwise fall back
        # to a city/state lookup and take the first hit.
        location = ap.parse_address(request.POST.get(field_name))
        if location.zip:
            return location.zip
        return zcdb.find_zip(city=location.city, state=location.state)[0].zip

    if request.method == 'POST':
        form = rideForm(request.POST)
        if not form.is_valid():
            print(form.errors)
        else:
            posted = request.POST
            depart_date = posted.get('departDate')
            depart_time = posted.get('departTime')
            depart_zip = zip_for('departLocation')
            arrival_zip = zip_for('arrivalLocation')

            # Values shared by the outbound and the return record.
            common = {
                'driEmail': request.user.email,
                'riderLugg': posted.get('ridersLugg'),
                'seatCapacity': posted.get('seatCapacity'),
                'riderPrice': posted.get('riderPrice'),
                'reserved': 'Open',
                # Checkbox values arrive as 'on' when ticked.
                'driSmokes': 1 if posted.get('drisSmokes') == 'on' else 0,
                'riderPets': 1 if posted.get('ridersPets') == 'on' else 0,
            }

            Rides(depDate=depart_date, depTime=depart_time,
                  depZip=depart_zip, arrZip=arrival_zip, **common).save()
            messages.success(request,
                             'The Ride Offer was submitted successfully.')

            return_date = posted.get('returnDate')
            return_time = posted.get('returnTime')
            if return_date != '':
                # Return trip: same offer with origin and destination swapped.
                Rides(depDate=return_date, depTime=return_time,
                      depZip=arrival_zip, arrZip=depart_zip, **common).save()

    return render(request, 'rideSubmit.html')
示例#30
0
        primary = str(value).split('-')[0]
    else:
        primary = value

    try:
        primary = int(primary)
    except:
        return False

    if len(str(primary)) == 5 and int(primary) > 499:
        return True

    return False


ap = AddressParser()


def address_pieces(value):
    if not is_a_str(value):
        return [], None

    value = str(value).strip()

    if len(value) > 80:
        return [], None

    address = ap.parse_address(value)

    keys = ['house_number', 'street', 'city', 'zip', 'state']
示例#31
0
C:\Users\Aman.Sivaprasad\Music>docker run -it --rm -p 8888:8888 -v C:\Users\Aman.Sivaprasad\Music:/home/jovyan/work riordan/docker-jupyter-scipy-notebook-libpostal


# Address-parsing experiments: libpostal (postal.parser) and the `address` package.
from postal.parser import parse_address
# NOTE(review): called without an address argument, unlike the call further
# down - looks like a leftover; confirm before running.
parse_address()



# Libraries noted for address parsing, each with the tag the author gave it:
# address
# addressnet  # Australia
# usaddress  # USA
# pypostal  # all
# postal-address  # European

# googleapi  # https://github.com/thilohuellmann/address_transformation/blob/master/address_transformation.py

# https://www.kaggle.com/stefanjaro/libpostal-windows-and-jupyter-notebook

from postal.parser import parse_address
parse_address('The Book Club 100-106 Leonard St Shoreditch London EC2A 4RH, United Kingdom')



from address import AddressParser, Address

ap = AddressParser()
# NOTE(review): text3 is not defined anywhere in this snippet; it has to be
# assigned before this line can run.
address = ap.parse_address(text3)
print(address)
print("Address is: {0} {1} {2} {3}".format(address.house_number, address.street_prefix, address.street, address.street_suffix))

#Address is: 123 W. Mifflin St.
import os
from address import Address, AddressParser


if __name__ == '__main__':
    # The mini test program takes a list of addresses, creates Address objects, and prints errors for each one
    # with unmatched terms. Takes a filename as the first and only argument. The file should be one address per line.

    # `sys` is used below but is not imported alongside `os` in this script;
    # importing it here is harmless if it is already imported elsewhere.
    import sys

    if len(sys.argv) != 2:
        print("Usage: test_list.py filename")
        sys.exit(1)
    if not os.path.exists(sys.argv[1]):
        print("File {0} does not exist".format(sys.argv[1]))
        sys.exit(2)
    unmatched_count = 0
    line_count = 0
    ap = AddressParser()
    # Every print below passes a single pre-formatted string so the output is
    # the same under Python 2 and Python 3.
    with open(sys.argv[1]) as address_file:
        for line in address_file:
            addr = ap.parse_address(line.strip(), line_number=line_count)

            if addr.unmatched:
                print("Unmatched {0} {1}".format(addr, addr.line_number))
                print("")
                unmatched_count += 1
            # All addresses have a house number and a street.
            # (The double space after the colon matches the old print-statement output.)
            if addr.house_number is None:
                print("House number cannot be None:  {0} {1}".format(addr, addr.line_number))
            if addr.street is None:
                print("Street cannot be None:  {0} {1}".format(addr, addr.line_number))
            line_count += 1
            print(addr.full_address())
示例#33
0
import csv, json
from address import AddressParser, Address
import usaddress

# Sample input: a street address followed by a PO box.
string = "361 Farmington Avenue PO Box 17183"

contacts = []

# Parse with the `address` package and print each field of the result.
# Each print passes one pre-formatted string so the script runs on Python 2
# and Python 3 alike; the double space after each colon reproduces what the
# former two-argument print statements emitted.
ap = AddressParser()
parsedAddress = ap.parse_address(string)
print(parsedAddress)
print("Street Prefix:  {0}".format(parsedAddress.street_prefix))
print("House Number:  {0}".format(parsedAddress.house_number))
print("Street:  {0}".format(parsedAddress.street))
print("Street Suffix:  {0}".format(parsedAddress.street_suffix))
print("Apartment:  {0}".format(parsedAddress.apartment))
print("Building:  {0}".format(parsedAddress.building))
print("City:  {0}".format(parsedAddress.city))
print("State:  {0}".format(parsedAddress.state))
print("Zip:  {0}".format(parsedAddress.zip))

# Tag the same text with usaddress and print the raw result for comparison.
usa = usaddress.tag(string)
print(type(usa))
print(ap)
print(usa)
示例#34
0
from etes import app
from flask import render_template, redirect, url_for, session, request
from user.form import RegisterForm, LoginForm
from event.form import SellForm
from etes import db
from user.models import User
import bcrypt
from address import AddressParser, Address

# Module-level smoke test of the address parser; runs on import.
ap = AddressParser()
address = ap.parse_address('123 West Mifflin Street, Madison, WI, 53703')
# Function-call form of print with a single argument works on Python 2 and 3.
print("Address is: {0} {1} {2} {3}".format(address.house_number, address.street_prefix, address.street, address.street_suffix))

"""
    For Register: Calling the HTTP Request (POST) to receive the forms submitted by the user and registering them to
    the database
    
    For Login: Calling the HTTP Request (GET) to receive the forms submitted by the user and see if they are a registered
    regform = RegisterForm()
"""
@app.route('/login', methods=('GET', 'POST'))
def login():
    form = LoginForm()
    regform = RegisterForm()
    error = None
    
    if form.validate_on_submit() or regform.validate_on_submit():
        if request.form['btn'] == 'Login':
            user = User.query.filter_by(username=form.username.data).first()
            if bcrypt.hashpw(form.password.data.encode('utf8'), user.password.encode('utf8')) == user.password.encode('utf8'): #decrypt password
                session['username'] = form.username.data
示例#35
0
	full_address = Column(String)
	zipcode = Column(Integer)
	city = Column(String)
	state = Column(String)
	latitude = Column(Float)
	longitude = Column(Float)
	stars = Column(Float)
	review_count = Column(Integer)
	photo_url = Column(String)
	categories = Column(String)

# Set up the schema and a session, then walk the business file record by record.
# NOTE(review): Base, engine and sessionmaker come from outside this excerpt;
# presumably SQLAlchemy - confirm.
Base.metadata.create_all(engine) 
Session = sessionmaker(bind=engine)
session = Session()
json_file = 'business'+'.json'
ap = AddressParser()
# Not written to anywhere in the visible part of this script.
not_found = []
# The file is read as one JSON document per line.
with open(json_file) as f:
	for line in f:
		json_object = json.loads(line)
		# Skip records rejected by proceed() (defined outside this excerpt).
		if not proceed(json_object):
			continue
		# Every kept record has its categories overwritten with the fixed label 'food'.
		json_object['categories'] = 'food'
		try:
			# Newlines in the address are turned into commas before parsing.
			address = ap.parse_address(json_object['full_address'].replace('\n',','))
		except:
			# NOTE(review): bare except - any failure above (including a
			# missing 'full_address' key) silently drops the record.
			continue	
		json_object['zipcode'] = address.zip
		new_object = Business()
		for key in json_object.keys():
			skey = key.lower()
示例#36
0
    def checkcrime(lat, lon, radius):
        """Yield a summary of the SpotCrime reports around a coordinate.

        Sends ``lat``, ``lon`` and ``radius`` to the SpotCrime ``crimes.json``
        endpoint and yields one dict with these keys:

        * ``total_crime`` - number of crimes in the response.
        * ``crime_type_count`` - occurrences per crime ``type``.
        * ``event_time_count`` - occurrences per three-hour window of the day.
        * ``the_most_dangerous_streets`` - up to three most frequent street
          names among the first 50 crimes, intersection entries excluded.

        This is a generator function; the summary dict is its only item.
        Errors from the HTTP call, or from a response without the expected
        keys, propagate to the caller.
        """
        # Window labels in chronological order; the position in this list is
        # what window_for() computes.
        windows = ["12:01am-3am", "3:01am-6am", "6:01am-9am", "9:01am-12noon",
                   "12:01pm-3pm", "3:01pm-6pm", "6:01pm-9pm",
                   "9:01pm-12midnight"]
        output = {"total_crime": 0, "the_most_dangerous_streets": [],
                  "crime_type_count": {},
                  "event_time_count": dict.fromkeys(windows, 0)}

        def window_for(stamp):
            # Map a crime's date string to its three-hour window label.
            # Only the second and third space-separated fields are used, as
            # "HH:MM" and "AM"/"PM" (assumes a 12-hour clock - TODO confirm
            # against the API). Anything that does not fit lands in the last
            # window, which is also where exact midnight is counted.
            fields = stamp.split(' ')
            clock, meridiem = fields[1], fields[2]
            clock_parts = clock.split(':')
            hour_text, minute_text = clock_parts[0], clock_parts[1]
            if meridiem not in ("AM", "PM"):
                return windows[-1]
            try:
                hour, minute = int(hour_text), int(minute_text)
            except ValueError:
                return windows[-1]
            if not (1 <= hour <= 12 and 0 <= minute <= 59):
                return windows[-1]
            # Minutes since midnight; 12 AM is hour 0 and 12 PM is hour 12.
            elapsed = (hour % 12 + (12 if meridiem == "PM" else 0)) * 60 + minute
            # Each window is half-open, (start, end]: 3:00 belongs to the
            # window ending at 3, 3:01 to the next one. 21:00 is minute 1260.
            if 1 <= elapsed <= 1260:
                return windows[(elapsed - 1) // 180]
            return windows[-1]

        URL = "https://api.spotcrime.com/crimes.json"
        data = {'lat': lat, 'lon': lon, 'radius': radius, 'key': '.'}
        crimes = requests.get(URL, params=data).json()["crimes"]

        output["total_crime"] = len(crimes)

        type_counts = output['crime_type_count']
        time_counts = output['event_time_count']
        for crime in crimes:
            type_counts[crime['type']] = type_counts.get(crime['type'], 0) + 1
            time_counts[window_for(crime["date"])] += 1

        # Street names are taken from at most the first 50 crimes. Slicing
        # (rather than indexing 0..49) keeps short responses from raising
        # IndexError.
        # NOTE(review): the earlier code appeared to aim at splitting
        # "A & B" intersections into their two streets, but that path never
        # produced any entries; intersections are simply left out here, and
        # each remaining street is counted exactly once per crime.
        intersection = re.compile(r"[\w.\s*]{1,20}&[\w.\s*]{1,20}")
        parser = AddressParser()  # one parser for all addresses
        streets = []
        for crime in crimes[:50]:
            parsed = parser.parse_address(crime['address'])
            street = "{} {}".format(parsed.street, parsed.street_suffix)
            if not intersection.search(street):
                streets.append(street)

        for street, _occurrences in Counter(streets).most_common(3):
            output["the_most_dangerous_streets"].append(street)
        yield output
示例#37
0
 def post(self, request):
     """Import contacts from an uploaded CSV file, one ``ContactData`` per row.

     Each row must provide ``fullName`` and ``fullTextAddress``. Every other
     recognised column is optional: a name or address part that is missing
     or empty in the row is filled from the parsed full name / full address.
     ``company`` and ``title`` are copied from the row as-is. Each record is
     saved with the requesting user as ``created_by`` and the ``Group``
     selected by the posted ``owner`` id.

     Always redirects to the contacts list, whether or not the form was
     valid.
     """
     # Columns that may be absent from the CSV; they default to ''.
     optional_columns = (
         'title', 'company', 'streetPrefix', 'houseNumber', 'streetName',
         'streetSuffix', 'apartment', 'building', 'city', 'state', 'zip',
         'salutation', 'firstName', 'middleName', 'lastName', 'nameSuffix',
         'nickName',
     )
     # (row column / model field, attribute of the parsed name to fall back on)
     name_fallbacks = (
         ('salutation', 'title'),
         ('firstName', 'first'),
         ('middleName', 'middle'),
         ('lastName', 'last'),
         ('nameSuffix', 'suffix'),
         ('nickName', 'nickname'),
     )
     # (row column / model field, attribute of the parsed address to fall back on)
     address_fallbacks = (
         ('streetPrefix', 'street_prefix'),
         ('houseNumber', 'house_number'),
         ('streetName', 'street'),
         ('streetSuffix', 'street_suffix'),
         ('apartment', 'apartment'),
         ('building', 'building'),
         ('city', 'city'),
         ('state', 'state'),
         ('zip', 'zip'),
     )

     form = self.form_class(request.POST, request.FILES)
     if form.is_valid():
         upload = request.FILES.get('file')
         ap = AddressParser()  # one parser for the whole file
         for row in csv.DictReader(upload):
             for column in optional_columns:
                 row.setdefault(column, '')

             input_data = ContactData()
             parsedName = HumanName(row['fullName'])
             parsedAddress = ap.parse_address(row['fullTextAddress'])

             input_data.fullName = row['fullName']
             input_data.fullTextAddress = row['fullTextAddress']
             # Copied unconditionally: the previous `if not row[...]` guard
             # only stored these two when they were empty, dropping real
             # values.
             input_data.company = row['company']
             input_data.title = row['title']

             # An explicit column value wins; otherwise use the parsed part.
             # (salutation now reads its own column, not 'title'.)
             for column, parsed_attr in name_fallbacks:
                 setattr(input_data, column,
                         row[column] or getattr(parsedName, parsed_attr))
             for column, parsed_attr in address_fallbacks:
                 setattr(input_data, column,
                         row[column] or getattr(parsedAddress, parsed_attr))

             input_data.created_by = self.request.user
             # Looked up per row so that a file without rows never touches
             # the Group table, exactly as before.
             input_data.owner = Group.objects.get(
                 pk=request.POST.get('owner'))
             input_data.save()
     return HttpResponseRedirect('/people/contacts/')
示例#38
0
import os
from address import Address, AddressParser


if __name__ == '__main__':
    # The mini test program takes a list of addresses, creates Address objects, and prints errors for each one
    # with unmatched terms. Takes a filename as the first and only argument. The file should be one address per line.

    # `sys` is used below but is not imported alongside `os` in this script;
    # importing it here is harmless if it is already imported elsewhere.
    import sys

    if len(sys.argv) != 2:
        print("Usage: test_list.py filename")
        sys.exit(1)
    if not os.path.exists(sys.argv[1]):
        print("File {0} does not exist".format(sys.argv[1]))
        sys.exit(2)
    unmatched_count = 0
    line_count = 0
    ap = AddressParser()
    # `address_file` rather than `input`, which would shadow the builtin.
    with open(sys.argv[1]) as address_file:
        for line in address_file:
            addr = ap.parse_address(line.strip(), line_number=line_count)

            if addr.unmatched:
                print("Unmatched", addr, addr.line_number)
                print("")
                unmatched_count += 1
            # All addresses have a house number and a street.
            if addr.house_number is None:
                print("House number cannot be None: ", addr, addr.line_number)
            if addr.street is None:
                print("Street cannot be None: ", addr, addr.line_number)
            line_count += 1
            print(addr.full_address())