def get_focal_point(location_list, r1):
    """Pick the location whose r1-neighbourhood holds the most locations.

    Every entry of ``location_list`` must expose ``get_lat()``/``get_lng()``.
    ``r1`` is a radius in the units used by ``formulas.haversine``
    (presumably kilometres -- TODO confirm).

    Returns:
        tuple: ``(focal_point, local_list)`` where ``local_list`` is the
        list of locations within ``r1`` of ``focal_point`` (the focal point
        itself included).  ``(None, [])`` when ``location_list`` is empty.
    """
    # Hoist the radius conversion out of the O(n^2) pairwise loop.
    radius = float(r1)

    # (location, locations-within-r1-of-it) for every candidate centre.
    neighborhood_list = []
    for location in location_list:
        lat1 = location.get_lat()
        lon1 = location.get_lng()
        neighbors = []
        for other_location in location_list:
            lat2 = other_location.get_lat()
            lon2 = other_location.get_lng()
            try:
                dist = formulas.haversine(lat1, lon1, lat2, lon2)
                if dist <= radius:
                    neighbors.append(other_location)
            except Exception:
                # Best-effort: skip pairs haversine cannot handle.  (Was a
                # bare ``except:``, which also swallowed KeyboardInterrupt.)
                pass
        neighborhood_list.append((location, neighbors))

    # Keep the centre with the largest neighbourhood.
    local_list = []
    focal_point = None
    for loc, self_list in neighborhood_list:
        if len(self_list) > len(local_list):
            local_list = self_list
            focal_point = loc
    return (focal_point, local_list)
def direct_flight_builder(loc_a, coord_a, radius_a, loc_b, coord_b, radius_b):
    """Append a row to outputs/flights.csv with monthly direct-flight counts
    (Jan 1990 - Aug 2018) between the airports serving two locations.

    Pairs closer than MINIMUM_DISTANCE are skipped.  Row layout: the two
    location names, radii, IATA lists for both sides, the set of direct
    routes seen, both coordinate pairs, then one flight count per month.

    BUG FIX: the old termination test ``int(str(year) + str(month)) > 20188``
    also fired for un-padded dates such as 199010 (Oct 1990), so months 10-12
    of every year before 2018 were silently skipped.  The loop now stops
    exactly after August 2018 while keeping the historical un-padded date key
    (e.g. 19901 for Jan 1990) that ``has_flight`` is presumably keyed on --
    TODO confirm against the data loader.
    """
    (lat1, lon1) = coord_a
    (lat2, lon2) = coord_b
    # only do this calculation if the two locations are far enough apart
    MINIMUM_DISTANCE = 100  # same units as formulas.haversine
    if formulas.haversine(lat1, lon1, lat2, lon2) < MINIMUM_DISTANCE:
        return
    # prepare the csv output
    with open('outputs/flights.csv', 'a', newline="\n",
              encoding='utf-8-sig') as out_file:
        # airports within the radius of each location
        loc_a_airports = organized_set.get_subset(coord_a, radius_a)
        loc_b_airports = organized_set.get_subset(coord_b, radius_b)
        airports_a_list = [str(a.iata) for a in loc_a_airports]
        airports_b_list = [str(b.iata) for b in loc_b_airports]
        csv_writer = csv.writer(out_file, delimiter=',')
        # the leading descriptive columns
        date_list = [
            loc_a, loc_b,
            str(radius_a),
            str(radius_b),
            '; '.join(airports_a_list),
            '; '.join(airports_b_list),
            str(coord_a),
            str(coord_b)
        ]
        # keep track of the flights themselves
        direct_flights_set = set()
        for year in range(1990, 2019):
            for month in range(1, 13):
                # data ends in August, 2018
                if year == 2018 and month > 8:
                    break
                date = int(str(year) + str(month))
                num_of_flights = 0
                # check if there is a flight from some airport in a
                # to some airport in b
                for airport_a in loc_a_airports:
                    for airport_b in loc_b_airports:
                        if (airport_a.has_flight(date, airport_b)):
                            num_of_flights += 1
                            direct_flights_set.add(
                                str(airport_a.iata) + "-" +
                                str(airport_b.iata))
                date_list.append(str(num_of_flights))
        # the routes column sits between the airport lists and coordinates
        date_list.insert(6, '; '.join(direct_flights_set))
        csv_writer.writerow(date_list)
def create_remote_set(focal_point, location_list, r2):
    """Collect the locations lying farther than ``r2`` from ``focal_point``.

    Both ``focal_point`` and the members of ``location_list`` are tuples
    whose first two fields are latitude and longitude.  Returns a set.
    """
    threshold = float(r2)
    center_lat = focal_point[0]
    center_lng = focal_point[1]
    # keep every location whose great-circle distance exceeds the threshold
    return {
        loc
        for loc in location_list
        if formulas.haversine(center_lat, center_lng, loc[0], loc[1]) > threshold
    }
def get_focal_point(location_list, r1):
    """Pick the coordinate tuple whose r1-neighbourhood holds the most points.

    ``location_list`` holds tuples whose first two fields are latitude and
    longitude.  Returns ``(focal_point, local_set)`` -- the best centre and
    the set of tuples within ``r1`` of it; ``(None, set())`` when the input
    is empty.

    NOTE(review): a commented-out geocoding snippet embedding a hard-coded
    Bing Maps API key was removed from this function -- credentials must not
    live in source control; use ``formulas.get_api_key()`` as the rest of
    the code base does.
    """
    # Hoist the radius conversion out of the O(n^2) pairwise loop.
    radius = float(r1)

    # (location, set-of-locations-within-r1) for every candidate centre
    local_set_list = []
    for location in location_list:
        lat1 = location[0]
        lon1 = location[1]
        local_set = set()
        for other_location in location_list:
            lat2 = other_location[0]
            lon2 = other_location[1]
            try:
                dist = formulas.haversine(lat1, lon1, lat2, lon2)
                if dist <= radius:
                    local_set.add(other_location)
            except Exception:
                # best-effort: skip malformed pairs (was a bare ``except:``)
                pass
        local_set_list.append((location, local_set))

    # keep the largest neighbourhood and its centre
    local_set = set()
    focal_point = None
    for loc, self_set in local_set_list:
        if len(self_set) > len(local_set):
            local_set = self_set
            focal_point = loc
    return (focal_point, local_set)
def create_remote_list(focal_point, location_list, r2):
    """Return the locations lying farther than ``r2`` from ``focal_point``.

    ``focal_point`` and each member of ``location_list`` expose
    ``get_lat()``/``get_lng()``.  Returns a list (preserves input order).
    """
    # Hoist the threshold conversion out of the loop.
    threshold = float(r2)
    remote_list = []
    lat1 = focal_point.get_lat()
    lon1 = focal_point.get_lng()
    for loc in location_list:
        lat2 = loc.get_lat()
        lon2 = loc.get_lng()
        try:
            dist = formulas.haversine(lat1, lon1, lat2, lon2)
            if dist > threshold:
                remote_list.append(loc)
        except Exception:
            # best-effort: skip pairs haversine cannot handle (was a bare
            # ``except:``, which also swallowed KeyboardInterrupt)
            pass
    return remote_list
def checklist(self, busList, threshold, routekey, timestamp):
    """Detect bus bunching on one route at one instant.

    Compares every pair of buses in ``busList``; whenever two buses are
    closer than ``threshold`` (units per ``haversine``), records a
    BunchingInstance at the pair's midpoint.  Returns the list of incidents.
    """
    incidents = []
    for first, second in itertools.combinations(busList, 2):
        separation = haversine(first.latitude, first.longitude,
                               second.latitude, second.longitude)
        if separation < threshold:
            # log the incident at the midpoint between the two buses
            where = midpoint(first.latitude, first.longitude,
                             second.latitude, second.longitude)
            incidents.append(
                BunchingInstance(routekey, where[0], where[1], timestamp))
    return incidents
def get_subset(self, coordinates, radius):
    """Return the set of airports within ``radius`` of ``coordinates``.

    Looks up the grid cells covered by the radius bounding box, then filters
    the candidates by true haversine distance.  The formula assumes units in
    kilometers!

    Raises:
        ValueError: when ``coordinates`` is outside (+-90, +-180).

    BUG FIXES:
      * the validation used ``abs(lng > 180)`` -- abs of a bool, which is
        never > 180, so longitudes below -180 slipped through; it is now
        ``abs(lng) > 180``;
      * ``close_airports`` aliased ``self.sets[key]``, so the ``update()``
        calls below permanently polluted that grid cell with neighbouring
        airports; the stored set is now copied first.
    """
    (lat, lng) = coordinates
    # check that the coordinates exist
    if (abs(lat) > 90 or abs(lng) > 180):
        raise ValueError("get_subset: the coordinates are invalid")
    # determine the row and column associated with the coordinates
    (row, column) = self.__get_row_and_column(coordinates)
    key = self.__get_key(row, column)
    # start from a COPY of the airports in the same box as the coordinate,
    # so the updates below do not mutate the stored grid
    close_airports = set(self.sets[key])
    # the formula assumes units in kilometers!
    dlat = formulas.change_in_latitude(radius)
    # check the corners of the radius bounding box
    latHigh = lat + dlat
    latLow = lat - dlat
    dlngHigh = formulas.change_in_longitude(latHigh, radius)
    dlngLow = formulas.change_in_longitude(latLow, radius)
    (row_u_l, column_u_l) = self.__get_row_and_column((latHigh,
                                                      lng - dlngHigh))
    (row_l_r, column_l_r) = self.__get_row_and_column((latLow,
                                                      lng + dlngLow))
    # union all airports encompassed by the bounding box -- from the upper
    # left corner to the lower right
    drow = row_l_r - row_u_l
    dcolumn = column_l_r - column_u_l
    for i in range(drow + 1):
        for j in range(dcolumn + 1):
            temp_key = self.__get_key(row_u_l + i, column_u_l + j)
            close_airports.update(self.sets[temp_key])
    # the actual set of airports within the radius
    airportSet = set()
    for airport in close_airports:
        airportCoord = airport.coordinates
        dist = formulas.haversine(coordinates[0], coordinates[1],
                                  airportCoord[0], airportCoord[1])
        if dist <= radius:
            airportSet.add(airport)
    return airportSet
def percent_coverage_generator(center, big_cluster, percent_coverage):
    """Find the tightest radius around ``center`` covering the requested
    fraction of all locations in ``big_cluster``.

    Args:
        center: object exposing ``get_lat()``/``get_lng()``.
        big_cluster: iterable of groups exposing ``get_locations()``, each
            yielding (lat, lng, ...) tuples.
        percent_coverage: fraction in [0, 1] (coerced with ``float``).

    Returns:
        dict with keys ``radius`` (distance to the farthest included
        location), ``count`` (ceil(fraction * total)), and ``locations``
        (the included coordinate tuples, nearest first).
    """
    # (distance-from-center, coord) for every location in every group
    loc_list_tuples = []
    for group in big_cluster:
        location_list = group.get_locations()
        for coord in location_list:
            dist = formulas.haversine(center.get_lat(), center.get_lng(),
                                      coord[0], coord[1])
            loc_list_tuples.append((dist, coord))
    loc_list_tuples.sort(key=lambda tup: tup[0])  # sorts in place

    num_to_include = math.ceil(
        float(percent_coverage) * len(loc_list_tuples))
    # BUG FIX: with num_to_include == 0 (empty input, or 0% coverage) the
    # original indexed [-1], crashing on empty input or reporting the
    # FARTHEST point's distance as the radius.
    if num_to_include == 0:
        return {'radius': 0, 'count': 0, 'locations': []}

    local_inventors = [coord for _, coord in loc_list_tuples[:num_to_include]]
    radius_for_percent = loc_list_tuples[num_to_include - 1][0]
    # dict for local inventors
    return {
        'radius': radius_for_percent,
        'count': num_to_include,
        'locations': local_inventors
    }
def output_each_patent(ungrouped, patent, r1, r2, company_id, company):
    """Cluster one patent's inventor locations and append the clusters as
    tab-separated rows to the file named by the global ``output_name``.

    Writes one row for the local cluster around the focal point, one for the
    "in-between" locations (neither local nor remote), and one per remote
    group, incrementing the global ``cluster_id`` after each row.

    Args:
        ungrouped: location objects exposing get_lat/get_lng/get_country/
            get_state/set_state (type defined elsewhere in the project).
        patent: patent identifier written into each row.
        r1: local clustering radius; r2: remote threshold radius.
        company_id, company: identifiers written into each row.

    Side effects: mutates ``local_center``'s state, performs Bing Maps
    requests (counted in the global ``api_calls``), appends to the output
    file, and advances the global ``cluster_id``.
    """
    global api_calls
    #get the local locations
    (local_center, local_set) = get_focal_point(ungrouped, r1)
    #get the remote locations
    remote_set = create_remote_list(local_center, ungrouped, r2)
    #find the locations that are not local and not remote
    inbetween = []
    for loc in ungrouped:
        if loc not in remote_set and not loc in local_set:
            inbetween.append(loc)
    #get the country data
    country = local_center.get_country()
    state = local_center.get_state()
    if country != 'US':
        local_center.set_state("N/A")
    elif country == 'US':
        if state == '':
            # reverse-geocode the focal point to recover the missing state
            coord2 = str(local_center.get_lat()) + "," + str(
                local_center.get_lng())
            response2 = requests.get(
                "http://dev.virtualearth.net/REST/v1/Locations/" + coord2,
                params={
                    "key": formulas.get_api_key(),
                })
            api_calls += 1
            data2 = response2.json()
            try:
                state = str(data2['resourceSets'][0]['resources'][0]['address']
                            ['adminDistrict'])
                local_center.set_state(state)
            except:
                # response missing the expected fields
                local_center.set_state("N/A")
    else:
        # NOTE(review): unreachable -- the if/elif above already covers both
        # country != 'US' and country == 'US'.  It also references
        # ``other_center``, which is never defined in this function, so if it
        # were ever live it would raise NameError.  Left byte-identical.
        coord2 = str(other_center.get_lat()) + "," + str(
            other_center.get_lng())
        response2 = requests.get(
            "http://dev.virtualearth.net/REST/v1/Locations/" + coord2,
            params={
                "key": formulas.get_api_key(),
            })
        api_calls += 1
        data2 = response2.json()
        if state == '':
            try:
                country = str(data2['resourceSets'][0]['resources'][0]
                              ['address']['countryRegion'])
                if country == 'US':
                    state = str(data2['resourceSets'][0]['resources'][0]
                                ['address']['adminDistrict'])
                    local_center.set_state(state)
                else:
                    local_center.set_state("N/A")
            except:
                country = "N/A"
                local_center.set_state("N/A")
        else:
            try:
                country = str(data2['resourceSets'][0]['resources'][0]
                              ['address']['countryRegion'])
            except:
                country = "N/A"
    #list of sets of remote groups
    remote_groups = []
    while len(remote_set) > 0:
        #get largest remote group, add to remote groups and remove from set of ungrouped remotes
        (remote_center, remote_group) = get_focal_point(remote_set, r1)
        remote_groups.append((remote_center, remote_group))
        for loc in remote_group:
            remote_set.remove(loc)
    with open(output_name, 'a', newline="\n", encoding='latin-1') as out_file:
        csv_writer = csv.writer(out_file, delimiter='\t')
        # declaration applies function-wide; cluster_id is a module global
        global cluster_id
        # local cluster
        row = []
        row.append(cluster_id)
        row.append(company_id)
        row.append(company)
        row.append(patent)
        row.append(r1)
        row.append(r2)
        # convert local_set from a set of tuples to a list of strings
        local_set_string = []
        for loc in local_set:
            coord = '(' + str(loc.get_lat()) + ' ' + str(loc.get_lng()) + ')'
            local_set_string.append(coord)
        # row for local_cluster
        row.append(len(local_set))
        row.append('; '.join(local_set_string))
        row.append(local_center.get_lat())
        row.append(local_center.get_lng())
        row.append(local_center.get_state())
        row.append(country)
        row.append('domestic')
        row.append('N/A')
        csv_writer.writerow(row)
        cluster_id += 1
        # nonlocal cluster
        # convert inbetween set from a set of tuples to a list of strings
        inbetween_set_string = []
        for loc in inbetween:
            coord = '(' + str(loc.get_lat()) + ' ' + str(loc.get_lng()) + ')'
            inbetween_set_string.append(coord)
        if len(inbetween_set_string) == 0:
            # NOTE(review): appends to the local row that was already
            # written above -- a no-op as far as the output is concerned.
            row.append('N/A')
        else:
            row = []
            row.append(cluster_id)
            row.append(company_id)
            row.append(company)
            row.append(patent)
            row.append(r1)
            row.append(r2)
            row.append(len(inbetween))
            row.append('; '.join(inbetween_set_string))
            row.append('N/A')
            row.append('N/A')
            row.append('N/A')
            row.append('N/A')
            row.append('N/A')
            row.append('N/A')
            csv_writer.writerow(row)
            cluster_id += 1
        if (len(remote_groups) == 0):
            # NOTE(review): same no-op append as above.
            row.append('N/A')
        else:
            # order remote groups by size, largest first (tup[2] is the
            # group size, not -- despite the original comment -- distance)
            remote_group_list = []
            for remote_group in remote_groups:
                (loc, group) = remote_group
                size = len(group)
                remote_group_list.append((loc, group, size))
            remote_group_list.sort(
                key=lambda tup: tup[2])  # sorts in place
            remote_group_list.reverse()
            for remote_group in remote_group_list:
                (center, group, size) = remote_group
                # convert remote_group from a set of tuples to a list of strings
                remote_group_string = []
                for loc in group:
                    coord = '(' + str(loc.get_lat()) + ' ' + str(
                        loc.get_lng()) + ')'
                    remote_group_string.append(coord)
                (c1, c2, rel) = generate_geo_relationship(country, center)
                # write remote_group
                row = []
                row.append(cluster_id)
                row.append(company_id)
                row.append(company)
                row.append(patent)
                row.append(r1)
                row.append(r2)
                row.append(len(group))
                row.append('; '.join(remote_group_string))
                row.append(center.get_lat())
                row.append(center.get_lng())
                row.append(center.get_state())
                row.append(c2)
                row.append(rel)
                row.append(
                    formulas.haversine(local_center.get_lat(),
                                       local_center.get_lng(),
                                       center.get_lat(),
                                       center.get_lng()))
                csv_writer.writerow(row)
                cluster_id += 1
def output_each_patent(ungrouped, company, company_id, base_radius,
                       coverage_percentage):
    """Cluster a company's patent groups around its headquarters and append
    one summary row to the file named by the global ``output_name``.

    The row holds company identifiers, cluster counts, a dict describing the
    headquarters cluster, dicts for every remote cluster (largest first), and
    percent-coverage summaries from ``percent_coverage_generator``.

    Fixes vs. the original:
      * declared ``global api_calls`` -- ``api_calls += 1`` previously raised
        UnboundLocalError on the first geocoding request;
      * the geocoding branches referenced an undefined ``other_center``; the
        headquarters centre ``hq`` is what context indicates was meant;
      * the remote dict key ``'geographical_relationship:'`` (stray colon)
        now matches the local dict's ``'geographical_relationship'``;
      * bare ``except:`` narrowed to ``except Exception:``.
    """
    global api_calls
    print(company)
    print(len(ungrouped))
    # get the clusters centered around the headquarters
    (hq, hq_set) = get_focal_point(ungrouped, base_radius)
    # assume every location outside the headquarters radius is remote
    remote_set = [i for i in ungrouped if not i in hq_set]
    # row to write
    row = [
        company, company_id,
        len(hq_set),
        len(remote_set),
        len(hq_set) + len(remote_set),
        str(base_radius),
        str(coverage_percentage)
    ]
    # get local country and state, reverse-geocoding when state is blank
    country = hq.get_country()
    state = hq.get_state()
    if country != 'US':
        hq.set_state("N/A")
    elif country == 'US':
        if state == '':
            # FIX: was str(other_center.get_lat()) -- NameError at runtime
            coord2 = str(hq.get_lat()) + "," + str(hq.get_lng())
            response2 = requests.get(
                "http://dev.virtualearth.net/REST/v1/Locations/" + coord2,
                params={
                    "key": formulas.get_api_key(),
                })
            api_calls += 1
            data2 = response2.json()
            try:
                state = str(data2['resourceSets'][0]['resources'][0]['address']
                            ['adminDistrict'])
                hq.set_state(state)
            except Exception:
                hq.set_state("N/A")
    else:
        # NOTE(review): unreachable -- the if/elif above covers every value
        # of ``country``.  Kept (with the same name fixes) rather than
        # silently dropped.
        coord2 = str(hq.get_lat()) + "," + str(hq.get_lng())
        response2 = requests.get(
            "http://dev.virtualearth.net/REST/v1/Locations/" + coord2,
            params={
                "key": formulas.get_api_key(),
            })
        api_calls += 1
        data2 = response2.json()
        if state == '':
            try:
                country = str(data2['resourceSets'][0]['resources'][0]
                              ['address']['countryRegion'])
                if country == 'US':
                    state = str(data2['resourceSets'][0]['resources'][0]
                                ['address']['adminDistrict'])
                    hq.set_state(state)
                else:
                    hq.set_state("N/A")
            except Exception:
                country = "N/A"
                hq.set_state("N/A")
        else:
            try:
                country = str(data2['resourceSets'][0]['resources'][0]
                              ['address']['countryRegion'])
            except Exception:
                country = "N/A"
    # greedily peel off the largest remaining remote cluster until none left
    remote_groups = []
    while len(remote_set) > 0:
        (remote_center, remote_group) = get_focal_point(remote_set,
                                                        base_radius)
        print(len(remote_set))
        remote_groups.append((remote_center, remote_group))
        for loc in remote_group:
            remote_set.remove(loc)
    with open(output_name, 'a', newline="\n",
              encoding='utf-8-sig') as out_file:
        csv_writer = csv.writer(out_file, delimiter='\t')
        # group ids of the headquarters cluster members
        local_set_string = []
        for loc in hq_set:
            coord = loc.get_group_id()
            local_set_string.append(coord)
        # dict for local_cluster
        local_cluster_dict = {
            'number_of_patent_groups_in_cluster': len(local_set_string),
            'locations_id': '; '.join(local_set_string),
            'center_lat': hq.get_lat(),
            'center_lng': hq.get_lng(),
            'state': state,
            'country': country,
            'geographical_relationship': 'domestic',
            'haversine_distance_to_local': 'N/A'
        }
        row.append(local_cluster_dict)
        # order remote groups by size, largest first
        remote_group_list = []
        for remote_group in remote_groups:
            (loc, group) = remote_group
            size = len(group)
            remote_group_list.append((loc, group, size))
        remote_group_list.sort(key=lambda tup: tup[2])  # sorts in place
        remote_group_list.reverse()
        # this is the number of remote groups of clusters
        num_of_remote_groups = len(remote_group_list)
        total_num_of_groups = num_of_remote_groups + 1
        # add that information to the row
        row.insert(5, total_num_of_groups)
        row.insert(6, num_of_remote_groups)
        write_remote_cluster = []
        for remote_group in remote_group_list:
            (center, group, size) = remote_group
            # group ids of this remote cluster's members
            remote_group_string = []
            for loc in group:
                coord = loc.get_group_id()
                remote_group_string.append(coord)
            (c1, c2, rel) = generate_geo_relationship(country, center)
            # dict for remote_cluster
            remote_cluster_dict = {
                'number_of_patent_groups_in_cluster': len(group),
                'locations_id': '; '.join(remote_group_string),
                'center_lat': center.get_lat(),
                'center_lng': center.get_lng(),
                'state': center.get_state(),
                'country': c2,
                # FIX: key used to carry a stray trailing colon
                'geographical_relationship': rel,
                'haversine_distance_to_local':
                    formulas.haversine(hq.get_lat(), hq.get_lng(),
                                       center.get_lat(), center.get_lng())
            }
            write_remote_cluster.append(remote_cluster_dict)
        if (len(write_remote_cluster) == 0):
            row.append('N/A')
        else:
            row.append(write_remote_cluster)
        # percentage coverage for local
        local_inventor_dict = percent_coverage_generator(
            hq, hq_set, fast_real(coverage_percentage))
        row.append(local_inventor_dict)
        # percentage coverage for remote
        if num_of_remote_groups == 0:
            row.append('N/A')
        else:
            remote_inventor_dict_list = []
            for remote_group in remote_group_list:
                (center, group, size) = remote_group
                remote_inventor_dict = percent_coverage_generator(
                    center, group, fast_real(coverage_percentage))
                remote_inventor_dict_list.append(remote_inventor_dict)
            row.append(remote_inventor_dict_list)
        csv_writer.writerow(row)
def output_each_patent(ungrouped, patent, r1, r2):
    """Cluster one patent's inventor coordinates and append a summary row to
    outputs/output.csv.

    ``ungrouped`` holds ``(lat, lng, id)`` tuples.  The row contains counts,
    radii, and one dict each for the local cluster, the in-between group,
    and every remote cluster (sorted by distance from the local centre).

    Fixes vs. the original:
      * the Bing Maps key was hard-coded in source (a leaked credential);
        it now comes from ``formulas.get_api_key()`` like the sibling code;
      * bare ``except:`` narrowed to ``except Exception:``;
      * the loop variable ``id`` no longer shadows the builtin.
    """
    # get the local locations
    (local_center, local_set) = get_focal_point(ungrouped, r1)
    # get the remote locations
    remote_set = create_remote_set(local_center, ungrouped, r2)
    # find the locations that are not local and not remote
    inbetween = set(ungrouped) - remote_set - local_set
    # row to write
    row = []
    row.append(patent)
    row.append(len(ungrouped))
    row.append(len(local_set))
    row.append(len(remote_set))
    # get local country by reverse-geocoding the focal point
    coord1 = str(local_center[0]) + "," + str(local_center[1])
    response1 = requests.get(
        "http://dev.virtualearth.net/REST/v1/Locations/" + coord1,
        params={
            # SECURITY FIX: an API key used to be hard-coded here
            "key": formulas.get_api_key(),
        })
    data1 = response1.json()
    # get the country data
    try:
        country1 = str(data1['resourceSets'][0]['resources'][0]['address']
                       ['countryRegion'])
    except Exception:
        country1 = "N/A"
    # greedily peel off the largest remaining remote cluster
    remote_groups = []
    while len(remote_set) > 0:
        remote_group = get_focal_point(remote_set, r1)
        remote_groups.append(remote_group)
        remote_set -= remote_group[1]
    row.append(1 + len(remote_groups))  # number of clusters (local+remote)
    row.append(r1)  # local radius
    row.append(r2)  # remote radius
    with open('outputs/output.csv', 'a', newline="\n",
              encoding='latin-1') as out_file:
        csv_writer = csv.writer(out_file, delimiter=',')
        # columns: patent_id, number_of_inventors, number_of_local_inventors,
        # number_of_remote_inventors, number_of_clusters (local+remote),
        # radius_local, radius_remote, local_cluster, nonlocal_cluster,
        # remote_cluster...
        # convert local_set from a set of tuples to a list of strings
        local_set_string = []
        for (lat, lon, _id) in local_set:
            coord = '(' + str(lat) + ',' + str(lon) + ')'
            local_set_string.append(coord)
        # dict for local_cluster
        # NOTE(review): the stray colon in 'geographical_relationship:' is
        # reproduced as-is here -- downstream parsers may depend on it.
        local_cluster_dict = {
            'number_of_inventors_in_cluster': len(local_set),
            'locations': '; '.join(local_set_string),
            'center_lat': local_center[0],
            'center_lng': local_center[1],
            'country': country1,
            'geographical_relationship:': 'domestic',
            'haversine_distance_to_local': 'N/A'
        }
        row.append(local_cluster_dict)
        # convert inbetween set from a set of tuples to a list of strings
        inbetween_set_string = []
        for (lat, lon, _id) in inbetween:
            coord = '(' + str(lat) + ',' + str(lon) + ')'
            inbetween_set_string.append(coord)
        # dict for nonlocal_cluster
        nonlocal_cluster_dict = {
            'number_of_inventors_in_cluster': len(inbetween),
            'locations': '; '.join(inbetween_set_string),
            'center_lat': 'N/A',
            'center_lng': 'N/A',
            'country': 'N/A',
            'geographical_relationship:': 'N/A',
            'haversine_distance_to_local': 'N/A'
        }
        row.append(nonlocal_cluster_dict)
        # sort remote groups by distance away from local focal point
        remote_group_list = []
        for remote_group in remote_groups:
            (coordinates, group) = remote_group
            dist = formulas.haversine(local_center[0], local_center[1],
                                      coordinates[0], coordinates[1])
            remote_group_list.append((coordinates, group, dist))
        remote_group_list.sort(key=lambda tup: tup[2])  # sorts in place
        for remote_group in remote_group_list:
            (coordinates, group, dist) = remote_group
            # convert remote_group from a set of tuples to a list of strings
            remote_group_string = []
            for (lat, lon, _id) in group:
                coord = '(' + str(lat) + ',' + str(lon) + ')'
                remote_group_string.append(coord)
            (c1, c2, rel) = generate_geo_relationship(country1, coordinates)
            # dict for remote_cluster
            remote_cluster_dict = {
                'number_of_inventors_in_cluster': len(group),
                'locations': '; '.join(remote_group_string),
                'center_lat': coordinates[0],
                'center_lng': coordinates[1],
                'country': c2,
                'geographical_relationship:': rel,
                'haversine_distance_to_local':
                    formulas.haversine(local_center[0], local_center[1],
                                       coordinates[0], coordinates[1])
            }
            row.append(remote_cluster_dict)
        csv_writer.writerow(row)
# Quick manual smoke test of formulas.haversine on two sample coordinates.
import formulas

distance = formulas.haversine(37.9375, -107.8117, 40.7975, -81.165)
print(distance)
def output_each_patent(ungrouped, company, id, r1, r2):
    """Cluster one company's inventor locations and append a summary row to
    outputs/groupings.tsv.

    Location objects must expose get_lat/get_lng/get_country/get_state/
    set_state.  The row contains identifiers, counts, radii, and dicts for
    the local cluster, the in-between group, and the remote clusters
    (largest first).  Note: the ``id`` parameter name shadows the builtin
    but is kept for caller compatibility.

    Fixes vs. the original:
      * the live geocoding branch (country == 'US', blank state) referenced
        an undefined ``other_center`` and would raise NameError; it now uses
        ``local_center``;
      * bare ``except:`` narrowed to ``except Exception:``.
    """
    # get the local locations
    (local_center, local_set) = get_focal_point(ungrouped, r1)
    # get the remote locations
    remote_set = create_remote_list(local_center, ungrouped, r2)
    # find the locations that are not local and not remote
    inbetween = []
    for loc in ungrouped:
        if loc not in remote_set and not loc in local_set:
            inbetween.append(loc)
    # row to write
    row = []
    row.append(company)
    row.append(id)
    row.append(len(ungrouped))
    row.append(len(local_set))
    row.append(len(remote_set))
    # get the country data, reverse-geocoding a missing US state
    country = local_center.get_country()
    state = local_center.get_state()
    if country != 'US':
        local_center.set_state("N/A")
    elif country == 'US':
        if state == '':
            # FIX: was str(other_center.get_lat()) -- NameError at runtime
            coord2 = str(local_center.get_lat()) + "," + str(
                local_center.get_lng())
            response2 = requests.get(
                "http://dev.virtualearth.net/REST/v1/Locations/" + coord2,
                params={
                    "key": formulas.get_api_key(),
                })
            data2 = response2.json()
            try:
                state = str(data2['resourceSets'][0]['resources'][0]['address']
                            ['adminDistrict'])
                local_center.set_state(state)
            except Exception:
                local_center.set_state("N/A")
    else:
        # NOTE(review): unreachable -- the if/elif above covers every value
        # of ``country``.  Kept, with the same name fix applied.
        coord2 = str(local_center.get_lat()) + "," + str(
            local_center.get_lng())
        response2 = requests.get(
            "http://dev.virtualearth.net/REST/v1/Locations/" + coord2,
            params={
                "key": formulas.get_api_key(),
            })
        data2 = response2.json()
        if state == '':
            try:
                country = str(data2['resourceSets'][0]['resources'][0]
                              ['address']['countryRegion'])
                if country == 'US':
                    state = str(data2['resourceSets'][0]['resources'][0]
                                ['address']['adminDistrict'])
                    local_center.set_state(state)
                else:
                    local_center.set_state("N/A")
            except Exception:
                country = "N/A"
                local_center.set_state("N/A")
        else:
            try:
                country = str(data2['resourceSets'][0]['resources'][0]
                              ['address']['countryRegion'])
            except Exception:
                country = "N/A"
    # greedily peel off the largest remaining remote cluster
    remote_groups = []
    while len(remote_set) > 0:
        (remote_center, remote_group) = get_focal_point(remote_set, r1)
        remote_groups.append((remote_center, remote_group))
        for loc in remote_group:
            remote_set.remove(loc)
    row.append(1 + len(remote_groups))  # number of clusters
    row.append(r1)  # local radius
    row.append(r2)  # remote radius
    with open('outputs/groupings.tsv', 'a', newline="\n",
              encoding='latin-1') as out_file:
        csv_writer = csv.writer(out_file, delimiter='\t')
        # convert local_set from a set of tuples to a list of strings
        local_set_string = []
        for loc in local_set:
            coord = '(' + str(loc.get_lat()) + ' ' + str(loc.get_lng()) + ')'
            local_set_string.append(coord)
        # dict for local_cluster
        local_cluster_dict = {
            'number_of_inventors_in_cluster': len(local_set),
            'locations': '; '.join(local_set_string),
            'center_lat': local_center.get_lat(),
            'center_lng': local_center.get_lng(),
            'state': state,
            'country': country,
            'geographical_relationship': 'domestic',
            'haversine_distance_to_local': 'N/A'
        }
        row.append(local_cluster_dict)
        # convert inbetween set from a set of tuples to a list of strings
        inbetween_set_string = []
        for loc in inbetween:
            coord = '(' + str(loc.get_lat()) + ' ' + str(loc.get_lng()) + ')'
            inbetween_set_string.append(coord)
        if len(inbetween_set_string) == 0:
            row.append('N/A')
        else:
            # dict for nonlocal_cluster
            nonlocal_cluster_dict = {
                'number_of_inventors_in_cluster': len(inbetween),
                'locations': '; '.join(inbetween_set_string),
                'center_lat': 'N/A',
                'center_lng': 'N/A',
                'state': 'N/A',
                'country': 'N/A',
                'geographical_relationship': 'N/A',
                'haversine_distance_to_local': 'N/A'
            }
            row.append(nonlocal_cluster_dict)
        # order remote groups by size, largest first (tup[2] is group size)
        remote_group_list = []
        for remote_group in remote_groups:
            (loc, group) = remote_group
            size = len(group)
            remote_group_list.append((loc, group, size))
        remote_group_list.sort(key=lambda tup: tup[2])  # sorts in place
        remote_group_list.reverse()
        write_remote_cluster = []
        for remote_group in remote_group_list:
            (center, group, size) = remote_group
            # convert remote_group from a set of tuples to a list of strings
            remote_group_string = []
            for loc in group:
                coord = '(' + str(loc.get_lat()) + ' ' + str(
                    loc.get_lng()) + ')'
                remote_group_string.append(coord)
            (c1, c2, rel) = generate_geo_relationship(country, center)
            # dict for remote_cluster
            remote_cluster_dict = {
                'number_of_inventors_in_cluster': len(group),
                'locations': '; '.join(remote_group_string),
                'center_lat': center.get_lat(),
                'center_lng': center.get_lng(),
                'state': center.get_state(),
                'country': c2,
                'geographical_relationship': rel,
                'haversine_distance_to_local':
                    formulas.haversine(local_center.get_lat(),
                                       local_center.get_lng(),
                                       center.get_lat(),
                                       center.get_lng())
            }
            write_remote_cluster.append(remote_cluster_dict)
        if (len(write_remote_cluster) == 0):
            row.append('N/A')
        else:
            row.append(write_remote_cluster)
        csv_writer.writerow(row)