def transform(self, X, y=None):
    assert isinstance(X, pd.DataFrame)
    X['geohash_pickup'] = X.apply(
        lambda x: gh.encode(x.pickup_latitude, x.pickup_longitude,
                            precision=self.precision),
        axis=1)
    X['geohash_dropoff'] = X.apply(
        lambda x: gh.encode(x.dropoff_latitude, x.dropoff_longitude,
                            precision=self.precision),
        axis=1)
    return X[['geohash_pickup', 'geohash_dropoff']]
def geohash_grid(self, general_grid, data):
    general_grid['geohash'] = general_grid.apply(
        lambda x: gh.encode(x.lat, x.lon, precision=6), axis=1)
    data['geohash'] = data.apply(
        lambda x: gh.encode(x.lat, x.lon, precision=6), axis=1)
    result = general_grid[general_grid['geohash'].isin(data['geohash'])]
    return result
def get_geohashes_from_border(border_polygon, precision=7):
    x, y = border_polygon.exterior.coords.xy
    min_long = np.min(x)
    max_long = np.max(x)
    min_lat = np.min(y)
    max_lat = np.max(y)
    geohashes = []
    starting_latitude, starting_longitude, latitude_delta, longitude_delta = pgh.decode_exactly(
        pgh.encode(min_lat, min_long, precision=precision))
    ending_latitude, ending_longitude, latitude_delta, longitude_delta = pgh.decode_exactly(
        pgh.encode(max_lat, max_long, precision=precision))
    for long_ in np.arange(starting_longitude, ending_longitude + longitude_delta * 2,
                           longitude_delta * 2):
        for lat in np.arange(starting_latitude, ending_latitude + latitude_delta * 2,
                             latitude_delta * 2):
            curr_geohash = pgh.encode(lat, long_, precision=precision)
            if border_polygon.intersects(get_polygon_from_geohash(curr_geohash)):
                geohashes.append(curr_geohash)
    return geohashes
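# get_polygon_from_geohash is referenced above but not shown; a minimal sketch
# of what it might look like, assuming shapely is available, is the following
# (the implementation is an assumption, not the original helper):
def get_polygon_from_geohash(geohash):
    from shapely.geometry import Polygon
    # decode_exactly returns the cell center plus half-width error margins,
    # which is enough to rebuild the bounding box of the geohash cell
    lat, lon, lat_err, lon_err = pgh.decode_exactly(geohash)
    return Polygon([
        (lon - lon_err, lat - lat_err),
        (lon - lon_err, lat + lat_err),
        (lon + lon_err, lat + lat_err),
        (lon + lon_err, lat - lat_err),
    ])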
def mapper_df(line):
    obj = json.loads(line)
    return Row(IDFA=obj['idfa'],
               lat=obj['lat'],
               lng=obj['lng'],
               geohash1=obj['geohash'],
               geohash2=pgh.encode(float(obj['lat']), float(obj['lng']),
                                   precision=8))
def load_map(self, args=None):
    if args is None:
        self.adj_map = self.net.adj_map
        self.nodes = self.net.nodes
        self.records = self.net.records
        return
    orign, destination = args
    t1 = time()
    self.adj_map, self.nodes, self.records = self.net.get_loacl_map(
        encode(orign[1], orign[0], 5),
        encode(destination[1], destination[0], 5))
    print("Loading the local map took", time() - t1)
def _extract_geo_features(df_test):
    geohash_4 = df_test.apply(
        lambda x: gh.encode(x.latitude, x.longitude, precision=4), axis=1)
    geohash_5 = df_test.apply(
        lambda x: gh.encode(x.latitude, x.longitude, precision=5), axis=1)
    geohash_6 = df_test.apply(
        lambda x: gh.encode(x.latitude, x.longitude, precision=6), axis=1)
    return pd.concat([
        df_test["latitude"],
        df_test["longitude"],
        pd.get_dummies(geohash_4, prefix="gh4"),
        pd.get_dummies(geohash_5, prefix="gh5"),
        pd.get_dummies(geohash_6, prefix="gh6")
    ], axis=1)
def test_stats(self):
    data = [(50, 0), (-50, 0), (0, -50), (0, 50)]
    data = [pgh.encode(lat, lon) for lat, lon in data]

    # mean
    mean = pgh.mean(data)
    self.assertEqual(mean, '7zzzzzzzzzzz')

    # north
    north = pgh.northern(data)
    self.assertEqual(north, 'gbzurypzpgxc')

    # south
    south = pgh.southern(data)
    self.assertEqual(south, '5zpgxczbzury')

    # east
    east = pgh.eastern(data)
    self.assertEqual(east, 'mpgxczbzuryp')

    # west
    west = pgh.western(data)
    self.assertEqual(west, '6zurypzpgxcz')

    var = pgh.variance(data)
    self.assertAlmostEqual(var, 30910779278721.996, places=2)

    std = pgh.std(data)
    self.assertAlmostEqual(std, 5559746.332227937, places=4)
def predict(points, initialTime, distance, ETA):
    # called for each of the routes predicted
    time_per_unit_dist = ETA / distance
    currentTime = int(initialTime.split(":")[0]) * 60 + int(initialTime.split(":")[1])
    input_data = []
    for i in range(len(points) - 1):
        # convert degrees to radians before applying the spherical law of cosines
        slat = math.radians(points[i][0])
        elat = math.radians(points[i + 1][0])
        slon = math.radians(points[i][1])
        elon = math.radians(points[i + 1][1])
        dist_points = 6371.01 * math.acos(
            sin(slat) * sin(elat) + cos(slat) * cos(elat) * cos(slon - elon))
        geohash = gh.encode(points[i][0], points[i][1], precision=8)
        encoded_geohash = "-1"
        for key, value in category_codes.items():
            if value == geohash:
                encoded_geohash = key
        # encoded_geohash = category_codes[geohash]
        sine_time = getSine(currentTime)
        cosine_time = getCosine(currentTime)
        currentTime = currentTime + time_per_unit_dist * dist_points
        row = {}
        row['encoded_geohash'] = encoded_geohash
        row['sine_time'] = sine_time
        row['cosine_time'] = cosine_time
        input_data.append(row)
    x_test = pd.DataFrame(input_data)
    print(input_data)
    y_test = knn.predict(x_test)
    return y_test
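# getSine and getCosine are not defined in this snippet; a plausible sketch,
# assuming they encode the minute-of-day as cyclical features (this is an
# assumption, not the original implementation), is:
def getSine(minutes_of_day):
    # map the minute of the day onto a sine wave with a 24h (1440 min) period
    return math.sin(2 * math.pi * (minutes_of_day % 1440) / 1440)


def getCosine(minutes_of_day):
    # cosine counterpart, so that 23:59 and 00:00 end up close together
    return math.cos(2 * math.pi * (minutes_of_day % 1440) / 1440)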
def put_physical_event(
    name: str,
    description: str,
    owner: str,
    start_time: datetime.datetime,
    end_time: datetime.datetime,
    website: str,
    address: str,
    latitude: float,
    longitude: float,
):
    geohash = pygeohash.encode(latitude=latitude, longitude=longitude)
    location_data = {
        "address": address,
        "geohash": geohash,
    }
    return put_event(
        name=name,
        description=description,
        owner=owner,
        start_time=start_time,
        end_time=end_time,
        website=website,
        **location_data
    )
def road_match_one(self, pos, det=10):
    '''
    :param pos: a single trajectory point
    :param d_gHash: intersections indexed by geohash cell
    :param nodes: intersection coordinates and road-segment info
    :return:
    '''
    # locate the local map through the geohash of the trajectory point
    t, *pos = pos
    ghash = encode(pos[-1], pos[-2], 7)
    if ghash not in self.d_gHash:
        return None  # out of bounds
    inds = self.d_gHash[ghash]
    # match the point onto a road by projection
    l_dist = []
    s_road = set()  # all candidate roads
    for i in inds:
        # collect every road touching this intersection
        for r in self.nodes[i][0]:
            if r not in s_road:
                # print(r, self.records[r][-2], self.records[r][-1])
                s_road.add(r)
                dist, pos_ = self.match_dist(
                    pos, (self.nodes[self.records[r][-2]][-2:],
                          self.nodes[self.records[r][-1]][-2:]))  # start/end coordinates
                # print(dist)
                if dist < det:
                    l_dist.append((dist, pos_, t, r))
    if l_dist:  # if no road lies within the threshold, the point is treated as an outlier
        return min(l_dist, key=lambda x: x[0])
def fill_stations():
    station_codes = {}
    with open("D:\\Studia\\inz\\imgw\\kody_stacji.csv") as file:
        reader = csv.reader(file, delimiter=';')
        for row in reader:
            station_codes[row[1]] = row[2]
    station_coord = {}
    station_hash = {}
    locator = Nominatim(user_agent="*****@*****.**")
    fails = []
    for station in station_codes:
        try:
            location = locator.geocode(station_codes[station])
            coord = (location.latitude, location.longitude)
            geohash = pgh.encode(location.latitude, location.longitude)
            station_coord[station] = coord
            station_hash[station] = geohash
        except AttributeError:
            fails.append(station)
            tb = sys.exc_info()
            print(tb)
            print("FAIL\t", station_codes[station])
    for station in fails:
        del station_codes[station]
    with open('D:\\Studia\\inz\\imgw\\kody_stacji_full.csv', 'w') as file:
        for station in station_codes:
            line = f'{station};{station_codes[station]};{station_coord[station]};{station_hash[station]}\n'
            file.write(line)
    return station_codes, station_coord, station_hash
def create_hash_dirs(records):
    geoindex_dir = RESULTS_DIR.joinpath('geoindex')
    geoindex_dir.mkdir(exist_ok=True, parents=True)
    hashes = []
    for record in records:
        src_airport = record.get('src_airport', {})
        if src_airport:
            latitude = src_airport.get('latitude')
            longitude = src_airport.get('longitude')
            if latitude and longitude:
                geohash = pygeohash.encode(latitude, longitude)
                hashes.append(geohash)
                record['geohash'] = geohash
    hashes.sort()
    three_letter = sorted(list(set([entry[:3] for entry in hashes])))
    hash_index = {value: [] for value in three_letter}
    for record in records:
        geohash = record.get('geohash')
        if geohash:
            hash_index[geohash[:3]].append(record)
    for key, values in hash_index.items():
        output_dir = geoindex_dir.joinpath(str(key[:1])).joinpath(str(key[:2]))
        output_dir.mkdir(exist_ok=True, parents=True)
        output_path = output_dir.joinpath('{}.jsonl.gz'.format(key))
        with gzip.open(output_path, 'w') as f:
            json_output = '\n'.join([json.dumps(value) for value in values])
            f.write(json_output.encode('utf-8'))
def read_csv(filename, errors='warn'):
    '''
    reads a csv file with types associated to each column
    '''
    if errors not in {'warn', 'ignore', 'severe', 'debug', 'errors'}:
        raise ValueError("errors must be one of 'warn','ignore','severe','debug','errors'",
                         'Passed was ', errors)
    records = []  # list of records
    with open(filename, 'r', encoding='utf-8-sig') as f:
        rows = csv.reader(f, delimiter=',', quotechar='"')
        # headers = next(rows)
        for i, parts in enumerate(rows, start=1):
            if parts:
                try:
                    # parts = [func(val) for func, val in zip(types, parts)]
                    parts1 = []
                    timepart = parts[0].split()
                    parts1.insert(0, pygeohash.encode(float(parts[1]), float(parts[2])))
                    parts1.insert(1, timepart[0])
                    parts1.insert(2, parts[0][:-4])
                    parts1.insert(3, float(parts[3]))
                    parts1.insert(4, float(parts[4]))
                except Exception:
                    if errors == 'warn':
                        print('Bad Row ', parts, 'Row #', i)
                    continue  # skip to the next row
                records.append(parts1)
    return records
def dump(id_cam, ts, trackers, iteration, list_boxes, info_for_deduplicator, box_coords):
    import pygeohash as pgh
    import os
    filename = "singlecamera.in"
    if not os.path.exists(filename):
        f = open(filename, "w+")
        f.close()
    with open(filename, "a+") as f:
        # for i, tracker in enumerate([t for t in trackers if t.traj[-1].frame == iteration]):
        idx = 0
        for i, tracker in enumerate(trackers):
            if tracker.id not in [t.id for t in trackers if t.traj[-1].frame == iteration]:
                continue
            lat = info_for_deduplicator[idx][0]  # round(info_for_deduplicator[i][0], 14)
            lon = info_for_deduplicator[idx][1]  # round(info_for_deduplicator[i][1], 14)
            geohash = pgh.encode(lat, lon, precision=7)
            cl = info_for_deduplicator[idx][2]
            speed = abs(tracker.ekf.xEst.vel)  # info_for_deduplicator[i][3]
            yaw = tracker.ekf.xEst.yaw  # info_for_deduplicator[i][4]
            pixel_x = info_for_deduplicator[idx][6]  # OR list_boxes[tracker.idx].x  # pixels[tracker.idx][0]
            pixel_y = info_for_deduplicator[idx][7]  # pixels[tracker.idx][1]
            f.write(
                f"{id_cam} {iteration} {ts} {cl} {lat} {lon} {geohash} {speed} {yaw} "
                f"{id_cam}_{tracker.id} {pixel_x} {pixel_y} "
                f"{list_boxes[tracker.idx].w} {list_boxes[tracker.idx].h} "
                f"{box_coords[tracker.idx][0]} {box_coords[tracker.idx][1]} "
                f"{box_coords[tracker.idx][2]} {box_coords[tracker.idx][3]} "
                f"{box_coords[tracker.idx][4]} {box_coords[tracker.idx][5]} "
                f"{box_coords[tracker.idx][6]} {box_coords[tracker.idx][7]}\n")
            idx += 1
def read_markets(
        zip_code: str = None,
        latitude: float = None,
        longitude: float = None,
        radius: float = 1000):
    print(zip_code, latitude, longitude, radius)
    gmaps = None
    if zip_code is not None:
        # TODO we should verify that it is a valid German zip code
        # we first need to use the Geocoding API to map a German zip to latitude/longitude coordinates
        gmaps = googlemaps.Client(key=os.environ['GOOGLE_MAPS_KEY'])
        results = gmaps.geocode(address='%s+Deutschland' % zip_code)
        if len(results) > 0:
            latitude = results[0]['geometry']['location']['lat']
            longitude = results[0]['geometry']['location']['lng']
        else:
            print("ERROR: could not map ZIP code to coordinates.")
            raise HTTPException(status_code=404,
                                detail="Could not map ZIP code to coordinates.")
    cached_query = find_query(latitude, longitude, radius)
    if cached_query is None:
        if gmaps is None:
            gmaps = googlemaps.Client(key=os.environ['GOOGLE_MAPS_KEY'])
        result = gmaps.places_nearby((latitude, longitude), radius=radius,
                                     keyword='supermarkt')
        add_query_to_cache(latitude, longitude, radius, result)
        tbl = boto3.session.Session().resource('dynamodb').Table("supermarket")
        if 'results' in result:
            for market in result['results']:
                item = {
                    'place_id': market["place_id"],
                    'geohash': pgh.encode(
                        market['geometry']['location']['lat'],
                        market['geometry']['location']['lng'],
                        precision=6),
                    # quick and dirty solution to replace all floats by Decimal objects
                    'result': json.loads(json.dumps(market), parse_float=Decimal)
                }
                tbl.put_item(Item=item)
    else:
        result = cached_query
    markets = []
    if 'results' in result:
        for market in result['results']:
            markets.append({
                "name": market["name"],
                "latitude": market['geometry']['location']['lat'],
                "longitude": market['geometry']['location']['lng'],
                "vicinity": market['vicinity'],
                "id": market["place_id"],
                "distance": distance(latitude, longitude,
                                     market['geometry']['location']['lat'],
                                     market['geometry']['location']['lng'])
            })
            if 'opening_hours' in market:
                if 'open_now' in market['opening_hours']:
                    markets[-1]['open_now'] = market['opening_hours']['open_now']
                else:
                    print("Did not find open_now in %s" % market['opening_hours'])
    return markets
def find(user_input, record_type):
    place = geolocator.geocode(user_input)
    geohash = pygeohash.encode(latitude=place.latitude, longitude=place.longitude)
    return [
        r for r in search_nearby(geohash, record_type)
        if r["distance_to_search"] / METERS_PER_MILE <= 25
    ]
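# search_nearby and METERS_PER_MILE are defined elsewhere; a rough sketch of how
# the "distance_to_search" field could be filled in, assuming records carry their
# own geohash and using pygeohash's approximate-distance helper (the in-memory
# store and function body below are assumptions, not the original code):
METERS_PER_MILE = 1609.344


def search_nearby(search_geohash, record_type, records=()):
    # hypothetical stand-in: filter records by type and annotate each with its
    # approximate distance (in meters) to the search location
    results = []
    for record in records:
        if record.get("type") != record_type:
            continue
        record = dict(record)
        record["distance_to_search"] = pygeohash.geohash_approximate_distance(
            search_geohash, record["geohash"])
        results.append(record)
    return results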
def get_geohash_id(dataFrame):
    udf_geohash = F.udf(lambda x, y: pgh.encode(x, y, precision=7))
    pickup_geohash = dataFrame.select(
        'Trip_Pickup_DateTime', 'Start_Lat', 'Start_Lon',
        udf_geohash('Start_Lat', 'Start_Lon').alias('geo_hash_id'))
    dropoff_geohash = dataFrame.select(
        'Trip_Dropoff_DateTime', 'End_Lat', 'End_Lon',
        udf_geohash('End_Lat', 'End_Lon').alias('geo_hash_id'))
    return pickup_geohash, dropoff_geohash
def geohash_df(gdf, latitude='LATITUDE', longitude='LONGITUDE', precision=6,
               col_name='GEOHASH'):
    gdf[col_name] = gdf.apply(
        lambda row: pgh.encode(row[latitude], row[longitude], precision=precision),
        axis=1)
    return gdf
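# A quick usage sketch for geohash_df; the sample frame below is made up, and
# per the library's own tests (42.6, -5.6) encodes to 'ezs42' at precision 5:
def _geohash_df_example():
    import pandas as pd
    sample = pd.DataFrame({'LATITUDE': [42.6, 48.1], 'LONGITUDE': [-5.6, 11.6]})
    return geohash_df(sample, precision=5)  # adds a 'GEOHASH' column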
def GeoToGeohash(self, latitude, longitude, precision=4):
    '''
    Use pygeohash to convert latitude and longitude to a geohash.
    precision: defines the precision of the geohash
    '''
    result = pgh.encode(latitude, longitude, precision=precision)
    return result
def Get_Geohash(df):
    GEOHASH_PRECISION = 6
    for i in range(0, len(df)):
        geohash = pgh.encode(df.iloc[i]['Latitude'], df.iloc[i]['Longitude'],
                             GEOHASH_PRECISION)
        df.at[i, 'geohash'] = geohash
    return df
def process_item(self, item, spider):
    address = item['address']
    location = geo_locator.geocode(address).raw['geometry']['location']
    hash = pgh.encode(location['lat'], location['lng'])
    item['geohash'] = hash
    item['longitude'] = location['lng']
    item['latitude'] = location['lat']
    return item
def pandas_transform(df: DataFrame) -> DataFrame:
    """
    Normalize DataFrame columns holding JSON dictionaries into their own columns.
    Extract year, month, and day from the inspection_date column.
    Then create a geohash column from latitude and longitude.
    :param df: DataFrame
    :return: df: DataFrame
    """
    # Explode python dicts in address column to their own columns
    df2: DataFrame = pandas.json_normalize(df['address'])
    df.drop(columns=['address'], inplace=True)

    # Convert json to python dicts in human_address column,
    # then explode them to their own columns
    df3: DataFrame = pandas.json_normalize(
        df2['human_address'].map(lambda x: json.loads(x)))
    df2.drop(columns=['human_address'], inplace=True)

    # Join all new Dataframes by their indices into one new Dataframe
    # https://stackoverflow.com/a/36539295/3263650
    df: DataFrame = df.merge(df2, how='outer', left_index=True, right_index=True)
    df: DataFrame = df.merge(df3, how='outer', left_index=True, right_index=True)
    del df2
    del df3
    df.dropna(subset=['latitude', 'longitude'], inplace=True)

    # Convert inspection_date to a datetime object, then derive year, month, day columns from it.
    df['inspection_date'] = df['inspection_date'].map(datetime.fromisoformat)
    df[["year", "month", "day"]] = df.apply(
        lambda x: [x['inspection_date'].year,
                   x['inspection_date'].month,
                   x['inspection_date'].day],
        axis=1,
        result_type="expand")
    df.drop(columns=['inspection_date'], inplace=True)

    # Convert latitude and longitude to floats, then
    # combine those columns and apply the geohash function
    # to create the geohash column
    # https://stackoverflow.com/a/52854800/3263650
    df['latitude'] = df['latitude'].map(float)
    df['longitude'] = df['longitude'].map(float)
    df['geohash'] = df[['latitude', 'longitude']].apply(
        lambda x: pygeohash.encode(latitude=x[0], longitude=x[1], precision=12),
        axis=1)
    return df
def calculate_match_range(geohash, radius=10):
    """Calculates lower and upper geohash boundaries for a given geohash and range in kilometers"""
    # Decode geohash
    latitude, longitude = pygeohash.decode(geohash)

    # Calculate lower boundaries
    lower_latitude = latitude - DEG_LATITUDE_PER_KM * radius
    lower_longitude = longitude - DEG_LONGITUDE_PER_KM * radius

    # Calculate upper boundaries
    upper_latitude = latitude + DEG_LATITUDE_PER_KM * radius
    upper_longitude = longitude + DEG_LONGITUDE_PER_KM * radius

    # Encode boundaries
    lower = pygeohash.encode(lower_latitude, lower_longitude)
    upper = pygeohash.encode(upper_latitude, upper_longitude)

    return lower, upper
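# DEG_LATITUDE_PER_KM and DEG_LONGITUDE_PER_KM are defined elsewhere; plausible
# values are sketched below (one degree of latitude spans roughly 111.32 km;
# the longitude figure here ignores the cos(latitude) correction, so treat it
# as an assumption rather than the original constants):
DEG_LATITUDE_PER_KM = 1 / 111.32
DEG_LONGITUDE_PER_KM = 1 / 111.32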
def transform(self, X, y=None):
    print("geo")
    assert isinstance(X, pd.DataFrame)
    X_ = X.copy()
    X_["geohash_pickup"] = X_.apply(
        lambda x: gh.encode(x.pickup_latitude, x.pickup_longitude,
                            precision=self.precision),
        axis=1,
    )
    print("geo inbetween")
    X_["geohash_dropoff"] = X_.apply(
        lambda x: gh.encode(x.dropoff_latitude, x.dropoff_longitude,
                            precision=self.precision),
        axis=1,
    )
    print(X_.head(1))
    return X_[["geohash_pickup", "geohash_dropoff"]]
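# The two transform() methods above presumably belong to a custom scikit-learn
# transformer; a minimal sketch of how such a class could be wired up is below.
# The class name and its use in a Pipeline are assumptions, not the original code.
from sklearn.base import BaseEstimator, TransformerMixin


class GeohashTransformer(BaseEstimator, TransformerMixin):
    def __init__(self, precision=6):
        self.precision = precision

    def fit(self, X, y=None):
        # nothing to learn; geohashing is a stateless row-wise transform
        return self

    def transform(self, X, y=None):
        X_ = X.copy()
        X_["geohash_pickup"] = X_.apply(
            lambda x: gh.encode(x.pickup_latitude, x.pickup_longitude,
                                precision=self.precision), axis=1)
        X_["geohash_dropoff"] = X_.apply(
            lambda x: gh.encode(x.dropoff_latitude, x.dropoff_longitude,
                                precision=self.precision), axis=1)
        return X_[["geohash_pickup", "geohash_dropoff"]]

# e.g. Pipeline([("geohash", GeohashTransformer(precision=6)), ("model", ...)])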
def get_list():
    # read the csv file from nasa
    req = requests.get(NASA_ENDPOINT)
    data = io.StringIO(req.text)
    df = pd.read_csv(data)
    df['geohash'] = df.apply(
        lambda x: geohash.encode(x.latitude, x.longitude, precision=5), axis=1)
    records = df.to_dict(orient='records')
    return records
def loadData():
    fileLink = open('Partition6467LinkData.csv', 'r')
    print('Start loading data.')
    linesLink = fileLink.readlines()
    linkData = {}
    geohash6 = {}
    geohash5 = {}
    for line in linesLink:
        string = line[0:-1]
        linkRow = string.split(',')
        linkPVID = linkRow[0]
        if linkRow[-3] != '':
            shapeInfo = linkRow[-3].split('|')
            temp = []
            for shape in shapeInfo:
                temp.append(shape.split('/'))
            linkRow[-3] = temp
            rNode = linkRow[-3][0]
            shapeInfo = linkRow[-3]
            linkData[linkPVID] = shapeInfo
            # geohash precision 6
            areaGeohash6 = pgh.encode(float(rNode[0]), float(rNode[1]), precision=6)
            if areaGeohash6 in geohash6:
                geohash6[areaGeohash6].append(linkPVID)
            else:
                geohash6[areaGeohash6] = [linkPVID]
            # geohash precision 5
            areaGeohash5 = pgh.encode(float(rNode[0]), float(rNode[1]), precision=5)
            if areaGeohash5 in geohash5:
                geohash5[areaGeohash5].append(linkPVID)
            else:
                geohash5[areaGeohash5] = [linkPVID]
    print('Finished loading data.')
    fileLink.close()
    run(linkData, geohash5, geohash6)
def to_db(self, allow_codes):
    '''Write the road map into the database.'''
    # update the tables
    session = Session()
    drop_table(Nodes, Base, engine=engine)
    drop_table(Records, Base, engine=engine)
    d_pos = {}  # {(lon, lat): node}, used for de-duplication
    for i, (shape, record) in enumerate(self.itrRecord()):
        # if i == 100: break
        points = shape.points
        code = record.code
        # filter by road class
        if code in allow_codes:
            from_pos = points[0]
            to_pos = points[-1]
            itr = (from_pos, to_pos)
            if any(self.boundery.contains_points(itr)):
                # store the index grouped by road class
                if code in self.d_level:
                    self.d_level[code].append(i)
                else:
                    self.d_level[code] = [i]
                l_node = []
                # store the road network nodes
                for pos in itr:
                    if pos in d_pos:  # one-way road check?
                        node = d_pos[pos]
                    else:
                        # ind = self.get_gZone(pos)
                        # if ind is not None:
                        #     self.gZone[ind].append(pos)
                        self.nid += 1
                        node = self.nid
                        d_pos[pos] = node
                        n = Nodes(id=node,
                                  osm_id=record.osm_id,
                                  longitude=pos[0],
                                  latitude=pos[1],
                                  geohash5=encode(pos[1], pos[0], 5))
                        session.add(n)
                    l_node.append(node)
                r = Records(bridge=self.bool_map[record.bridge],
                            oneway=self.bool_map[record.oneway],
                            tunnel=self.bool_map[record.tunnel],
                            ref=record.ref,
                            name=record.name,
                            code=record.code,
                            fclass=record.fclass,
                            from_node=l_node[0],
                            to_node=l_node[1])
                session.add(r)
                record.extend(l_node)  # append the node ids as attributes
    session.commit()
    session.close()
def compute_geohash(lat, lon):
    try:
        logging.debug(f'Computing geohash for {lat}, {lon}')
        geohash = pygeohash.encode(lat, lon)
        logging.debug(f'Geohash computed: {geohash}')
    except Exception as e:
        logging.error(f'An error occurred while computing geohash: {e}')
        geohash = None
    return geohash
def __init__(self,
             cs_size,
             name=None,
             tile_names=None,
             tile_filenames=None,
             stretch_factor=1,
             target_lat=-90,
             target_lon=170):
    do_schmidt = stretch_factor != 1 or target_lat != -90 or target_lon != 170
    if name is None:
        if not do_schmidt:
            name = 'c{cs_size}_gridspec'
        else:
            name = 'c{cs_size}_s{stretch_factor}_t{target_geohash}_gridspec'
    if tile_names is None:
        tile_names = 'tile{tile_number}'
    if tile_filenames is None:
        if not do_schmidt:
            tile_filenames = 'c{cs_size}.{tile_name}.nc'
        else:
            tile_filenames = 'c{cs_size}_s{stretch_factor}_t{target_geohash}.{tile_name}.nc'
    filler_dict = dict(
        cs_size=cs_size,
        stretch_factor=f"{stretch_factor:.2f}".replace(".", "d"),
        target_geohash=pgh.encode(target_lat, target_lon),
    )
    name = name.format(**filler_dict)
    tnames = []
    filenames = []
    for i in range(6):
        filler_dict['tile_number'] = i + 1
        tnames.append(tile_names.format(**filler_dict))
        filler_dict['tile_name'] = tnames[-1]
        filenames.append(tile_filenames.format(**filler_dict))
    supergrid_lat, supergrid_lon = self.calc_supergrid_latlon(
        cs_size, stretch_factor, target_lat, target_lon)
    tile_attrs = dict(geometry="spherical",
                      north_pole="0.0 90.0",
                      projection="cube_gnomonic",
                      discretization="logically_rectangular",
                      conformal="FALSE")
    super(GridspecGnomonicCubedSphere, self).__init__(
        name=name,
        tile_filenames=filenames,
        contacts=self.get_contacts(name, tnames),
        contact_indices=self.get_contact_indices(cs_size),
        tiles=[
            GridspecTile(name=tnames[i],
                         supergrid_lats=supergrid_lat[i, ...],
                         supergrid_lons=supergrid_lon[i, ...],
                         attrs=tile_attrs) for i in range(len(tnames))
        ])
def get_geohash(gps):
    if not gps:
        log.debug('Cannot calculate Geohash when no GPS-data exists')
        return None
    log.info('Calculating Geohash')
    extractor.add_decimal_lat_lon(gps)
    lat = gps.get('GPSLatitudeDec', None)
    lon = gps.get('GPSLongitudeDec', None)
    return pygeohash.encode(lat, lon) if lat and lon else None
def hash_zip(zip_path, precision_level, spark):
    # geohash precision (+/-): 8(19m), 7(76m), 6(0.61km), 5(2.4km), 4(20km), 3(78km), 2(630km)
    udf_gh = F.udf(lambda x, y: gh.encode(x, y, precision=precision_level))
    ziphash_df = spark.read.format("csv").option("header", "true").load(zip_path)\
        .select(
            F.col('zip'),
            udf_gh(F.col('latitude').cast('float'),
                   F.col('longitude').cast('float')).alias('zipgeohash')
        )
    return ziphash_df
def push_to_pandas(df):
    import pygeohash
    from cassandra.cluster import Cluster
    from kafka import KafkaProducer
    import timeit
    cluster = Cluster()
    session = cluster.connect('xweather')
    producer = KafkaProducer(bootstrap_servers=['vm1:9092'])
    name = multiprocessing.current_process().name
    # df = pd.read_csv(filename)
    df1 = df[['id', 'lat', 'lon', 'src', 'elev', 'timezone', 'tzoffset']].drop_duplicates()
    df1['src'] = df1.src.fillna('NA')
    # Adding Geohash Id
    df1['geohash_id'] = df.apply(lambda row: pygeohash.encode(row['lat'], row['lon']), axis=1)
    # Now loop through the Dataframe
    for row in df1.itertuples():
        j = ','.join((row[8], str(row[1]), str(row[5]), row[8][:3], str(row[2]),
                      str(row[3]), str(row[4]), str(row[6]), str(row[7])))
        future = producer.send('topic-weather-stations', j)
    print('Completed insert into weather stations', name)
    # Now to the facts
    # Remove the descriptive columns
    df.drop(df.columns[[1, 2, 3, 4, 5, 6]], axis=1, inplace=True)
    # Unpivot the dataset
    df = pd.melt(df, id_vars=['id', 'timestamp', 'dateTime'])
    df = df.dropna()
    # Kafka it
    ctr = 0
    producer = KafkaProducer(bootstrap_servers=['vm1:9092'], batch_size=20000,
                             linger_ms=50, buffer_memory=952108864)
    # producer = KafkaProducer(bootstrap_servers=['vm1:9092'])
    start_time = timeit.default_timer()
    for row in df.itertuples():
        k = list(row)
        k = k[1:]
        j = ','.join(str(x) for x in k)
        future = producer.send('topic-weather-data', j)
        ctr += 1
    print('Producer timing is ', name, timeit.default_timer() - start_time, 'Rows:', ctr)
    producer.flush()
    producer.close()
def validate_inputs(latlong, reqtype='obs', days=0, metrics='all', maxdistance=30):
    # Validate the inputs passed in. The validation is primarily that the inputs are present
    # or the defaults get passed out
    return_list = []
    validrequests = ['obs', 'forecast']
    try:
        # Validate the lat/long passed
        l_context = 'Get geohash'
        try:
            geohashid = pygeohash.encode(float(latlong[0]), float(latlong[1]))
        except:
            print("Invalid lat long passed.")
            sys.exit(1)

        # Validate the request type
        l_context = 'Get request type'
        request = reqtype
        if request not in validrequests:
            raise ValueError('Invalid Request type :' + str(request),
                             'valid requests are ', validrequests)

        # Validate the days
        l_context = 'Get days'
        if request != 'obs':
            if (days > 10) or (days <= 0):
                days = 1
        else:
            days = 0

        # Validate max distance
        l_context = 'Get maxdistance'
        if maxdistance > 30:
            distance = 30
        else:
            distance = maxdistance

        # pass the validated values back out
        return_list = [geohashid, request, days, metrics, distance]
        return return_list
    except ValueError as err:
        print(err.args)
        return -1
    except Exception as err:
        print('Error in inputs while processing ' + l_context, err)
        return -1
def test_stats(self):
    data = [(50, 0), (-50, 0), (0, -50), (0, 50)]
    data = [pgh.encode(lat, lon) for lat, lon in data]

    # mean
    mean = pgh.mean(data)
    self.assertEqual(mean, '7zzzzzzzzzzz')

    # north
    north = pgh.northern(data)
    self.assertEqual(north, 'gbzurypzpgxc')

    # south
    south = pgh.southern(data)
    self.assertEqual(south, '5zpgxczbzury')

    # east
    east = pgh.eastern(data)
    self.assertEqual(east, 'mpgxczbzuryp')

    # west
    west = pgh.western(data)
    self.assertEqual(west, '6zurypzpgxcz')
def test_encode(self):
    self.assertEqual(pgh.encode(42.6, -5.6), 'ezs42e44yx96')
    self.assertEqual(pgh.encode(42.6, -5.6, precision=5), 'ezs42')