def tweets_to_df(keyword=None, inGeom=None, epsg=None, LANG='pt',
                 NTWEETS=1000, tweetType='mixed', apiKey=None,
                 dropFields=None):
    """
    Search for Tweets and export them to a pandas DataFrame.

    Parameters:
    keyword    - text to search for (required if inGeom is not given);
    inGeom     - geometry used to derive a search circle
                 (requires epsg);
    epsg       - SRS code of inGeom;
    LANG       - language filter passed to the Twitter API;
    NTWEETS    - maximum number of tweets to retrieve;
    tweetType  - result type ('mixed', 'recent' or 'popular'
                 presumably - confirm against search_tweets);
    apiKey     - Twitter API credentials;
    dropFields - column name or list of column names to drop from
                 the result.

    Returns a DataFrame with the tweets plus a "keyword" column, or
    0 when the search produced no data.

    Raises ValueError when neither inGeom nor keyword is given, or
    when inGeom is given without epsg.
    """

    from gasp import goToList

    if not inGeom and not keyword:
        raise ValueError('inGeom or keyword, one of them are required')

    if inGeom and not epsg:
        raise ValueError('inGeom implies epsg')

    if inGeom:
        from gasp.anls.prox.bf import getBufferParam

        x, y, dist = getBufferParam(inGeom, epsg, outSRS=4326)

        # getBufferParam returns the radius in meters; search_tweets
        # expects kilometers
        dist = float(dist) / 1000

    else:
        x, y, dist = None, None, None

    data = search_tweets(
        lat=y, lng=x, radius=dist, keyword=keyword,
        NR_ITEMS=NTWEETS, only_geo=None, __lang=LANG,
        resultType=tweetType, key=apiKey
    )

    # search_tweets may return None/0 on no data; "not data" on a
    # DataFrame raises ValueError, which here just means data is a
    # valid DataFrame (was a bare except - narrowed to ValueError)
    try:
        if not data:
            return 0
    except ValueError:
        pass

    if keyword:
        data["keyword"] = keyword
    else:
        data["keyword"] = 'nan'

    dropFields = goToList(dropFields)

    if dropFields:
        data.drop(dropFields, axis=1, inplace=True)

    return data
def places_by_query(bfShp, epsgIn, keyword=None, epsgOut=4326, _limit='100',
                    onlySearchAreaContained=True):
    """
    Get absolute location of facebook data using the Facebook API and
    Pandas to validate data.

    Works only for the 'places' search type.

    bfShp could be a shapefile with a single buffer feature or
    a dict like:

    bfShp = {
        x: x_value,
        y: y_value,
        r: dist
    }

    or a list or a tuple: bfShp = [x, y, r]

    Returns a GeoDataFrame of places, or 0 when the query produced
    no data.

    NOTE(review): uses the `long` builtin, so this function is
    Python-2-only as written.
    """

    import pandas
    from shapely.geometry import Polygon, Point
    from geopandas import GeoDataFrame
    from gasp.anls.prox.bf import getBufferParam
    from gasp.web.dsn.fb.search import by_query

    search_type = 'place'

    # Buffer parameters (center and radius) expressed in WGS84
    x_center, y_center, dist = getBufferParam(bfShp, epsgIn, outSRS=4326)

    data = by_query(
        search_type, keyword=keyword, x_center=x_center, y_center=y_center,
        dist=dist, limit=_limit, face_fields=[
            "location", "name", "category_list", "about",
            "checkins", "description", "fan_count"
        ]
    )

    # by_query may return None/0 when there is no data; "not data" on
    # a DataFrame raises, which means data is a valid DataFrame
    try:
        if not data:
            # Return NoData
            return 0
    except:
        pass

    # Sanitize category_list field: expand the list-valued column into
    # one column per list position (these new columns get integer
    # - `long` - names: 0, 1, 2, ...)
    data = pandas.concat([
        data.drop(["category_list"], axis=1),
        data["category_list"].apply(pandas.Series)
    ], axis=1)

    # Collect the integer-named columns produced by the expansion
    _int_cols = [c for c in data.columns.values if type(c) == long]

    # Map each integer column name to a temporary string alias
    # ("col_0", "col_1", ...) so it can be addressed by label
    __int_cols = {
        x : "col_{}".format(str(x)) for x in _int_cols
    }

    data.rename(columns=__int_cols, inplace=True)

    # Preserve the top-level id/name before expanding nested records
    data.rename(columns={"id" : "id_1", "name" : "name_1"}, inplace=True)

    # Each "col_k" holds a nested {id, name} category record; expand
    # it and rename the resulting id/name to id_(k+2)/name_(k+2) so
    # successive expansions do not collide
    for k in __int_cols:
        data = pandas.concat([
            data.drop([__int_cols[k]], axis=1),
            data[__int_cols[k]].apply(pandas.Series)
        ], axis=1)

        data.rename(columns={
            'id' : 'id_' + str(k+2), 'name' : 'name_' + str(k+2)
        }, inplace=True)

    # Expanding rows with missing categories leaves a leftover
    # integer 0 column; drop it if present
    if long(0) in list(data.columns.values):
        data.drop([0], axis=1, inplace=True)

    # Pandas dataframe to Geopandas Dataframe
    geoms = [Point(xy) for xy in zip(data.longitude, data.latitude)]
    data.drop(["latitude", "longitude"], axis=1, inplace=True)
    gdata = GeoDataFrame(data, crs={'init' : 'epsg:4326'}, geometry=geoms)

    if onlySearchAreaContained:
        from shapely.wkt          import loads
        from gasp.mng.prj         import project_geom
        from gasp.anls.prox.bf    import coord_to_buffer

        # Check if all retrieve points are within the search area;
        # the buffer is built in a metric CRS (3857) and reprojected
        # back to WGS84 before intersecting
        _x_center, _y_center, _dist = getBufferParam(
            bfShp, epsgIn, outSRS=3857
        )
        search_area = coord_to_buffer(
            float(_x_center), float(_y_center), float(_dist)
        )
        search_area = project_geom(search_area, 3857, 4326, api='ogr')
        search_area = loads(search_area.ExportToWkt())

        gdata["tst_geom"] = gdata["geometry"].intersects(search_area)
        gdata = gdata[gdata["tst_geom"] == True]
        gdata.reset_index(drop=True, inplace=True)

    # Sanitize id
    gdata["fid"]     = gdata["id_1"]
    gdata["fb_type"] = search_type

    # Drop columns not part of the output schema (only those actually
    # present, since the API does not always return every field)
    __DROP_COLS = [
        "id_1", "city", "country", "street", "zip", "located_in"
    ]

    DROP_COLS = [c for c in __DROP_COLS if c in gdata.columns.values]

    if onlySearchAreaContained:
        DROP_COLS.append("tst_geom")

    gdata.drop(DROP_COLS, axis=1, inplace=True)

    if epsgOut != 4326:
        gdata = gdata.to_crs({'init' : 'epsg:{}'.format(str(epsgOut))})

    return gdata
def photos_location(buffer_shp, epsg_in, keyword=None, epsg_out=4326,
                    onlySearchAreaContained=True, keyToUse=None):
    """
    Search for photos in Flickr and return them as a GeoDataFrame.

    buffer_shp could be a shapefile with a single buffer feature or
    a dict like:

    buffer_shp = {
        x: x_value,
        y: y_value,
        r: dist (in meters)
    }

    or a list or a tuple: buffer_shp = [x, y, radius]

    Returns a GeoDataFrame with columns fid, url, description,
    datetaken, dateupload, title and geometry, or 0 when the search
    produced no data.
    """

    import pandas
    from shapely.geometry  import Point
    from shapely.wkt       import loads
    from geopandas         import GeoDataFrame
    from gasp.anls.prox.bf import coord_to_buffer
    from gasp.anls.prox.bf import getBufferParam
    from gasp.mng.prj      import project_geom

    x_center, y_center, dist = getBufferParam(
        buffer_shp, epsg_in, outSRS=4326)

    # Retrive data from Flickr; the API expects the radius in
    # kilometers while getBufferParam returns meters
    photos = search_photos(
        lat=y_center, lng=x_center, radius=float(dist) / 1000,
        keyword=keyword, apiKey=keyToUse
    )

    # search_photos may return None/0 on no data; "not photos" on a
    # DataFrame raises ValueError, which here means photos is a valid
    # DataFrame (was a bare except - narrowed to ValueError)
    try:
        if not photos:
            # Return noData
            return 0
    except ValueError:
        pass

    photos['longitude'] = photos['longitude'].astype(float)
    photos['latitude']  = photos['latitude'].astype(float)

    geoms = [Point(xy) for xy in zip(photos.longitude, photos.latitude)]
    gdata = GeoDataFrame(photos, crs={'init': 'epsg:4326'}, geometry=geoms)

    if onlySearchAreaContained:
        # Check if all retrieve points are within the search area:
        # build the buffer in a metric CRS (3857), reproject it to
        # WGS84 and keep only intersecting points
        _x_center, _y_center, _dist = getBufferParam(
            buffer_shp, epsg_in, outSRS=3857)

        search_area = coord_to_buffer(
            float(_x_center), float(_y_center), float(_dist))
        search_area = project_geom(search_area, 3857, 4326, api='ogr')
        search_area = loads(search_area.ExportToWkt())

        gdata["tst_geom"] = gdata["geometry"].intersects(search_area)
        gdata = gdata[gdata["tst_geom"] == True]
        gdata.reset_index(drop=True, inplace=True)

    gdata["fid"] = gdata["id"]

    # Not every photo record carries a large-size URL
    if "url_l" in gdata.columns.values:
        gdata["url"] = gdata["url_l"]
    else:
        gdata["url"] = 'None'

    gdata["description"] = gdata["_content"]

    # Drop irrelevant fields: keep only the output schema columns
    # (this also removes the temporary tst_geom column)
    KEEP_COLS = (
        'geometry', 'description', 'fid', 'url',
        'datetaken', 'dateupload', 'title'
    )
    delCols = [c for c in gdata.columns.values if c not in KEEP_COLS]

    gdata.drop(delCols, axis=1, inplace=True)

    if epsg_out != 4326:
        gdata = gdata.to_crs({'init': 'epsg:{}'.format(str(epsg_out))})

    return gdata
def geotweets_location(inGeom, epsg_in, keyword=None, epsg_out=4326,
                       onlySearchAreaContained=True, keyToUse=None):
    """
    Search geo-referenced tweets in Twitter and return them as a
    GeoDataFrame.

    inGeom could be a shapefile with a single buffer feature or
    a dict like:

    inGeom = {
        x: x_value,
        y: y_value,
        r: dist (in meters)
    }

    or a list or a tuple: inGeom = [x, y, radius]

    Returns a GeoDataFrame in epsg_out, or 0 when the search produced
    no data.
    """

    from shapely.geometry  import Point
    from geopandas         import GeoDataFrame
    from gasp.anls.prox.bf import getBufferParam

    x_center, y_center, dist = getBufferParam(inGeom, epsg_in, outSRS=4326)

    # Extract data from Twitter; the API expects the radius in
    # kilometers while getBufferParam returns meters
    data = search_tweets(
        lat=y_center, lng=x_center, radius=float(dist) / 1000,
        keyword=keyword, NR_ITEMS=500, only_geo=True, key=keyToUse
    )

    # search_tweets may return None/0 on no data; "not data" on a
    # DataFrame raises ValueError, which here means data is a valid
    # DataFrame (was a bare except - narrowed to ValueError)
    try:
        if not data:
            return 0
    except ValueError:
        pass

    # Pandas to GeoPandas
    geoms = [Point(xy) for xy in zip(data.longitude, data.latitude)]
    data.drop(["latitude", "longitude"], axis=1, inplace=True)
    gdata = GeoDataFrame(data, crs={'init': 'epsg:4326'}, geometry=geoms)

    if onlySearchAreaContained:
        from shapely.wkt       import loads
        from gasp.mng.prj      import project_geom
        from gasp.anls.prox.bf import coord_to_buffer

        # Check if all retrieve points are within the search area:
        # build the buffer in a metric CRS (3857), reproject it to
        # WGS84 and keep only intersecting points
        _x_center, _y_center, _dist = getBufferParam(
            inGeom, epsg_in, outSRS=3857)

        search_area = coord_to_buffer(
            float(_x_center), float(_y_center), float(_dist))
        search_area = project_geom(search_area, 3857, 4326, api='ogr')
        search_area = loads(search_area.ExportToWkt())

        gdata["tst_geom"] = gdata["geometry"].intersects(search_area)
        gdata = gdata[gdata["tst_geom"] == True]
        gdata.reset_index(drop=True, inplace=True)
        gdata.drop("tst_geom", axis=1, inplace=True)

    if epsg_out != 4326:
        gdata = gdata.to_crs({'init': 'epsg:{}'.format(str(epsg_out))})

    return gdata
def geovideos_to_array(search_words, epsg_out=4326,
                       spatial_filter=None, epsg_filter=4326):
    """
    Locate videos on youtube and return the located videos as a list
    of dicts (each with at least 'x' and 'y' keys).

    Parameters:
    search_words   - keyword(s) passed to the YouTube search;
    epsg_out       - SRS of the output coordinates;
    spatial_filter - optional buffer (shapefile/dict/list, see
                     getBufferParam) used to keep only videos inside
                     the search area;
    epsg_filter    - SRS of spatial_filter.

    Returns the list of located video dicts, or 0 when no video has
    coordinates.
    """

    from gasp.to.geom      import create_point
    from gasp.mng.prj      import project_geom
    from gasp.anls.tplgy   import point_in_polygon
    from gasp.anls.prox.bf import draw_buffer

    videos = get_video_details_by_keyword(search_words)

    # Keep only videos that actually carry coordinates
    videos_with_location = [
        video for video in videos if video['y'] and video['x']
    ]

    if not len(videos_with_location):
        # Return nodata
        return 0

    if not spatial_filter:
        # BUGFIX: this path previously fell off the end of the
        # function and implicitly returned None, discarding every
        # located video. Return them, reprojected like the filtered
        # branch does.
        if epsg_out != 4326:
            for instance in videos_with_location:
                pnt = project_geom(
                    create_point(
                        float(instance['x']), float(instance['y']),
                        api='ogr'
                    ), 4326, epsg_out, api='ogr'
                )
                instance['x'] = pnt.GetX()
                instance['y'] = pnt.GetY()

        return videos_with_location

    from gasp.anls.prox.bf import getBufferParam

    x_center, y_center, dist = getBufferParam(spatial_filter, epsg_filter)

    # Build the search buffer in a metric CRS (3857) so the distance
    # is meaningful
    bufferCenter = project_geom(
        create_point(x_center, y_center, api='ogr'), 4326, 3857, api='ogr'
    )
    bufferGeom = draw_buffer(bufferCenter, dist)

    filterData = []
    for instance in videos_with_location:
        # Create point
        WGS_POINT = create_point(
            float(instance['x']), float(instance['y']), api='ogr'
        )
        point = project_geom(WGS_POINT, 4326, 3857, api='ogr')

        if point_in_polygon(point, bufferGeom):
            if epsg_out != 4326:
                trans_point = project_geom(
                    WGS_POINT, 4326, epsg_out, api='ogr')

                instance['x'] = trans_point.GetX()
                instance['y'] = trans_point.GetY()

            filterData.append(instance)

    return filterData