def load_data_from_local(filename=None, verbose=True):
    """Load previously saved bike data from a local file; return None if it cannot be read."""
    bike_data = None
    full_filename = resolve_filepath(filename)
    try:
        bike_data = io.load_file(full_filename)
    except Exception:
        if verbose:
            print("ERROR: Bike data not loaded.")
    return bike_data
def load_features():
    """Load the saved feature table and split it into a feature matrix and the target column."""
    res = io.load_file('feature_file')
    y = res['y']
    # Drop the target columns (the per-direction counts and their total) so they
    # cannot leak into the feature matrix.
    y_cols = ['fremont_bridge_nb', 'fremont_bridge_sb', 'y']
    feature_cols = res.columns.drop(y_cols)
    return res[feature_cols], y
def pull_bike_data(bike_data=None, save_data=True, verbose=True):
    """Pull Fremont Bridge counter data from the Socrata (SODA) API.

    If no local data is passed in, the full dataset is downloaded; otherwise only
    rows newer than the last locally stored record are requested and appended.
    """
    database_date_format, date_format, url, dataset_identifier = io.get_params(
        'database_date_format', 'date_format', 'url', 'dataset_identifier')
    info = io.load_file("info_file")
    last_pulled = info.get('last_updated', None)
    last_pulled_dt = (datetime.strptime(last_pulled, date_format)
                      if last_pulled is not None else datetime(1970, 1, 1))

    if bike_data is None:
        # No local data yet: download the full dataset.
        client = auth.get_soda_client(url)
        json_data = client.get(dataset_identifier=dataset_identifier,
                               content_type='json', limit=50000)
        bike_data = pd.DataFrame(json_data)
        bike_data['date'] = pd.to_datetime(bike_data['date'])
        expand_datetime_features(bike_data, 'date')
        info['last_updated'] = datetime.now().strftime(date_format)
        if verbose:
            print("New data pulled: {0} new rows of data".format(bike_data.shape[0]))
    elif last_pulled_dt.date() < datetime.today().date():
        # Local data exists but has not been refreshed today: request only newer rows.
        if verbose:
            print("Last checked database on {0}, checking for new data".format(last_pulled_dt))
        client = auth.get_soda_client(url)
        new_json_data = client.get(
            dataset_identifier=dataset_identifier, content_type='json', limit=50000,
            where="date > \"{0}\"".format(bike_data['date'].iloc[-1]))
        new_bike_data = pd.DataFrame(new_json_data)
        info['last_updated'] = datetime.now().strftime(date_format)
        if 'date' in new_bike_data.columns:
            # A 'date' column means the server actually returned new rows.
            expand_datetime_features(new_bike_data, 'date')
            bike_data = pd.concat([bike_data, new_bike_data], ignore_index=True)
            if verbose:
                print("New data pulled:\n{0} new rows of data\nTotal = {1}".format(
                    new_bike_data.shape[0], bike_data.shape[0]))
        else:
            if verbose:
                print("No new data")

    # Convert the count columns from object to numeric before deriving the target.
    count_cols = ['fremont_bridge_sb', 'fremont_bridge_nb']
    bike_data[count_cols] = bike_data[count_cols].apply(pd.to_numeric, errors='coerce')
    bike_data['y'] = bike_data['fremont_bridge_nb'] + bike_data['fremont_bridge_sb']
    expand_datetime_features(bike_data, 'date')

    if save_data:
        io.save_file("data_file", bike_data)
        io.save_file("info_file", info)
    if verbose:
        print("Bike data is up-to-date.")
    return bike_data
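# Usage sketch (an assumption, not part of the original module): refresh the local
# copy of the bike data, then load the prepared feature table for modelling.
# 'feature_file' is produced by a separate feature-building step that is not shown here.
if __name__ == '__main__':
    bike_data = pull_bike_data(bike_data=load_data_from_local(), save_data=True)
    X, y = load_features()
    print("{0} rows of raw data, {1} feature rows".format(bike_data.shape[0], X.shape[0]))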
from visual_modules import *
from computational_modules import *
import utils.io as io
import utils.alg as alg
import utils.gen as gen
import utils.features as features

# pandas (pd) is brought in via the wildcard imports above
desired_width = 320
pd.set_option('display.width', desired_width)
pd.set_option('expand_frame_repr', True)

cfg = io.load_config_file()

# get paths
pwd = io.get_path('project_path', cfg=cfg)

# Data
xdf = io.load_file('feature_file')
xdf1 = xdf.groupby(['day', 'month', 'year']).mean()
def get_private_key(key_type='googlemaps_key', cfg=None):
    """Return the private key stored in the configured key file."""
    # Load the config lazily rather than as a default argument, which would be
    # evaluated once at import time.
    if cfg is None:
        cfg = io.load_config_file()
    key_json = io.load_file(name=key_type, cfg=cfg)
    return key_json['private_key']
def get_client_id(key_type='googlemaps_key', cfg=None):
    """Return the client ID stored in the configured key file."""
    if cfg is None:
        cfg = io.load_config_file()
    key_json = io.load_file(name=key_type, cfg=cfg)
    return key_json['client_id']
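# Usage sketch (an assumption, not from the original source): the two helpers read a
# key file registered as 'googlemaps_key' that contains both 'client_id' and
# 'private_key'. How the credentials are consumed downstream is not shown here, so
# the client construction below is illustrative only.
import googlemaps  # assumed dependency

gmaps = googlemaps.Client(client_id=get_client_id(),
                          client_secret=get_private_key())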