def test_worksheet(user_credentials_not_available):
    """Round-trip a one-column frame through upload/download, both for the
    default worksheet and for an explicitly named worksheet.

    Skipped (xfail) when Google credentials are unavailable.
    """
    if user_credentials_not_available:
        pytest.xfail(reason='Credentials')
    import string
    import random
    import pandas as pd
    from df2gspread import df2gspread as d2g
    from df2gspread import gspread2df as g2d
    from df2gspread.utils import get_credentials
    from df2gspread.gfiles import get_file_id
    from df2gspread.gfiles import delete_file
    df_upload = pd.DataFrame(
        {'0': ['1', '2', '3', '4']}, index=['1', '2', '3', '4'])
    # Random file name so concurrent test runs don't collide.
    filepath = ''.join(
        random.choice(string.ascii_uppercase + string.digits)
        for _ in range(10))
    # First worksheet as default
    d2g.upload(df_upload, filepath)
    df_download = g2d.download(filepath, col_names=True, row_names=True)
    # BUG FIX: ``all(df_upload == df_download)`` iterates over the *column
    # labels* of the comparison frame, which are always truthy, so the
    # original assertion could never fail. Compare every cell instead.
    assert (df_upload == df_download).all().all()
    # updating existed spreadsheet
    d2g.upload(df_upload, filepath, wks_name=filepath)
    df_download = g2d.download(filepath, col_names=True, row_names=True)
    assert (df_upload == df_download).all().all()
    # Clear created file from drive
    credentials = get_credentials()
    file_id = get_file_id(credentials, filepath)
    delete_file(credentials, file_id)
def test_spreadsheet_invalid_file_id(user_credentials_not_available):
    """Downloading a nonexistent spreadsheet id must raise RuntimeError."""
    if user_credentials_not_available:
        pytest.xfail(reason='Credentials')
    from df2gspread import gspread2df as g2d
    # A bogus file id cannot resolve to any Drive document.
    with pytest.raises(RuntimeError):
        g2d.download(gfile='invalid_file_id')
def test_gspread2df_start_cell(user_credentials_not_available):
    """download() honours ``start_cell``: out-of-range cells raise
    RuntimeError, in-range cells return the sub-table at/below/right of
    the start cell.  Skipped (xfail) without Google credentials."""
    if user_credentials_not_available:
        pytest.xfail(reason='Credentials')
    import string
    import random
    import numpy as np
    import pandas as pd
    from numpy.testing import assert_array_equal
    from pandas.util.testing import assert_frame_equal
    from df2gspread import df2gspread as d2g
    from df2gspread import gspread2df as g2d
    from df2gspread.utils import get_credentials
    from df2gspread.gfiles import get_file_id
    from df2gspread.gfiles import delete_file
    # Random sheet path so concurrent test runs don't collide.
    filepath = '/df2gspread_tests/' + ''.join(
        random.choice(string.ascii_uppercase + string.digits)
        for _ in range(10))
    # 4x4 table of strings (sheets round-trip everything as text).
    df_upload = pd.DataFrame(
        {
            0: ['1', '2', 'x', '4'],
            1: ['2', '2', 'y', '4'],
            2: ['3', '2', 'w', '4'],
            3: ['4', '2', 'z', '4']
        },
        index=[0, 1, 2, 3])
    # Start cell out of the table size
    d2g.upload(df_upload, filepath, row_names=False, col_names=False)
    with pytest.raises(RuntimeError):
        df_download = g2d.download(filepath, start_cell='A5')
    with pytest.raises(RuntimeError):
        df_download = g2d.download(filepath, start_cell='E1')
    # Should be fixed in gspread
    # with pytest.raises(RuntimeError):
    #     df_download = g2d.download(filepath, start_cell='A0')
    # start_cell = 'A3' -> last two rows, all columns
    d2g.upload(df_upload, filepath, row_names=False, col_names=False)
    df_download = g2d.download(filepath, start_cell='A3')
    assert_array_equal(df_upload.iloc[2:, :], df_download)
    # start_cell = 'B3' -> last two rows, columns B onward
    d2g.upload(df_upload, filepath, row_names=False, col_names=False)
    df_download = g2d.download(filepath, start_cell='B3')
    assert_array_equal(df_upload.iloc[2:, 1:], df_download)
    # Clear created file from drive
    credentials = get_credentials()
    file_id = get_file_id(credentials, filepath)
    delete_file(credentials, file_id)
def test_gspread2df_start_cell(user_credentials_not_available):
    """download() honours ``start_cell`` (out-of-range raises, in-range
    returns the sub-table from the start cell onward).

    NOTE(review): this appears to duplicate an identically named test
    elsewhere in the collection — if both live in one module the later
    definition shadows the earlier one; confirm and deduplicate."""
    if user_credentials_not_available:
        pytest.xfail(reason='Credentials')
    import string
    import random
    import numpy as np
    import pandas as pd
    from numpy.testing import assert_array_equal
    from pandas.util.testing import assert_frame_equal
    from df2gspread import df2gspread as d2g
    from df2gspread import gspread2df as g2d
    from df2gspread.utils import get_credentials
    from df2gspread.gfiles import get_file_id
    from df2gspread.gfiles import delete_file
    # Random sheet path so concurrent test runs don't collide.
    filepath = '/df2gspread_tests/' + ''.join(
        random.choice(string.ascii_uppercase + string.digits)
        for _ in range(10))
    df_upload = pd.DataFrame(
        {0: ['1', '2', 'x', '4'],
         1: ['2', '2', 'y', '4'],
         2: ['3', '2', 'w', '4'],
         3: ['4', '2', 'z', '4']},
        index=[0, 1, 2, 3])
    # Start cell out of the table size
    d2g.upload(df_upload, filepath, row_names=False, col_names=False)
    with pytest.raises(RuntimeError):
        df_download = g2d.download(filepath, start_cell='A5')
    with pytest.raises(RuntimeError):
        df_download = g2d.download(filepath, start_cell='E1')
    # Should be fixed in gspread
    # with pytest.raises(RuntimeError):
    #     df_download = g2d.download(filepath, start_cell='A0')
    # start_cell = 'A3' -> last two rows, all columns
    d2g.upload(df_upload, filepath, row_names=False, col_names=False)
    df_download = g2d.download(filepath, start_cell='A3')
    assert_array_equal(df_upload.iloc[2:,:], df_download)
    # start_cell = 'B3' -> last two rows, columns B onward
    d2g.upload(df_upload, filepath, row_names=False, col_names=False)
    df_download = g2d.download(filepath, start_cell='B3')
    assert_array_equal(df_upload.iloc[2:,1:], df_download)
    # Clear created file from drive
    credentials = get_credentials()
    file_id = get_file_id(credentials, filepath)
    delete_file(credentials, file_id)
def test_worksheet_invalid_name(user_credentials_not_available):
    """Requesting a worksheet that does not exist must raise RuntimeError."""
    if user_credentials_not_available:
        pytest.xfail(reason='Credentials')
    import pandas as pd
    from df2gspread import gspread2df as g2d
    from df2gspread import df2gspread as d2g
    filepath = '/df2gspread_tests/invalid_wks_name'
    # Upload creates the spreadsheet with its default worksheet only,
    # so the named worksheet below is guaranteed to be absent.
    sample = pd.DataFrame(['test'])
    d2g.upload(sample, filepath)
    with pytest.raises(RuntimeError):
        g2d.download(gfile=filepath, wks_name='invalid_wks_name')
def get_manual_GS():
    """
    CALLED BY: get_GS()
    FUNCTION: Using df2gspread module, it downloads all Manual google sheet
              information into a pandas dataframe.
              The data frame column names are removed, renamed with static
              names, and subset into needed data.
              Removes any row with a blank sampleID field.
              Adds a generated index column, which reflects the index used on
              the google sheet originally -- needed later for fetching
              hyperlink data.
    RETURNS: A pandas dataframe with 11 columns
    """
    #LOAD DATA
    Manual_GS = ''  #FIX ME ADD LINK EDITING INFO
    Manual_name = ''  #FIX ME ADD GOOGLE SHEET NAME
    manual_df = g2d.download(Manual_GS, Manual_name)
    #SUBSET DATA NEEDED
    manual_df.drop(manual_df.index[0], inplace=True)  #drop column names
    manual_df.columns = [
        'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N',
        'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'AA',
        'AB', 'AC', 'AD', 'AE'
    ]
    # BUG FIX: take an explicit copy — the original assigned new columns to
    # a view of manual_df, which raises SettingWithCopyWarning and may not
    # persist the assignment depending on pandas internals.
    df = manual_df[['A', 'B', 'C', 'E', 'G', 'I', 'P', 'Q', 'S', 'X']].copy()
    df.columns = [
        'batch', 'date_received', 'sampleID', 'status', 'barcode_set',
        'rna_conc', 'qubit_conc', 'second_pico', 'lib_len', 'dna_fa'
    ]
    #CLEAN DATA
    df['index'] = df.index + 1  #so now its like GS
    #need true index to fetch hyperlink, important to include step here so
    #that index is not skewed in merging
    df = df.loc[df['sampleID'] != ""]  #missing sample ID
    return (df)
def get_insight_GS():
    """
    CALLED BY: get_GS()
    FUNCTION: Using df2gspread module, it downloads all Insight google sheet
              information into a pandas dataframe.
              The data frame column names are removed, renamed with static
              names, and subset into needed data.
              Removes any row with a blank sampleID field.
    RETURNS: A pandas dataframe with 9 columns
    """
    #LOAD DATA
    Insight_GS = ''  #FIX ME ADD LINK EDITING INFO
    Insight_name = ''  #FIX ME ADD GOOGLE SHEET NAME
    insight_df = g2d.download(Insight_GS, Insight_name)  #download df
    #SUBSET DATA NEEDED
    insight_df.drop(insight_df.index[0], inplace=True)  #drop column names
    insight_df.columns = [
        'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N'
    ]  #rename columns to match GS
    # BUG FIX: take an explicit copy — assigning df.columns on a slice of
    # insight_df raises SettingWithCopyWarning and may silently fail.
    df = insight_df[['B', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L']].copy()
    df.columns = [
        'sampleID', 'pre_seq_QC', 'seq_start', 'seq_finish', 'post_seq_QC',
        'reseq_start', 'reseq_finish', 'reseq_QC', 'reason'
    ]
    #CLEAN DATA
    df = df.loc[df['sampleID'] != ""]  #missing sample ID
    return (df)
def sheet2BigQuery(gpath, sheet, table, project_id, col):
    """Copy a Google Sheet worksheet into a BigQuery table.

    Parameters:
        gpath      - Spread Sheet ID
        sheet      - Worksheet name
        table      - BQ destination Table in format dataset.table
        project_id - GCP Project
        col        - List of columns to keep
    Returns True on completion.
    """
    # Pull the worksheet into a DataFrame (first row as header).
    frame = g2d.download(gfile=gpath, wks_name=sheet, col_names=True)
    # Add the MST/UTC timestamp columns, then keep only the requested ones.
    frame = createTimestamps(frame)
    subset = frame[col]
    # Replace the destination table wholesale.
    subset.to_gbq(destination_table=table,
                  project_id=project_id,
                  if_exists='replace')
    return True
def load2df(sheetName):
    """Download a worksheet and rebuild it as a DataFrame whose first
    sheet row becomes the columns and first sheet column the index."""
    scopes = [
        'https://spreadsheets.google.com/feeds',
        'https://www.googleapis.com/auth/drive'
    ]
    credentials = SAC.from_json_keyfile_name("credentials.json", scopes)
    spreadsheet_key = '1HqhkjYDrTR8jgLIZ0GMJzYqGk4tRNd6hTWVlzfySccE'
    raw = g2d.download(gfile=spreadsheet_key,
                       wks_name=sheetName,
                       credentials=credentials)
    # Row 0 (minus its first cell) holds the column labels.
    header = [raw.loc[0][j] for j in range(1, len(raw.loc[0]))]
    # Remaining rows: first cell is the index label, rest are data.
    row_labels = []
    body = []
    for r in range(1, len(raw.index)):
        cells = list(raw.values[r])
        row_labels.append(cells.pop(0))
        body.append(cells)
    return pd.DataFrame(data=body, columns=header, index=row_labels)
def main():
    """Fetch the current Google Sheet, query Zoopla, and upload new rows.
    Exits the process if the Sheets API cannot be reached."""
    print('Testing Google API connection...')
    sheets_id = '1i7wVz-uP9kgLwNRWyRBvmQ9TTuEx9i0xi23GfSA7sSw'
    sheet_name = 'northwood'
    try:
        gs = g2d.download(sheets_id, sheet_name,
                          row_names=False, col_names=True)
    except TimeoutError:
        # No network: nothing useful can be done, stop the programme.
        print('No connection, programme stops.')
        import sys
        sys.exit()
    print('Connected, continue...')
    import script.zoopla as zpl
    print('Getting the latest from Zoopla...')
    latest_listings = zpl.query_zoopla()
    # Append only the rows not already present on the sheet.
    zpl.upload_new_rows(existing_gs=gs, new_zoopla=latest_listings)
def get_latest_gs(sheets_id='1i7wVz-uP9kgLwNRWyRBvmQ9TTuEx9i0xi23GfSA7sSw',
                  sheet_name='northwood'):
    """Download the tracking sheet as a DataFrame (header row as columns).
    Exits the process on TimeoutError."""
    try:
        sheet_df = g2d.download(sheets_id, sheet_name,
                                row_names=False, col_names=True)
    except TimeoutError:
        import sys
        sys.exit()
    else:
        return sheet_df
def main():
    """Pull this week's Google Analytics metrics (unsegmented plus
    paid/free marketing segments), append them as a new week column to the
    existing Google Sheet data, and re-upload the whole table."""
    # Define the auth scopes to request.
    scope_ga = ['https://www.googleapis.com/auth/analytics.readonly']
    #Authorize credentials with Google Drive to export data to a Google spreadsheet
    scope_drive = [
        'https://spreadsheets.google.com/feeds',
        'https://www.googleapis.com/auth/drive'
    ]
    credentials = ServiceAccountCredentials.from_json_keyfile_name(
        json_key_file_location, scope_drive)
    gc = gspread.authorize(credentials)
    # Authenticate and construct service.
    service = get_service('analytics', 'v3', scope_ga, key_file_location,
                          service_account_email)
    profile = get_first_profile_id(service)
    # Modulize the classes
    ga_non_segmented = GA_non_segmented(service, profile)
    ga_segmented = GA_segmented(service, profile)
    # Create an empty pandas dataframe and fill it with this weeks data.
    # Each get_results(...) call yields one metric row; no_seg metrics come
    # first, then the paid segment, then the free segment (order matters —
    # it must match the index built below).
    data = pd.DataFrame()
    for metric in no_seg:
        data = data.append(ga_non_segmented.get_results(
            service, profile, start_day, end_day, metric).get('rows')[0],
            ignore_index=True)
    for metric in seg:
        data = data.append(
            ga_segmented.get_results(service, profile, start_day, end_day,
                                     metric, marketing_paid).get('rows')[0],
            ignore_index=True)
    for metric in seg:
        data = data.append(
            ga_segmented.get_results(service, profile, start_day, end_day,
                                     metric, marketing_free).get('rows')[0],
            ignore_index=True)
    # Single column labelled with the current ISO week number.
    data.columns = [dt.date.today().isocalendar()[1]]
    seg_paid = []
    for word in seg:
        seg_paid.append(word + '_paid')
    seg_free = []
    for word in seg:
        seg_free.append(word + '_free')
    # Index rows in the same order the metrics were appended above.
    data = data.set_index([no_seg + seg_paid + seg_free])
    # Download the existing Gsheet data to a pandas dataframe
    spreadsheet = 'your_googlesheet_id'
    wks_name = 'ga_weekly_data'
    existing_data = g2d.download(gfile=spreadsheet, wks_name=wks_name,
                                 col_names=True, row_names=True)
    # Define new DataFrame, calculate and append data (the new week column
    # overwrites any existing column with the same week number).
    ga_weekly_data = pd.DataFrame()
    ga_weekly_data = existing_data
    ga_weekly_data[data.columns] = data[data.columns]
    #Upload the whole pandas dataframe to the data dump spreadsheet on worksheet installs
    d2g.upload(ga_weekly_data, spreadsheet, wks_name)
def test_big_worksheet(user_credentials_not_available):
    """Round-trip frames larger than gspread's default sheet size
    (1000 rows / 100 columns) to verify the sheet is grown on upload.
    Skipped (xfail) without Google credentials."""
    if user_credentials_not_available:
        pytest.xfail(reason='Credentials')
    import string
    import random
    import numpy as np
    import pandas as pd
    from pandas.util.testing import assert_frame_equal
    from df2gspread import df2gspread as d2g
    from df2gspread import gspread2df as g2d
    from df2gspread.utils import get_credentials
    from df2gspread.gfiles import get_file_id
    from df2gspread.gfiles import delete_file
    # Random sheet path so concurrent test runs don't collide.
    filepath = '/df2gspread_tests/' + ''.join(
        random.choice(string.ascii_uppercase + string.digits)
        for _ in range(10))
    # Indexes count out of default 1000
    df_upload = pd.DataFrame(index=range(1001), columns=range(2))
    df_upload = df_upload.fillna('0')
    d2g.upload(df_upload, filepath, row_names=False, col_names=False)
    df_download = g2d.download(filepath)
    # df_download.columns = df_download.columns.astype(np.int64)
    assert_frame_equal(df_upload, df_download)
    # Columns count out of default 100
    df_upload = pd.DataFrame(index=range(1), columns=range(101))
    df_upload = df_upload.fillna('0')
    d2g.upload(df_upload, filepath, row_names=False, col_names=False)
    df_download = g2d.download(filepath)
    # df_download.columns = df_download.columns.astype(np.int64)
    assert_frame_equal(df_upload, df_download)
    # Clear created file from drive
    credentials = get_credentials()
    file_id = get_file_id(credentials, filepath)
    delete_file(credentials, file_id)
def before_request():
    """Reload the global shelter table from Google Sheets and attach
    latitude/longitude columns geocoded from each row's address."""
    global df_master
    print('bfr downloading')
    df_master = g2d.download("1Jds3z4WA9qDrl19qWAOyAVNIUNLtGTzZoijEWHS6Jbc",
                             "shelters", col_names=True, row_names=True)
    print('bfr add geo locations')
    geolocator = Nominatim()
    # PERF FIX: geocode each address once and reuse the result — the
    # original issued two geocoding requests per row (one for lat, one
    # for lng), doubling the network round-trips.
    locations = df_master['address'].apply(geolocator.geocode)
    df_master['lat'] = locations.apply(lambda loc: loc.latitude)
    df_master['lng'] = locations.apply(lambda loc: loc.longitude)
def extract_cdc_beds_historical_data(credentials):
    """Download the CDC category-3 historical-beds tab and return it
    indexed by state."""
    raw = gspread2df.download(
        CATEGORY_3_DATA_GOOGLE_SHEET_KEY,
        CATEGORY_3_HISTORICAL_DATA_TAB,
        credentials=credentials,
        col_names=True,
    )
    # One row per state: promote the state column to the index.
    return raw.set_index(STATE_FIELD)
def readFromGoogle(SPREADSHEET_KEY, WORKSHEET_NAME):
    """Read a worksheet with service-account credentials and return the
    Ticker/Company columns."""
    credentials = ServiceAccountCredentials.from_json_keyfile_name(
        'credentials.json', scope)
    gc = gspread.authorize(credentials)
    sheet_df = g2d.download(SPREADSHEET_KEY, WORKSHEET_NAME,
                            credentials=credentials, col_names=True)
    print("Reading done")
    # Callers only need the ticker/company pairing.
    return sheet_df[['Ticker', 'Company']]
def download(self, max_retry_count=3):
    """Download the configured worksheet, retrying up to
    ``max_retry_count`` times; returns None when every attempt fails."""
    for _attempt in range(max_retry_count):
        try:
            return g2d.download(self.spreadsheet, self.sheetname,
                                col_names=True, row_names=True)
        except Exception:
            # Transient API failure: swallow and retry.
            continue
    return None
def download_as_df(self, sheet_id, sheet_name):
    """Download one worksheet as a DataFrame using a service-account
    JSON key (first row used as column names)."""
    scopes = ['https://www.googleapis.com/auth/spreadsheets']
    # Json Client ID for OAuth
    json_file = '/Users/watarut/Downloads/Earth-Neuro-Link.json'
    credentials = ServiceAccountCredentials.from_json_keyfile_name(
        json_file, scopes=scopes)
    http_auth = credentials.authorize(Http())
    sheet_df = g2d.download(sheet_id,
                            wks_name=sheet_name,
                            col_names=True,
                            row_names=False,
                            credentials=credentials)
    return sheet_df
def readFromGoogle(SPREADSHEET_KEY, WORKSHEET_NAME):
    """Read a worksheet with service-account credentials and return the
    company/link/search-key columns used by the scraper."""
    credentials = ServiceAccountCredentials.from_json_keyfile_name(
        'credentials.json', scope)
    gc = gspread.authorize(credentials)
    sheet_df = g2d.download(SPREADSHEET_KEY, WORKSHEET_NAME,
                            credentials=credentials, col_names=True)
    print("Reading done")
    wanted = [
        'Ticker', 'Company', 'Financial Links', 'Search Key for Company',
        'Press Release Links', 'Search Key for Press Release'
    ]
    return sheet_df[wanted]
def get_keywords_from_source(location, keyword_col_name, source_name):
    """
    Generating the keyword lists to search on.

    location         -- a CSV URL, a Google Sheet id, or a string containing
                        "recommendation" (handled elsewhere; returns empty).
    keyword_col_name -- column in the source holding the keywords.
    source_name      -- tag recorded in the output 'source' column.
    Returns a DataFrame with category/keyword/source/notes columns (empty
    for the recommendation case).
    """
    test_keywords = pd.DataFrame()
    test_df = None
    if '.csv' in location:
        s = requests.get(location).content
        test_df = pd.read_csv(io.StringIO(s.decode('utf-8')))
    elif "recommendation" in location:
        pass
    else:
        test_df = g2d.download(location, wks_name='Sheet2', col_names=True)
    # BUG FIX: the original populated the result only when '.csv' was in
    # location, so data downloaded from Google Sheets was silently
    # discarded and an empty frame returned. Populate for any loaded source.
    if test_df is not None:
        test_keywords['category'] = test_df.category
        test_keywords['keyword'] = test_df[keyword_col_name]
        test_keywords['source'] = source_name
        test_keywords['notes'] = None
    return test_keywords
def get_weights(loc='cloud'):
    """Load the trading weights table.

    Parameters:
        loc: 'local' to read the Dropbox Excel workbook,
             'cloud' (default) to read the shared Google Sheet.
    Returns the weights DataFrame with values coerced to numeric where
    possible.
    Raises ValueError for an unrecognised ``loc``.
    """
    # NOTE: the original had a stray no-op expression `'local', 'cloud'`
    # here — now folded into the docstring above.
    if loc == 'local':
        weights = pd.read_excel(
            '/home/rem/Documents/FXCM Trading (Dropbox)/Weights.xlsx',
            sheetname='Weights', index_col=0)
    elif loc == 'cloud':
        #weights = pd.read_excel("https://1drv.ms/x/s!ApHwtSabAP46itkDw2YNwQHNAzCM4A", sheetname='Weights', index_col=0)
        weights = g2d.download(
            gfile="1bmy2DLu5NV5IP-mo9rGWOyHOx7bEfoglVZmzzuHi5zc",
            wks_name="Weights",
            col_names=True,
            row_names=True,
            credentials=None,
            start_cell='A1')
    else:
        # The original fell through to a NameError here; fail clearly.
        raise ValueError("loc must be 'local' or 'cloud', got %r" % (loc,))
    #print('\n', 'Weights\n', weights, '\n', '\n')
    weights = weights.apply(pd.to_numeric, errors='ignore')
    return weights
def download_df(worksheet_name):
    """Downloading an existing worksheet from Google Drive.

    Args:
        worksheet_name: The correct worksheet name on the spreadsheet.

    Returns:
        The existing worksheet data, or None; and the first date of the
        next week's query.
    """
    try:
        existing = g2d.download(gfile=SPREADSHEET,
                                wks_name=worksheet_name,
                                col_names=True,
                                row_names=True)
        # Last column is labelled "<year>-<week>"; resume one week later.
        year_str, week_str = existing.columns[-1].split('-')[:2]
        start_date = (get_week_days(int(year_str), int(week_str))
                      + dt.timedelta(days=7))
        return existing, start_date
    except RuntimeError:
        # Worksheet missing: start from the project's first week.
        return None, dt.date(2016, 8, 29)
def get_GS():
    """Download the customer tracking sheet, move the Kit ID out of the
    index, subset the needed columns, and drop rows without a Kit ID."""
    # use full path to spreadsheet file
    Andy_cust_GS = "INSERT LINK TO EDITING"  # FIX ME
    Andy_cust_name = "INSERT GS NAME"  #FIX ME
    #Convert GS into pandas df
    andy_cust_df = g2d.download(Andy_cust_GS, Andy_cust_name,
                                col_names=True, row_names=True)
    # The sheet index is the Kit ID; surface it as an ordinary column.
    andy_cust_df['Kit ID'] = andy_cust_df.index
    andy_cust_df = andy_cust_df.reset_index(drop=True)
    #subset df and drop all rows which are missing Kit IDs
    wanted = [
        'Kit ID', 'Customer name', 'Date Shipped', 'USPS Tracking',
        'Return Mailer Tracking', 'emails', 'ID'
    ]
    subset = andy_cust_df[wanted]
    subset = subset.loc[subset['Kit ID'] != '']
    return subset
def clean_jotform(jotform_id):
    """
    Read the Jotform data from a google spreadsheet and return the cleaned
    pandas DataFrame.

    Parameters
    ----------
    jotform_id : str
        the google sheet ID

    Returns
    -------
    df : DataFrame
        the pandas data frame holding the RSVP submission data
    """
    # read the data (header row as columns, no index column)
    data = g2d.download(jotform_id, 'Sheet1',
                        col_names=True, row_names=False, start_cell='A1')
    # set empty cells to null
    data = data.replace("", np.nan)
    # drop entries where BOTH First Name and Last Name are null
    nameless = data['First Name'].isnull() & data['Last Name'].isnull()
    data = data[~nameless]
    # parse submission timestamps and order chronologically
    data['Submission Date'] = pd.to_datetime(data['Submission Date'])
    data = data.sort_values(by=['Submission Date'])
    # keep only the most recent submission per person
    data = data.drop_duplicates(subset=['First Name', 'Last Name'],
                                keep='last')
    return data
def test_df2gspread_start_cell(user_credentials_not_available):
    """upload() honours ``start_cell``: the frame lands offset from A1 and
    a full download shows the padding rows/columns of empty strings, while
    a download with the same start_cell recovers the frame exactly.
    Skipped (xfail) without Google credentials."""
    if user_credentials_not_available:
        pytest.xfail(reason='Credentials')
    import string
    import random
    import numpy as np
    import pandas as pd
    from pandas.util.testing import assert_frame_equal
    from df2gspread import df2gspread as d2g
    from df2gspread import gspread2df as g2d
    from df2gspread.utils import get_credentials
    from df2gspread.gfiles import get_file_id
    from df2gspread.gfiles import delete_file
    # Random sheet path so concurrent test runs don't collide.
    filepath = '/df2gspread_tests/' + ''.join(
        random.choice(string.ascii_uppercase + string.digits)
        for _ in range(10))
    df_upload_0 = pd.DataFrame(
        {0: ['1', '2', 'x', '4']}, index=[0, 1, 2, 3])
    # start_cell='A1' is the default layout: round-trips unchanged.
    d2g.upload(df_upload_0, filepath, row_names=False, col_names=False,
               start_cell='A1')
    df_download = g2d.download(filepath)
    df_upload = df_upload_0
    assert_frame_equal(df_upload, df_download)
    # start_cell='A2': one empty padding row appears above the data.
    d2g.upload(df_upload_0, filepath, row_names=False, col_names=False,
               start_cell='A2')
    df_download = g2d.download(filepath)
    df_upload = df_upload_0
    new_rows = 1
    new_rows_array = np.chararray((new_rows, len(df_upload.columns)))
    new_rows_array[:] = ''
    df_new_rows = pd.DataFrame(data=new_rows_array)
    df_upload = df_new_rows.append(df_upload, ignore_index=True)
    assert_frame_equal(df_upload, df_download)
    # start_cell='B1': one empty padding column appears on the left.
    d2g.upload(df_upload_0, filepath, row_names=False, col_names=False,
               start_cell='B1')
    df_download = g2d.download(filepath)
    df_upload = df_upload_0
    df_upload.insert(0, '-1', '')
    df_upload.columns = range(0, len(df_upload.columns))
    assert_frame_equal(df_upload, df_download)
    # start_cell='AB10': 27 padding columns and 9 padding rows.
    d2g.upload(df_upload_0, filepath, row_names=False, col_names=False,
               start_cell='AB10')
    df_download = g2d.download(filepath)
    df_upload = df_upload_0
    new_cols = 27
    new_cols_array = np.chararray((len(df_upload), new_cols))
    new_cols_array[:] = ''
    df_new_cols = pd.DataFrame(data=new_cols_array)
    df_upload = pd.concat([df_new_cols, df_upload], axis=1)
    df_upload.columns = range(0, len(df_upload.columns))
    new_rows = 9
    new_rows_array = np.chararray((new_rows, len(df_upload.columns)))
    new_rows_array[:] = ''
    df_new_rows = pd.DataFrame(data=new_rows_array)
    df_upload = df_new_rows.append(df_upload, ignore_index=True)
    assert_frame_equal(df_upload, df_download)
    # Backward compatibility df2gspread => gspread2df: downloading with the
    # same start_cell recovers the original frame without padding.
    d2g.upload(df_upload_0, filepath, row_names=False, col_names=False,
               start_cell='AB10')
    df_upload = df_upload_0
    df_download = g2d.download(filepath, start_cell='AB10')
    assert_frame_equal(df_upload, df_download)
    d2g.upload(df_upload_0, filepath, start_cell='AB10')
    df_upload = df_upload_0
    df_download = g2d.download(filepath, row_names=True, col_names=True,
                               start_cell='AB10')
    # Labels come back as strings from the sheet; restore integer dtype.
    df_download.index = df_download.index.astype(np.int64)
    df_download.columns = df_download.columns.astype(np.int64)
    assert_frame_equal(df_upload, df_download)
    # Clear created file from drive
    credentials = get_credentials()
    file_id = get_file_id(credentials, filepath)
    delete_file(credentials, file_id)
'https://spreadsheets.google.com/feeds', 'https://www.googleapis.com/auth/drive' ] creds = os.getenv('GOOGLE_SHEETS_CREDS_JSON') print(creds) #with open('gcreds.json', 'w') as fp: # json.dump(json.loads(creds.replace('\n','')), fp) credentials = ServiceAccountCredentials.from_json_keyfile_dict( json.loads(creds.replace('\n', '')), scope) gc = gspread.authorize(credentials) spreadsheet_key = '1QJ1Kn2j2WpLZbKnci6pyz_XpV7ULCs8qV3mA3cfV1gg' wks_name = 'sales_totals' gtotals = g2d.download(gfile=spreadsheet_key, wks_name=wks_name, credentials=credentials, col_names=True) gtotals['total'] = gtotals.total.astype(float) gtotals['sold'] = gtotals.sold.astype(float) gtotals['left'] = gtotals.left.astype(float) gtotals['sales_revenue'] = gtotals.sales_revenue.astype(float) # gtotals.head() gtotals_daily = gtotals[['date', 'total', 'sold', 'left', 'sales_revenue']].groupby('date').max().reset_index() app = dash.Dash(__name__) server = app.server app.layout = html.Div(children=[ html.H1(children='Positive Grid Spark Amplifier Sales Metrics'), html.Div(children=[
def test_worksheet(user_credentials_not_available):
    """Round-trip a frame through upload/download across the default
    worksheet, a named worksheet, a raw file id, and every combination of
    row/column name handling. Skipped (xfail) without credentials."""
    if user_credentials_not_available:
        pytest.xfail(reason='Credentials')
    import string
    import random
    import numpy as np
    import pandas as pd
    from pandas.util.testing import assert_frame_equal
    from df2gspread import df2gspread as d2g
    from df2gspread import gspread2df as g2d
    from df2gspread.utils import get_credentials
    from df2gspread.gfiles import get_file_id
    from df2gspread.gfiles import delete_file
    df_upload = pd.DataFrame(
        {0: ['1', '2', 'x', '4']}, index=[0, 1, 2, 3])
    # Random sheet path so concurrent test runs don't collide.
    filepath = '/df2gspread_tests/' + ''.join(
        random.choice(string.ascii_uppercase + string.digits)
        for _ in range(10))
    # First worksheet as default
    d2g.upload(df_upload, filepath)
    df_download = g2d.download(filepath, col_names=True, row_names=True)
    # Labels come back as strings from the sheet; restore integer dtype.
    df_download.columns = df_download.columns.astype(np.int64)
    df_download.index = df_download.index.astype(np.int64)
    assert_frame_equal(df_upload, df_download)
    # Updating existed spreadsheet
    d2g.upload(df_upload, filepath, wks_name='Sheet2')
    df_download = g2d.download(filepath, col_names=True, row_names=True)
    df_download.columns = df_download.columns.astype(np.int64)
    df_download.index = df_download.index.astype(np.int64)
    assert_frame_equal(df_upload, df_download)
    # Updating with file_id
    credentials = get_credentials()
    file_id = get_file_id(credentials, filepath)
    d2g.upload(df_upload, file_id)
    df_download = g2d.download(file_id, col_names=True, row_names=True)
    df_download.columns = df_download.columns.astype(np.int64)
    df_download.index = df_download.index.astype(np.int64)
    assert_frame_equal(df_upload, df_download)
    # Only row_names
    wks = d2g.upload(df_upload, filepath, col_names=False)
    df_download = g2d.download(filepath, row_names=True)
    df_download.index = df_download.index.astype(np.int64)
    # df_download.columns = df_download.columns.astype(np.int64)
    assert_frame_equal(df_upload, df_download)
    # Only col_names
    wks = d2g.upload(df_upload, filepath, row_names=False)
    df_download = g2d.download(filepath, col_names=True)
    df_download.columns = df_download.columns.astype(np.int64)
    assert_frame_equal(df_upload, df_download)
    # Without column or row names
    wks = d2g.upload(df_upload, filepath, row_names=False, col_names=False)
    df_download = g2d.download(filepath)
    assert_frame_equal(df_upload, df_download)
    # Clear created file from drive
    delete_file(credentials, file_id)
def test_df2gspread_df_size(user_credentials_not_available):
    """upload() with ``df_size=True`` resizes the worksheet to exactly fit
    the frame (shrinking from the default, growing for large frames, +1
    row/col when names are written), and ``new_sheet_dimensions`` sets
    explicit sizes. Skipped (xfail) without Google credentials."""
    if user_credentials_not_available:
        pytest.xfail(reason='Credentials')
    import string
    import random
    import numpy as np
    import pandas as pd
    import gspread
    from pandas.util.testing import assert_frame_equal, assert_equal
    from df2gspread import df2gspread as d2g
    from df2gspread import gspread2df as g2d
    from df2gspread.utils import get_credentials
    from df2gspread.gfiles import get_file_id, delete_file, get_worksheet
    # Random sheet path so concurrent test runs don't collide.
    filepath = '/df2gspread_tests/' + ''.join(
        random.choice(string.ascii_uppercase + string.digits)
        for _ in range(10))
    credentials = get_credentials()
    gc = gspread.authorize(credentials)
    gfile_id = get_file_id(credentials, filepath, write_access=True)
    # Small 4x1 frame and a large 1500x2 frame (all string cells).
    df_upload_a = pd.DataFrame(
        {0: ['1', '2', 'x', '4']}, index=[0, 1, 2, 3])
    df_upload_b = pd.DataFrame(
        data=np.array([np.arange(1500)] * 2).T).applymap(str)
    #Uploading a small DF to new sheet to test for sizing down from default
    d2g.upload(df_upload_a, filepath, "test1", row_names=False,
               col_names=False, df_size=True)
    df_download = g2d.download(filepath, "test1")
    df_upload = df_upload_a
    wks = get_worksheet(gc, gfile_id, "test1")
    assert_equal(wks.row_count, len(df_upload))
    assert_equal(len(df_upload.columns), wks.col_count)
    assert_equal(len(df_download), len(df_upload))
    assert_frame_equal(df_upload, df_download)
    #Upload a large DF to existing, smaller sheet to test for proper expansion
    d2g.upload(df_upload_b, filepath, "test1", row_names=False,
               col_names=False, df_size=True)
    df_download = g2d.download(filepath, "test1")
    df_upload = df_upload_b
    wks = get_worksheet(gc, gfile_id, "test1")
    assert_equal(wks.row_count, len(df_upload))
    assert_equal(len(df_upload.columns), wks.col_count)
    assert_equal(len(df_download), len(df_upload))
    assert_frame_equal(df_upload, df_download)
    #Uploading a small DF to existing large sheet to test for sizing down from default
    d2g.upload(df_upload_a, filepath, "test1", row_names=False,
               col_names=False, df_size=True)
    df_download = g2d.download(filepath, "test1")
    df_upload = df_upload_a
    wks = get_worksheet(gc, gfile_id, "test1")
    assert_equal(wks.row_count, len(df_upload))
    assert_equal(len(df_upload.columns), wks.col_count)
    assert_equal(len(df_download), len(df_upload))
    assert_frame_equal(df_upload, df_download)
    #New sheet with col names, make sure 1 extra row and column
    d2g.upload(df_upload_a, filepath, "test2", row_names=True,
               col_names=True, df_size=True)
    df_download = g2d.download(filepath, "test2")
    df_upload = df_upload_a
    wks = get_worksheet(gc, gfile_id, "test2")
    assert_equal(wks.row_count, len(df_upload) + 1)
    assert_equal(len(df_upload.columns) + 1, wks.col_count)
    assert_equal(len(df_download), len(df_upload) + 1)
    #Upload to new sheet with specified dimensions
    d2g.upload(df_upload_a, filepath, "test3", row_names=False,
               col_names=False, new_sheet_dimensions=(100, 10))
    df_download = g2d.download(filepath, "test3")
    df_upload = df_upload_a
    wks = get_worksheet(gc, gfile_id, "test3")
    assert_equal(wks.row_count, 100)
    assert_equal(10, wks.col_count)
    assert_frame_equal(df_upload, df_download)
    #Test df_size with start_cell: sheet is sized to start offset + frame,
    #so the expected download has 27 empty columns and 9 empty rows padding.
    d2g.upload(df_upload_a, filepath, "test4", row_names=False,
               col_names=False, start_cell='AB10', df_size=True)
    df_download = g2d.download(filepath, "test4")
    df_upload = df_upload_a
    new_cols = 27
    new_cols_array = np.chararray((len(df_upload), new_cols))
    new_cols_array[:] = ''
    df_new_cols = pd.DataFrame(data=new_cols_array)
    df_upload = pd.concat([df_new_cols, df_upload], axis=1)
    df_upload.columns = range(0, len(df_upload.columns))
    new_rows = 9
    new_rows_array = np.chararray((new_rows, len(df_upload.columns)))
    new_rows_array[:] = ''
    df_new_rows = pd.DataFrame(data=new_rows_array)
    df_upload = df_new_rows.append(df_upload, ignore_index=True)
    wks = get_worksheet(gc, gfile_id, "test4")
    assert_equal(wks.row_count, len(df_upload))
    assert_equal(len(df_upload.columns), wks.col_count)
    assert_equal(len(df_download), len(df_upload))
    assert_frame_equal(df_upload, df_download)
    #Test df_size with start_cell and sheet dimensions which need to be expanded
    d2g.upload(df_upload_a, filepath, "test5", row_names=False,
               col_names=False, start_cell='AB10', df_size=True,
               new_sheet_dimensions=(10, 27))
    df_download = g2d.download(filepath, "test5")
    df_upload = df_upload_a
    new_cols = 27
    new_cols_array = np.chararray((len(df_upload), new_cols))
    new_cols_array[:] = ''
    df_new_cols = pd.DataFrame(data=new_cols_array)
    df_upload = pd.concat([df_new_cols, df_upload], axis=1)
    df_upload.columns = range(0, len(df_upload.columns))
    new_rows = 9
    new_rows_array = np.chararray((new_rows, len(df_upload.columns)))
    new_rows_array[:] = ''
    df_new_rows = pd.DataFrame(data=new_rows_array)
    df_upload = df_new_rows.append(df_upload, ignore_index=True)
    wks = get_worksheet(gc, gfile_id, "test5")
    assert_equal(wks.row_count, len(df_upload))
    assert_equal(len(df_upload.columns), wks.col_count)
    assert_equal(len(df_download), len(df_upload))
    assert_frame_equal(df_upload, df_download)
    # Clear created file from drive
    delete_file(credentials, gfile_id)
args = parser.parse_args() s_id = '1AdUDx4-xDF9PvYCu-JpwTnkqs3wvWxrQ4GTdehGl-AU' wks_name = 'risky queue' scope = [ 'https://spreadsheets.google.com/feeds', 'https://www.googleapis.com/auth/drive' ] credentials = ServiceAccountCredentials.from_json_keyfile_name( 'risky-users-e25b80bbf7bb.json', scope) t = g2d.download(s_id, wks_name, col_names=True, row_names=False, credentials=credentials) t['user_id'] = t['user_id'].astype('int') t['num_kids'] = t['num_kids'].astype('float').astype('int') t['created_at'] = pd.to_datetime(t['created_at']) u = '(' + ', '.join([str(x) for x in set(t['user_id'])]) + ')' users = pd.read_sql_query( """ SELECT user_id, max(user_box_rank) AS user_box_rank FROM dw.fact_user_box_count WHERE season_id = 10 AND user_id IN {users}
'Size': size_my, 'Descrip': descr_my, }) my_repos.to_csv("Repos.csv") #-------- Searching Github -------------------------------------------------------------- from df2gspread import gspread2df as g2d from df2gspread import df2gspread as d2g d2g.upload(pr_instr, gfile='/Trading FXCM/PyData', wks_name='pr_instr') weights = g2d.download(gfile="1bmy2DLu5NV5IP-mo9rGWOyHOx7bEfoglVZmzzuHi5zc", wks_name="Weights", col_names=True, row_names=True, credentials=None, start_cell='A1') keywords_all = "IEX, hedge, oanda, quandl, NYSE, ETF, " \ "market calendar, equity, kelly, arbitrage, backtest, " \ "quant, EDGAR, del Prado, zorro trading" keywords = [keyword.strip() for keyword in keywords_all.split(',')] # exclude_keywords = "ng-zorro, ngx-zorro, ngzorro, CSS, Typescript" # exclude_keyword = [keyword.strip() for keyword in exclude_keywords.split(',')] # query = '+'.join(keyword) + '+NOT'+ '+'.join(exclude_keyword)+'pushed:>=2019-07-05'+'language:python' +'+in:readme+in:description'
def input_gload(uri, column='Username', wks_name=None):
    """Load one column of a Google Sheet as an array of values.

    Raises RuntimeError when the optional df2gspread dependency is absent.
    """
    if not gspread2df:
        raise RuntimeError('df2gspread is not installed')
    frame = gspread2df.download(uri, wks_name, col_names=True)
    return frame[column].values
def test_worksheet(user_credentials_not_available):
    """Round-trip test: d2g.upload then g2d.download across worksheet options.

    Covers the default worksheet, a named worksheet, upload by file_id, and
    the row_names/col_names permutations. Requires live Google credentials
    (xfails otherwise) and cleans up the created Drive file at the end.
    """
    if user_credentials_not_available:
        pytest.xfail(reason='Credentials')

    import string
    import random
    import numpy as np
    import pandas as pd
    # pandas.util.testing was deprecated in 0.25 and removed in pandas 2.0;
    # pandas.testing is the supported public location.
    from pandas.testing import assert_frame_equal
    from df2gspread import df2gspread as d2g
    from df2gspread import gspread2df as g2d
    from df2gspread.utils import get_credentials
    from df2gspread.gfiles import get_file_id
    from df2gspread.gfiles import delete_file

    df_upload = pd.DataFrame({0: ['1', '2', 'x', '4']}, index=[0, 1, 2, 3])
    filepath = '/df2gspread_tests/' + ''.join(
        random.choice(string.ascii_uppercase + string.digits)
        for _ in range(10))

    # First worksheet as default
    d2g.upload(df_upload, filepath)
    df_download = g2d.download(filepath, col_names=True, row_names=True)
    # Sheet labels come back as strings; coerce to int64 for comparison.
    df_download.columns = df_download.columns.astype(np.int64)
    df_download.index = df_download.index.astype(np.int64)
    assert_frame_equal(df_upload, df_download)

    # Updating existed spreadsheet
    d2g.upload(df_upload, filepath, wks_name='Sheet2')
    df_download = g2d.download(filepath, col_names=True, row_names=True)
    df_download.columns = df_download.columns.astype(np.int64)
    df_download.index = df_download.index.astype(np.int64)
    assert_frame_equal(df_upload, df_download)

    # Updating with file_id
    credentials = get_credentials()
    file_id = get_file_id(credentials, filepath)
    d2g.upload(df_upload, file_id)
    df_download = g2d.download(file_id, col_names=True, row_names=True)
    df_download.columns = df_download.columns.astype(np.int64)
    df_download.index = df_download.index.astype(np.int64)
    assert_frame_equal(df_upload, df_download)

    # Only row_names
    wks = d2g.upload(df_upload, filepath, col_names=False)
    df_download = g2d.download(filepath, row_names=True)
    df_download.index = df_download.index.astype(np.int64)
    # df_download.columns = df_download.columns.astype(np.int64)
    assert_frame_equal(df_upload, df_download)

    # Only col_names
    wks = d2g.upload(df_upload, filepath, row_names=False)
    df_download = g2d.download(filepath, col_names=True)
    df_download.columns = df_download.columns.astype(np.int64)
    assert_frame_equal(df_upload, df_download)

    # Without column or row names
    wks = d2g.upload(df_upload, filepath, row_names=False, col_names=False)
    df_download = g2d.download(filepath)
    assert_frame_equal(df_upload, df_download)

    # Clear created file from drive
    delete_file(credentials, file_id)
def test_df2gspread_start_cell(user_credentials_not_available):
    """Test d2g.upload's start_cell option, verifying via g2d.download.

    Each section uploads df_upload_0 anchored at a different cell and
    rebuilds the expected full-sheet frame (blank rows/columns padding the
    data) for comparison. Requires live Google credentials (xfails
    otherwise) and deletes the created Drive file at the end.
    """
    if user_credentials_not_available:
        pytest.xfail(reason='Credentials')

    import string
    import random
    import numpy as np
    import pandas as pd
    # pandas.util.testing was removed in pandas 2.0; use pandas.testing.
    from pandas.testing import assert_frame_equal
    from df2gspread import df2gspread as d2g
    from df2gspread import gspread2df as g2d
    from df2gspread.utils import get_credentials
    from df2gspread.gfiles import get_file_id
    from df2gspread.gfiles import delete_file

    filepath = '/df2gspread_tests/' + ''.join(
        random.choice(string.ascii_uppercase + string.digits)
        for _ in range(10))
    df_upload_0 = pd.DataFrame({0: ['1', '2', 'x', '4']}, index=[0, 1, 2, 3])

    # start_cell='A1' is equivalent to the default upload
    d2g.upload(df_upload_0, filepath, row_names=False, col_names=False,
               start_cell='A1')
    df_download = g2d.download(filepath)
    df_upload = df_upload_0
    assert_frame_equal(df_upload, df_download)

    # start_cell='A2': expect one blank row above the data
    d2g.upload(df_upload_0, filepath, row_names=False, col_names=False,
               start_cell='A2')
    df_download = g2d.download(filepath)
    df_upload = df_upload_0
    new_rows = 1
    new_rows_array = np.chararray((new_rows, len(df_upload.columns)))
    new_rows_array[:] = ''
    df_new_rows = pd.DataFrame(data=new_rows_array)
    # DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
    df_upload = pd.concat([df_new_rows, df_upload], ignore_index=True)
    assert_frame_equal(df_upload, df_download)

    # start_cell='B1': expect one blank column before the data.
    # Copy first: DataFrame.insert mutates in place and would otherwise
    # silently alter df_upload_0 for the sections below.
    d2g.upload(df_upload_0, filepath, row_names=False, col_names=False,
               start_cell='B1')
    df_download = g2d.download(filepath)
    df_upload = df_upload_0.copy()
    df_upload.insert(0, '-1', '')
    df_upload.columns = range(0, len(df_upload.columns))
    assert_frame_equal(df_upload, df_download)

    # start_cell='AB10': 27 blank columns and 9 blank rows pad the data
    d2g.upload(df_upload_0, filepath, row_names=False, col_names=False,
               start_cell='AB10')
    df_download = g2d.download(filepath)
    df_upload = df_upload_0
    new_cols = 27
    new_cols_array = np.chararray((len(df_upload), new_cols))
    new_cols_array[:] = ''
    df_new_cols = pd.DataFrame(data=new_cols_array)
    df_upload = pd.concat([df_new_cols, df_upload], axis=1)
    df_upload.columns = range(0, len(df_upload.columns))
    new_rows = 9
    new_rows_array = np.chararray((new_rows, len(df_upload.columns)))
    new_rows_array[:] = ''
    df_new_rows = pd.DataFrame(data=new_rows_array)
    df_upload = pd.concat([df_new_rows, df_upload], ignore_index=True)
    assert_frame_equal(df_upload, df_download)

    # Backward compatibility df2gspread => gspread2df
    d2g.upload(df_upload_0, filepath, row_names=False, col_names=False,
               start_cell='AB10')
    df_upload = df_upload_0
    df_download = g2d.download(filepath, start_cell='AB10')
    assert_frame_equal(df_upload, df_download)

    d2g.upload(df_upload_0, filepath, start_cell='AB10')
    df_upload = df_upload_0
    df_download = g2d.download(filepath, row_names=True, col_names=True,
                               start_cell='AB10')
    df_download.index = df_download.index.astype(np.int64)
    df_download.columns = df_download.columns.astype(np.int64)
    assert_frame_equal(df_upload, df_download)

    # Clear created file from drive
    credentials = get_credentials()
    file_id = get_file_id(credentials, filepath)
    delete_file(credentials, file_id)
def stress_test(event='none', alpha=0.05, compare='SPY', *args, **kwargs):
    """Historical stress test of the portfolio over selected crisis windows.

    Loads a local price CSV, restricts it to the chosen event window,
    applies portfolio weights pulled from a Google Sheet, and prints/plots
    VaR and CVaR at level ``alpha``.

    Parameters:
        event: 'us', 'ch', 'br', 'all', or 'none' (prints options and returns).
        alpha: tail probability for VaR/CVaR.
        compare: ticker column to plot against the portfolio.
        **kwargs: optional 'compare2' — a second ticker column to plot.
    """
    start = ''
    end = ''
    pr = pd.read_csv(
        "/home/rem/Documents/FXCM Trading (Dropbox)/Stress Test Data.csv",
        index_col=0)
    log_pr = np.log(pr)
    log_rt = np.around(log_pr.diff() * 100, 1)
    log_rt = log_rt.drop(log_rt.index[0]).fillna(0)

    # Events — .ix was removed in pandas 1.0; label slicing now uses .loc
    if event == 'us':
        log_rt = log_rt.loc['2011-07-01':'2011-12-30']
        pr = pr.loc['2011-07-01':'2011-12-30']
        start = datetime.date(2011, 7, 1)
        end = datetime.date(2011, 12, 30)
        event_name = 'US DOWNGRADE'
    elif event == 'ch':
        log_rt = log_rt.loc['2015-08-01':'2016-04-29']
        pr = pr.loc['2015-08-01':'2016-04-29']
        start = datetime.date(2015, 8, 1)
        end = datetime.date(2016, 4, 29)
        event_name = 'CHINA DEVALUATION'
    elif event == 'br':
        log_rt = log_rt.loc['2016-06-01':'2016-07-15']
        pr = pr.loc['2016-06-01':'2016-07-15']
        start = datetime.date(2016, 6, 1)
        end = datetime.date(2016, 7, 15)
        event_name = 'BREXIT'
    elif event == 'all':
        log_rt1 = log_rt.loc['2011-07-01':'2012-03-30']
        log_rt2 = log_rt.loc['2015-08-01':'2016-04-29']
        log_rt3 = log_rt.loc['2016-06-01':'2016-07-15']
        log_rt = pd.concat([log_rt1, log_rt2, log_rt3])
        pr1 = pr.loc['2011-07-01':'2012-03-30']
        pr2 = pr.loc['2015-08-01':'2016-04-29']
        pr3 = pr.loc['2016-06-01':'2016-07-15']
        pr = pd.concat([pr1, pr2, pr3])
        start = datetime.date(2011, 7, 1)
        end = datetime.date(2016, 7, 15)
        event_name = 'ALL'
    elif event == 'none':
        print(
            'No event selected\n Options: US Downgrade (us), China Devaluation (ch), Brexit (br), all (all)',
            '\n')
        return

    # Portfolio weights live in a shared Google Sheet.
    weights = g2d.download(
        gfile="1bmy2DLu5NV5IP-mo9rGWOyHOx7bEfoglVZmzzuHi5zc",
        wks_name="Weights",
        col_names=True,
        row_names=True,
        credentials=None,
        start_cell='A1')
    weights = weights.apply(pd.to_numeric, errors='ignore')
    R = log_rt
    W = weights.WEIGHTS

    # T-do: Rename Z to rt_NAV
    S = R.apply(lambda x: np.asarray(x) * np.asarray(W), axis=1)
    S['rt_hist'] = np.round(S.sum(axis=1), 1)
    returns = S.rt_hist

    # Historical VaR: the alpha-quantile of sorted returns, scaled to equity.
    sorted_returns = np.sort(returns)
    index = int(alpha * len(sorted_returns))
    var_stress = np.round(
        abs(sorted_returns[index]) / 100 * weights.EQUITY[0], 0)

    # CVaR: mean of the worst `index` returns, scaled to equity.
    sum_var = sorted_returns[0]
    for i in range(1, index):
        sum_var += sorted_returns[i]
    cvar_stress = np.round(abs(sum_var / index) / 100 * weights.EQUITY[0], 0)

    # Plot compare
    pr['Portfolio'] = S.rt_hist.cumsum()
    compare2 = kwargs.get('compare2', None)
    # Only include compare2 when supplied: indexing with a None column
    # label would raise KeyError.
    if compare2 is not None:
        plot_cols = [compare, compare2, 'Portfolio']
    else:
        plot_cols = [compare, 'Portfolio']
    pr[plot_cols].plot(subplots=True,
                       title=event_name,
                       layout=(3, 1),
                       figsize=(12, 8),
                       sharex=True)

    print('\n', 'Stress Test from', start, 'to', end, ' ', '***', event_name,
          '***', '\n')
    print('VaR (Stress.):', var_stress, ' ', 'CVaR (Stress.):', cvar_stress,
          '\n')
    return  #(var_stress, cvar_stress)
def get_mapping():
    """Download the 'Mapping' worksheet of the configured spreadsheet.

    Returns a DataFrame with the sheet's first row as column names and a
    default integer index.
    """
    mapping_df = g2d.download(
        gsheet.id, "Mapping", col_names=True, row_names=False)
    return mapping_df
def test_df2gspread_df_size(user_credentials_not_available):
    """Test d2g.upload's df_size and new_sheet_dimensions options.

    Verifies worksheet row/column counts after uploads that shrink, expand,
    or pre-size the sheet, including combinations with start_cell. Requires
    live Google credentials (xfails otherwise) and deletes the created
    Drive file at the end.
    """
    if user_credentials_not_available:
        pytest.xfail(reason='Credentials')

    import string
    import random
    import numpy as np
    import pandas as pd
    import gspread
    # pandas.util.testing was removed in pandas 2.0; use pandas.testing.
    from pandas.testing import assert_frame_equal
    from df2gspread import df2gspread as d2g
    from df2gspread import gspread2df as g2d
    from df2gspread.utils import get_credentials
    from df2gspread.gfiles import get_file_id, delete_file, get_worksheet

    filepath = '/df2gspread_tests/' + ''.join(
        random.choice(string.ascii_uppercase + string.digits)
        for _ in range(10))
    credentials = get_credentials()
    gc = gspread.authorize(credentials)
    gfile_id = get_file_id(credentials, filepath, write_access=True)

    df_upload_a = pd.DataFrame({0: ['1', '2', 'x', '4']}, index=[0, 1, 2, 3])
    df_upload_b = pd.DataFrame(
        data=np.array([np.arange(1500)] * 2).T).applymap(str)

    # Uploading a small DF to new sheet to test for sizing down from default
    d2g.upload(df_upload_a, filepath, "test1",
               row_names=False, col_names=False, df_size=True)
    df_download = g2d.download(filepath, "test1")
    df_upload = df_upload_a
    wks = get_worksheet(gc, gfile_id, "test1")
    assert wks.row_count == len(df_upload)
    assert len(df_upload.columns) == wks.col_count
    assert len(df_download) == len(df_upload)
    assert_frame_equal(df_upload, df_download)

    # Upload a large DF to existing, smaller sheet to test for proper expansion
    d2g.upload(df_upload_b, filepath, "test1",
               row_names=False, col_names=False, df_size=True)
    df_download = g2d.download(filepath, "test1")
    df_upload = df_upload_b
    wks = get_worksheet(gc, gfile_id, "test1")
    assert wks.row_count == len(df_upload)
    assert len(df_upload.columns) == wks.col_count
    assert len(df_download) == len(df_upload)
    assert_frame_equal(df_upload, df_download)

    # Uploading a small DF to existing large sheet to test sizing down
    d2g.upload(df_upload_a, filepath, "test1",
               row_names=False, col_names=False, df_size=True)
    df_download = g2d.download(filepath, "test1")
    df_upload = df_upload_a
    wks = get_worksheet(gc, gfile_id, "test1")
    assert wks.row_count == len(df_upload)
    assert len(df_upload.columns) == wks.col_count
    assert len(df_download) == len(df_upload)
    assert_frame_equal(df_upload, df_download)

    # New sheet with col names, make sure 1 extra row and column
    d2g.upload(df_upload_a, filepath, "test2",
               row_names=True, col_names=True, df_size=True)
    df_download = g2d.download(filepath, "test2")
    df_upload = df_upload_a
    wks = get_worksheet(gc, gfile_id, "test2")
    assert wks.row_count == len(df_upload) + 1
    assert len(df_upload.columns) + 1 == wks.col_count
    assert len(df_download) == len(df_upload) + 1

    # Upload to new sheet with specified dimensions
    d2g.upload(df_upload_a, filepath, "test3",
               row_names=False, col_names=False,
               new_sheet_dimensions=(100, 10))
    df_download = g2d.download(filepath, "test3")
    df_upload = df_upload_a
    wks = get_worksheet(gc, gfile_id, "test3")
    assert wks.row_count == 100
    assert 10 == wks.col_count
    assert_frame_equal(df_upload, df_download)

    # Test df_size with start_cell: AB10 pads 27 columns and 9 rows
    d2g.upload(df_upload_a, filepath, "test4",
               row_names=False, col_names=False,
               start_cell='AB10', df_size=True)
    df_download = g2d.download(filepath, "test4")
    df_upload = df_upload_a
    new_cols = 27
    new_cols_array = np.chararray((len(df_upload), new_cols))
    new_cols_array[:] = ''
    df_new_cols = pd.DataFrame(data=new_cols_array)
    df_upload = pd.concat([df_new_cols, df_upload], axis=1)
    df_upload.columns = range(0, len(df_upload.columns))
    new_rows = 9
    new_rows_array = np.chararray((new_rows, len(df_upload.columns)))
    new_rows_array[:] = ''
    df_new_rows = pd.DataFrame(data=new_rows_array)
    # DataFrame.append was removed in pandas 2.0; pd.concat replaces it.
    df_upload = pd.concat([df_new_rows, df_upload], ignore_index=True)
    wks = get_worksheet(gc, gfile_id, "test4")
    assert wks.row_count == len(df_upload)
    assert len(df_upload.columns) == wks.col_count
    assert len(df_download) == len(df_upload)
    assert_frame_equal(df_upload, df_download)

    # Test df_size with start_cell and sheet dimensions needing expansion
    d2g.upload(df_upload_a, filepath, "test5",
               row_names=False, col_names=False,
               start_cell='AB10', df_size=True,
               new_sheet_dimensions=(10, 27))
    df_download = g2d.download(filepath, "test5")
    df_upload = df_upload_a
    new_cols = 27
    new_cols_array = np.chararray((len(df_upload), new_cols))
    new_cols_array[:] = ''
    df_new_cols = pd.DataFrame(data=new_cols_array)
    df_upload = pd.concat([df_new_cols, df_upload], axis=1)
    df_upload.columns = range(0, len(df_upload.columns))
    new_rows = 9
    new_rows_array = np.chararray((new_rows, len(df_upload.columns)))
    new_rows_array[:] = ''
    df_new_rows = pd.DataFrame(data=new_rows_array)
    df_upload = pd.concat([df_new_rows, df_upload], ignore_index=True)
    wks = get_worksheet(gc, gfile_id, "test5")
    assert wks.row_count == len(df_upload)
    assert len(df_upload.columns) == wks.col_count
    assert len(df_download) == len(df_upload)
    assert_frame_equal(df_upload, df_download)

    # Clear created file from drive
    delete_file(credentials, gfile_id)