Example #1
 def sendMessagetoHost(self):
     source = Spread("https://docs.google.com/spreadsheets/d/15_YZB-LVAa2NkHORZ8nvBq25YBj4R7ZUHjY11HRl0R4/edit#gid=763524256")
     self.driver.get("https://www.airbnb.com.au")
     input("Please sign in in the browser, then press Enter to continue...")
     data = source.sheet_to_df()
     hosts = data["url"].values
     count = 0
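     # Visit each listing, extract the numeric host id from the profile link, and send the message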
     for i in hosts:
         self.driver.get(i)
         time.sleep(25)
         soup = BeautifulSoup(self.driver.page_source,'lxml')
         host_id = soup.findAll("a", {"class": "_16lonkd"})
         host_id = host_id[0]
         host_id = host_id["href"]
         host_id = re.findall(r'\d+', host_id)[0]
         test_message = input("Please enter message you would like to send: ")
         button = self.driver.find_element_by_link_text("Contact host")
         button.click()
         time.sleep(10)
         text_area = self.driver.find_elements_by_id("homes-contact-host--message-textarea")
         text_area[0].send_keys(test_message)
         button = self.driver.find_element_by_class_name("_72kmbi0")
         button.click()
         count+=1
         if count > 10:
             break
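
Aside: the find_element_by_* helpers used above were removed in Selenium 4; under that version the same three lookups would use By locators instead (a sketch, same element targets):

from selenium.webdriver.common.by import By

button = self.driver.find_element(By.LINK_TEXT, "Contact host")
text_area = self.driver.find_elements(By.ID, "homes-contact-host--message-textarea")
send_button = self.driver.find_element(By.CLASS_NAME, "_72kmbi0")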
Example #2
def df_to_drive(df, foldername, filename, worksheet_index=0, **spread_kwargs):
    """Write a DataFrame to Google Drive."""
    sh = get_sheet(foldername, filename)    
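    # Note: worksheet_index is not used below; target a worksheet by passing sheet=... in spread_kwargs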
    spread = Spread(sh.id)
    skwargs = {'replace': True}
    skwargs.update(spread_kwargs)
    spread.df_to_sheet(df, **skwargs)
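
A minimal usage sketch for the helper above (the folder and file names are hypothetical; extra keyword arguments are forwarded to df_to_sheet):

import pandas as pd

df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})
df_to_drive(df, "reports", "metrics", start="A2", index=False)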
Example #3
def workflow(sheetid, tablename):
    """Process"""
    cursor = pgconn.cursor()
    cursor.execute(f"DROP TABLE IF EXISTS {tablename}")
    spread = Spread(sheetid, config=config["td"]["service_account"])
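    # index=None keeps every sheet column as a DataFrame column (no index column)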
    df = spread.sheet_to_df(index=None)
    cols = [cleaner(c) for c in df.columns]
    cursor.execute(f"CREATE TABLE {tablename} (ss_order int, %s)" %
                   (",".join([' "%s" varchar' % (s, ) for s in cols]), ))
    cursor.execute(f"GRANT SELECT on {tablename} to nobody,apache")
    for i, row in enumerate(df.itertuples()):
        vals = list(row[1:])
        sql = """
        INSERT into %s (ss_order, %s) VALUES (%s, %s)
        """ % (
            tablename,
            ",".join(['"%s"' % (s, ) for s in cols]),
            i,
            ",".join(["%s"] * len(cols)),
        )
        cursor.execute(sql, vals)
    cursor.close()
    pgconn.commit()
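
The %-formatting above splices identifiers into the SQL by hand; psycopg2's sql module can compose them safely. A sketch of the same INSERT using it (same pgconn, tablename, and cols as above):

from psycopg2 import sql

insert = sql.SQL("INSERT INTO {} (ss_order, {}) VALUES (%s, {})").format(
    sql.Identifier(tablename),
    sql.SQL(",").join(map(sql.Identifier, cols)),
    sql.SQL(",").join(sql.Placeholder() * len(cols)),
)
cursor.execute(insert, [i] + vals)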
Example #4
def uploadtosheet():
    file_name = "xyz.csv"
    df = pd.read_csv(file_name)

    # 'Example Spreadsheet' needs to already exist and your user must have access to it
    spread = Spread('Example Spreadsheet')
    # This will ask to authenticate if you haven't done so before

    # Display available worksheets
    print(spread.sheets)

    # Save DataFrame to worksheet 'SDSheet', creating it first if it doesn't exist
    spread.df_to_sheet(df,
                       index=False,
                       sheet='SDSheet',
                       start='A2',
                       replace=True)
    print(spread)
    # <gspread_pandas.client.Spread - User: '******', Spread: 'Example Spreadsheet', Sheet: 'New Test Sheet'>

    # You can also instantiate a Client separately to query folders and
    # create other Spread objects by passing in the Client
    client = Client()
    # Assuming you have a dir called 'example dir' with sheets in it
    available_sheets = client.find_spreadsheet_files_in_folders('example dir')
    spreads = []
    for sheet in available_sheets.get('example dir', []):
        spreads.append(Spread(sheet['id'], client=client))
Example #5
 def __init__(self):
     options = Options()
     options.add_argument("--headless")
     self.driver = webdriver.Firefox(
         options=options,  # pass the headless options, otherwise they are ignored
         executable_path=
         r'C:/Users/Abdul Rehman/Downloads/geckodriver-v0.26.0-win64/geckodriver.exe'
     )
     #self.driver.get("https://www.lookfantastic.com/")
     self.spreadsheet_key = Spread(
         "https://docs.google.com/spreadsheets/d/1aradT8_30SUbEj13ZyDju78V80eJB-rJyX9UmJGBxHA/edit?usp=sharing"
     )
     now = datetime.datetime.now()
     self.date = now.strftime("%Y.%m.%d")
     self.data = pd.DataFrame(
         columns=['Brand', 'Title', 'Price(EUR)', 'Review', 'Url'])
     #self.f = open("cometic_test" + self.date + ".csv","w", encoding='utf8')
     #button = self.driver.find_element_by_xpath('//*[@class="emailReengagement_close_button"]')
     #button.click()
     self.urls = {
         "https://www.lookfantastic.com/health-beauty/fragrance/eau-de-toilette.list":
         "Eau de toilette",
         "https://www.lookfantastic.com/health-beauty/fragrance/eau-de-parfum.list":
         "Eau de Parfum",
         "https://www.lookfantastic.com/brands/mac/view-all.list":
         "MAC"
     }
Example #6
def do(spreadkey, tablename):
    """Process"""
    cursor = pgconn.cursor()
    cursor.execute(f"DROP TABLE IF EXISTS {tablename}")
    spread = Spread(spreadkey, config=config["cscap"]["service_account"])
    df = spread.sheet_to_df(index=None)
    sql = f"CREATE TABLE {tablename} ("
    for col in df.columns:
        sql += "%s varchar," % (cleankey(col), )
    sql = sql[:-1] + ")"
    cursor.execute(sql)
    cursor.execute(f"GRANT SELECT on {tablename} to nobody,apache")
    for _, row in df.iterrows():
        cols = []
        values = []
        for key, val in row.items():
            cols.append(cleankey(key))
            values.append((val or "").strip())
        sql = "INSERT into %s (%s) VALUES (%s)" % (
            tablename,
            ",".join(cols),
            ",".join(["%s"] * len(cols)),
        )
        cursor.execute(sql, values)
    cursor.close()
    pgconn.commit()
Example #7
    def keepCrawling(self):

        #self.collectResults()
        counter = 0
        page = 0
        while True:

            try:
                page += 1
                print("Pages searched " + str(page))
                self.collectResults()
                counter += 20
                #if self.stop_crawling == False:
                self.driver.get("https://www.airbnb.com.au/s/" + self.location + "--Australia/homes?refinement_paths%5B%5D=%2Fhomes&query=" + self.location + "&search_type=unknown&map_toggle=false&checkin=" + self.check_in +"&checkout="+ self.check_out + "&section_offset=3&items_offset=" + str(counter))
                if counter > 20:
                    break
            except Exception as e:
                print(str(e))
                break

        #self.data.to_excel("Results.xlsx")
        spreadsheet_key = Spread("https://docs.google.com/spreadsheets/d/15_YZB-LVAa2NkHORZ8nvBq25YBj4R7ZUHjY11HRl0R4/edit#gid=0")
        spreadsheet_key.df_to_sheet(self.data, index=False, sheet=(self.location+" "+self.date))
        price = self.data["price"].values
        price = price.astype(float)  # np.float was removed in NumPy 1.24
        print("The number of properties are: " + str(len(price)))
        print("The mean price is: "+ str(np.mean(price)))
        print("The max price is: "+ str(np.max(price)))
        print("The min price is: "+ str(np.min(price)))
        print("The standard deviation is "+ str(np.std(price)))
Example #8
def share_permission(sheet_id):
    try:
        c = Spread(spread=sheet_id)
        c.add_permission(
            '[email protected]|reader|no')
        print(f'Shared {sheet_id} to service account.')
    except Exception as e:
        print(f'Failed to share {sheet_id} to service account.')
        print(e)
Example #9
def get_raw_data_sheet_to_df(spreadsheet, client, cols_to_check):
    spread = Spread(spread=spreadsheet)

    # Get metadata:
    meta = spread.__dict__.get('_spread_metadata')
    project_id = meta.get('spreadsheetId')
    project_name = meta.get('properties').get('title')

    # Locate RawData sheet
    sheets = spread.sheets
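    # .sheets is the list of gspread Worksheet objects in the workbook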

    def norm(title):
        return title.replace(' ', '').replace('.', '').lower()

    raw = [x for x in sheets
           if 'rawdata' in norm(x.title) and 'pivot' not in norm(x.title)]
    raw.sort(key=lambda x: len(norm(x.title)))
    df = spread.sheet_records_to_df(empty2zero=False,
                                    header_rows=1,
                                    sheet=raw[0],
                                    default_blank=np.nan)

    # Check for column names:
    df_cols = list(map(lambda x: x.lower(), list(df.columns)))
    cols_to_check = list(map(lambda x: x.lower(), cols_to_check))

    missing_cols = []
    for col in cols_to_check:
        if col not in df_cols:
            missing_cols.append(col)
    if len(missing_cols) > 0:

        print(f"Project {project_name} is missing (id: {project_id}) " +
              ", ".join(missing_cols))
        for missing_col in missing_cols:
            df[missing_col] = ''

    df = df.loc[:, ~df.T.duplicated(keep='first')]
    df.columns = map(lambda x: x.replace(' ', '_').replace('.', '').lower(),
                     df.columns)

    # Add additional fields for future lookup and update
    df['_meta_sheetID'] = project_id
    df['_meta_projectName'] = project_name
    df['date'] = pd.to_datetime(df['date'], errors='coerce')
    for col in ['revenue', 'cost', 'profit']:
        df[col] = pd.to_numeric(df[col].astype('str').str.replace(',', ''),
                                errors='coerce').fillna(0)
    df = df.infer_objects()
    return df
Example #10
class Sheet:

	def __init__(self):
		self.spread = Spread(SPREADSHEET_NAME)
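		# index=0 loads every sheet column as data instead of using the first column as the index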
		self.ccs = self.spread.sheet_to_df(sheet=CCS_SHEET, index=0)
		self.zu = self.spread.sheet_to_df(sheet=ZOOM_USERS, index=0)
		# self.cs = self.spread.sheet_to_df(sheet=CALENDAR_SCHEDULE)
		self.sr = self.spread.sheet_to_df(sheet=STUDENT_ROSTER, index=0)
		
		# set index in ccs sheet with Unique ID column to get updated.
		# self.ccs = self.ccs.set_index(self.ccs['Unique ID'])

		# set index in sr sheet with ID_NUMBER
		# self.sr = self.sr.set_index(self.sr['COURSE', 'ADDRESS'])

	def update_calendar_schedule(self):
		for cn, cs, desc, loc, teacher, index in zip(
			self.ccs['Course Number'],
			self.ccs['Course Section'],
			self.ccs['Description'],
			self.ccs['Site'],
			self.ccs['Instructor 1'],
			self.ccs['Unique ID']
		):
			class_name = f"Q4-{cn}-{cs}-{desc}"
			location = loc[4:]
			quarter = '2020.Q4'
			slack_channel_name = f"Q4"
			host = '' # from zoom users sheet
			notes = ''

	def update_sheet_from_df(self, df, sheet_name):
		logger.info(f'--- Update {sheet_name} Sheet from df')
		self.spread.df_to_sheet(df, index=False, sheet=sheet_name)

	def update_ccs_sheet_from_df(self, df):
		self.update_sheet_from_df(df, CCS_SHEET)

	# deprecated
	def init_sheet_with_service_key(self):
		self.credentials = service_account.Credentials.from_service_account_file(
			'./creds/google_secret.json',
			scopes=SPREADSHEETS_SCOPES,
			subject=GSUITE_USER_EMAIL)

		self.sheet_service = build('sheets', 'v4', credentials=self.credentials, cache_discovery=False)
		self.sheet = self.sheet_service.spreadsheets()

	# deprecated
	def read_sheets(self, cell_range):
		result = self.sheet.values().get(
			spreadsheetId=SPREADSHEET_ID,
			range=cell_range).execute()
		values = result.get('values', [])
		return values
Example #11
def job():
    print('Updating the clan DataFrame...')
    updatedDF = pd.concat([originalDF, pipeline()])  # DataFrame.append was removed in pandas 2.x
    #updatedDF.to_csv('clan ' + orario)
    spread = Spread('*****@*****.**', 'clan')
    spread.df_to_sheet(updatedDF,
                       index=False,
                       sheet='clan',
                       start='A1',
                       replace=True)
    return updatedDF
Example #12
    def sync(self):
        logger.info(f"Synchronizing Workbook: {self.workbook_id}")

        client = Client(creds=self.credentials)
        spread = Spread(self.workbook_id, create_sheet=True, client=client)

        for sheet_name, data in self.survey.worksheets:
            logger.info(f"Updating Google Sheet: {sheet_name}")
            spread.df_to_sheet(data, index=False, sheet=sheet_name, replace=True)
            logger.success(f"Google Sheet updated: {sheet_name}")

        logger.success(f"Workbook synchronized: {self.workbook_id}")
Example #13
 def pull_data_sheet(self, config_auth):
     spread_dict = self.__conf__["sheet_url"]
     spread_obj = {}
     sheet_dict = {}
     for j in spread_dict:
         spread = Spread(spread=spread_dict[j],
                         sheet="Sheet1",
                         config=config_auth)
         data_frame = spread.sheet_to_df()
         sheet_dict[j] = data_frame.reset_index()
         spread_obj[j] = spread
     return sheet_dict, spread_obj
Example #14
class PandasGoogleSpreadsheetWrapper(PandasWrapperBase):
    def __init__(self, credentialsManager, spreadsheetId, dataFrame=None):
        super().__init__(dataFrame)
        self.__credentialsManager = credentialsManager
        self.__id = spreadsheetId
        self.__spreadsheet = Spread(
            spread=self.__id, creds=self.__credentialsManager.credentials)

    @property
    def credentialsManager(self):
        return self.__credentialsManager

    @property
    def spreadsheetId(self):
        return self.__id

    @property
    def spreadsheet(self):
        return self.__spreadsheet

    @property
    def active_sheet(self):
        return self.__spreadsheet.sheet

    def load(self, filePath):
        # filePath = SheetName
        print("Load GoogleSpreadsheet: " + str(self.__id) + " [" +
              str(filePath) + "]")
        self.onLoadPreprocessing(self.df)
        df = self.__spreadsheet.sheet_to_df(index=False, sheet=filePath)
        self.setDataFrame(df)
        print("  Loaded Length: " + str(len(self.df.index)))
        self.onLoadPostprocessing(self.df)
        return self

    def save(self, filePath):
        # filePath = SheetName
        print("Save GoogleSpreadsheet: " + str(self.__id) + " [" +
              str(filePath) + "]")
        self.onSavePreprocessing(self.df)
        self.__spreadsheet.open_sheet(filePath, create=True)
        self.__spreadsheet.df_to_sheet(df=self.df,
                                       index=False,
                                       headers=True,
                                       start='A1',
                                       replace=True)
        print("  Saved Length : " + str(len(self.df.index)))
        self.onSavePostprocessing(self.df)
        return self
Example #15
 def __init__(self,
              spread,
              sheet=0,
              creds=None,
              create_sheet=False,
              conf_file=None):
     if creds:
         self.creds = creds
     else:
         credentials = ParseConfiguration(conf_file).get_google_creds()
         self.creds = get_creds(config=credentials)
     self.spread = Spread(spread,
                          sheet=sheet,
                          creds=self.creds,
                          create_sheet=create_sheet)
Example #16
def get_events_sheet(only_top=True):
    spreadsheet_user = get_config_field('GSHEETS', 'user')
    s = Spread(user=spreadsheet_user,
               spread='Release & PR Events',
               sheet='Events',
               create_spread=False,
               create_sheet=False)
    events = s.sheet_to_df()
    events.index = pd.to_datetime(events.index)
    events = events.reset_index().reset_index().set_index('date')
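    # Sheet cells come back as strings, so compare against the literal 'TRUE'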
    events['top'] = events['top'] == 'TRUE'
    if only_top:
        events = events[events['top']]

    return events
Example #17
def main():
    """Go Main Go."""
    config = util.get_config()

    drive = util.get_driveclient(config)

    res = drive.files().list(q="title contains 'Agronomic Data'").execute()

    for item in res["items"]:
        if item["mimeType"] != "application/vnd.google-apps.spreadsheet":
            continue
        LOG.debug(item["title"])
        spread = Spread(item["id"], config=config["cscap"]["service_account"])
        for sheet in spread.sheets:
            df = spread.sheet_to_df(index=None, sheet=sheet)
            LOG.debug("%s %s", sheet.title, len(df.index))
Example #18
def setGoogleSheetsData(CREDSPATH, SNDATA, SHEETID, SHEETTAB):
    # Connect to the Google Sheets file
    spread = Spread(
        'sn_datapusher', SHEETID, 0,
        conf.get_config(conf_dir=CREDSPATH, file_name='google_secret.json'))
    print('Spreadsheet loaded...')

    # Copies dataframe to google sheet
    spread.df_to_sheet(
        SNDATA,
        index=False,
        sheet=SHEETTAB,
        start='A1',
        replace=True,
    )
    print('Spreadsheet updated!')
Example #19
def update_petrov():
    dfs = et.load_from_file(date_str='most_recent', coll_names=['users'])
    dfu = dfs['users']

    have_codes = pd.read_csv('/Users/rbloom/Downloads/petrov_day_list.csv')
    codes = pd.read_csv('/Users/rbloom/Downloads/codez_secret_no_use.csv', usecols=['codes'])

    have_codes.head()

    projection = ['_id', 'username', 'displayName', 'karma', 'petrovPressedButtonDate', 'petrovCodesEntered', 'petrovCodesEnteredDate']
    users_raw = et.get_collection('users', query_filter={'petrovPressedButtonDate': {'$exists':True}},
                              projection=projection, db=et.get_mongo_db_object())

    users = users_raw.merge(have_codes[['Username']], left_on='username', right_on='Username', indicator=True, how='left')
    users = users.merge(dfu[['_id', 'num_days_present_last_30_days', 'createdAt', 'true_earliest']].fillna(0), left_on='_id', right_on='_id', how='left')
    users['has_codes'] = (users['_merge']=='both')
    users['valid_code'] = users['petrovCodesEntered'].apply(lambda x: x in codes.values)

    users.loc[:,['petrovPressedButtonDate', 'petrovCodesEnteredDate']] = users.loc[:,['petrovPressedButtonDate', 'petrovCodesEnteredDate']] - pd.Timedelta('7 hours')
    users['karma'] = users['karma'].fillna(0)
    users = users.sort_values('petrovPressedButtonDate', ascending=False)

    users = users[ ['displayName', 'karma', 'petrovPressedButtonDate', 'petrovCodesEntered', 'petrovCodesEnteredDate',
                        'has_codes', 'valid_code', 'num_days_present_last_30_days', 'true_earliest']]

    users_pressed = users.dropna(subset=['petrovPressedButtonDate'])
    users_pressed = users_pressed.sort_values('petrovPressedButtonDate', ascending=True)
    users_pressed = users_pressed.reset_index(drop=True).reset_index()
    users_pressed['index'] = users_pressed['index'] + 1
    users_pressed = users_pressed.sort_values('petrovPressedButtonDate', ascending=False)
    print('num users pressed button: {}'.format(users_pressed.shape[0]))

    users_pressed_and_entered = users.dropna(subset=['petrovPressedButtonDate','petrovCodesEntered'])
    print('num users pressed button and entered codes: {}'.format(users_pressed_and_entered.shape[0]))

    users_pressed_and_entered_has_codes = users_pressed_and_entered[users_pressed_and_entered['has_codes']]
    print('num users pressed button, entered codes, and have true codes: {}'.format(users_pressed_and_entered_has_codes.shape[0]))

    # plot_table(users_pressed, title='Users Who Pressed Button', online=True)
    # plot_table(users_pressed_and_entered, title='Users Who Pressed Button and Entered Codes', online=True)
    # plot_table(users_pressed_and_entered_has_codes, title='Users Who Pressed Button and Entered Some Codes Who Have True Codes', online=True)

    users_pressed['birth'] = pd.Timestamp.now()  # pd.datetime was removed from pandas
    spreadsheet_name = get_config_field('GSHEETS', 'spreadsheet_name')
    s = Spread(get_config_field('GSHEETS', 'user'), spreadsheet_name, sheet='Trust & Doom', create_spread=True,
               create_sheet=True)
    s.df_to_sheet(users_pressed, replace=True, sheet='Trust & Doom', index=False)
Example #20
def get_raw_data_sheet_to_df(spreadsheet, client, cols_to_filter):
    spread = Spread(spread=spreadsheet)

    meta = spread.__dict__.get('_spread_metadata')
    project_id = meta.get('spreadsheetId')
    project_name = meta.get('properties').get('title')

    sheets = spread.sheets
    def norm(title):
        return title.replace(' ', '').replace('.', '').lower()

    raw = [x for x in sheets
           if 'rawdata' in norm(x.title) and 'pivot' not in norm(x.title)]
    raw.sort(key=lambda x: len(norm(x.title)))
    df = spread.sheet_to_df(sheet=raw[0], index=0)

    # Check for column names:
    df_cols = list(map(lambda x: x.lower(), list(df.columns)))
    cols_to_filter = list(map(lambda x: x.lower(), cols_to_filter))
    missing_cols = []
    for col in cols_to_filter:
        if col not in df_cols:
            missing_cols.append(col)
    if len(missing_cols) > 0:

        print(f"Project {project_name} is missing (id: {project_id}) " +
              ", ".join(missing_cols))
        for missing_col in missing_cols:
            df[missing_col] = ''

    cols = list(set(cols_to_filter).intersection(df.columns))
    lower_cols = [x.replace(' ', '_').lower() for x in cols]
    if len(cols) == 0:
        return pd.DataFrame(columns=cols_to_filter)
    _df = df.loc[:, cols].copy()
    _df = _df.loc[:, ~_df.T.duplicated(keep='first')]
    _df.columns = map(lambda x: x.replace(' ', '_').lower(), _df.columns)
    _df['date'] = pd.to_datetime(_df['date'], errors='coerce')
    for col in ['revenue', 'cost', 'profit']:
        _df[col] = pd.to_numeric(_df[col].astype('str').str.replace(',', ''),
                                 errors='coerce').fillna(0)
    return _df
Example #21
def import_library_sheet_validation_from_google():
    global validation_df
    spread = Spread(lab_spreadsheet_id)
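    # header_rows=1 with start_row=1 reads the first sheet row as the header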
    validation_df = spread.sheet_to_df(sheet='Validation',
                                       index=0,
                                       header_rows=1,
                                       start_row=1)
    hit = validation_df.iloc[0]
    logger.debug(f"First record of validation data: {hit}")
    for column_name in metadata_validation_column_names:
        logger.debug(f"Checking for column name {column_name}...")
        if column_name not in hit:
            logger.error(
                f"Could not find column {column_name}. The file is not structured as expected! Aborting."
            )
            exit(-1)
    logger.info(f"Loaded library tracking sheet validation data.")
Example #22
    def on_epoch_end(self, epoch, logs=None):
        spread = Spread('*****@*****.**',
                        'https://docs.google.com/spreadsheets/d/1CksJQdyochF1M6RB4XfFgewjYGak38ixOhoAMySWTM8/edit')
        df_results = spread.sheet_to_df(sheet='Quantitative')
        df_results = df_results.reset_index()

        # DataFrame.append was removed in pandas 2.x; build a one-row frame and concat
        new_row = pd.DataFrame([{'Dataset': self.dataset,
                                 'Machine': self.machine,
                                 'Training time': time() - self._start_time,
                                 'Epochs': epoch,
                                 'Loss': logs.get('loss'),
                                 'Accuracy': logs.get('acc'),
                                 'Validation loss': logs.get('val_loss'),
                                 'Validation accuracy': logs.get('val_acc'),
                                 'Weights file': self.file_weights.format(epoch=epoch, **logs),
                                 'Additional parameters': self.additional_parameters}])
        df_results = pd.concat([df_results, new_row], ignore_index=True)
        spread.df_to_sheet(df_results, sheet='Quantitative', index=False, replace=True)
Example #23
def get_data():
    """ query data from the vendors table """
    conn = None
    scoped_credentials = ['https://spreadsheets.google.com/feeds',
         'https://www.googleapis.com/auth/drive']
    credentials = service_account.Credentials.from_service_account_file('./service_account.json', scopes=scoped_credentials)
    gc = gspread.service_account(filename='./service_account.json')
    sh = gc.create("OLX_Ihor_Nikolenko_"+datetime.today().strftime('%Y-%m-%d'))
    sh.share('*****@*****.**', perm_type='user', role='writer')
    spread = Spread(spread="OLX_Ihor_Nikolenko_"+datetime.today().strftime('%Y-%m-%d'), creds=credentials)

    df_overlap = pd.DataFrame(columns=('category_row', 'category_col', 'overlap'))
    try:
        params = config()
        conn = psycopg2.connect(**params)
        cur = conn.cursor()
        cur.execute('WITH Counted_Ads AS ( '
                    'SELECT DISTINCT user_id, '
                    'category_name, '
                    'category_id, '
                    'COUNT(*) AS Ads ' 
                    'FROM ads '
                    'group by user_id, ' 
                    'category_id, category_name '
                    ') '
                    'SELECT *, RANK() OVER(PARTITION BY user_id ORDER BY Ads DESC) AS Rank_Category '  
                    'FROM Counted_Ads')
        print("The number of parts: ", cur.rowcount)
        row = cur.fetchall()
        df = pd.DataFrame(row, columns=('user_id', 'category', 'category_id', 'Ads', 'Rank_Category'))
        categories = set(df.category)
        for cat_row, cat_col in product(categories, categories):
            df1 = df[df.category == cat_row]["user_id"]
            df2 = df[df.category == cat_col]["user_id"]
            intersection = len(set(df1).intersection(set(df2)))
            df_overlap.loc[len(df_overlap) + 1] = [cat_row, cat_col, intersection]

        pvt = df_overlap.pivot_table(index=['category_row'], columns=['category_col'], values='overlap', aggfunc='sum')
        spread.df_to_sheet(pvt, index=True, sheet='Test task', start='A1', replace=True)
        cur.close()
    except (Exception, psycopg2.DatabaseError) as error:
        print(error)
    finally:
        if conn is not None:
            conn.close()
Example #24
def importTo_clean_sheet():
    from gspread_pandas import Spread
    from datetime import datetime, timedelta
    import pandas as pd
    #import cleaner
    #cleaner = importlib.reload(cleaner)
    #from cleaner import cleaning_and_to_sql

    pd.set_option('display.max_rows', 20)
    pd.set_option('display.max_columns', 50)
    s = Spread('work', 'Python Autoscrapers')
    df1 = s.sheet_to_df(index=0,
                        start_row=1,
                        header_rows=1,
                        sheet="Macbooks-CPH").astype(str)


    dups_ids = df1.pivot_table(index=['DBA ID'], aggfunc='size')
    di = dups_ids.to_frame()

    di.columns = ['days active']
    di.reset_index(inplace=True)

    df1 = pd.merge(df1, di, on='DBA ID')
    df1 = df1.drop_duplicates(subset='DBA ID', keep='first')

    filter = df1["Date Scraped"] != " "
    df1 = df1[filter]

    df1['state'] = "Not sold yet"

    #datetime_object = datetime.strptime('Jun 1 2005  1:33PM', '%b %d %Y %I:%M%p')
    today = datetime.now()  # 'today' was previously undefined
    for stdat, ndays, idx in zip(df1['Date Scraped'], df1['days active'],
                                 df1.index):
        do = datetime.strptime(stdat, '%d/%m/%Y')
        ndays -= 1
        do = do + timedelta(days=ndays)
        tod = datetime.strptime(today.strftime("%d/%m/%Y"), '%d/%m/%Y')

        if do < tod:
            df1.loc[idx, 'state'] = 'Sold'  # avoid chained assignment
    # print(do.strftime("%d/%m/%Y"), "   ", stdat, " today", today.strftime("%d/%m/%Y"))

    s = Spread('work', 'Python Autoscrapers')
    s.df_to_sheet(df1,
                  index=False,
                  sheet='Macbooks-CPH/Clean',
                  start='A2',
                  replace=True)
Example #25
def import_library_sheet_from_google(year):
    global library_tracking_spreadsheet_df
    spread = Spread(lab_spreadsheet_id)
    library_tracking_spreadsheet_df = spread.sheet_to_df(sheet=str(year),
                                                         index=0,
                                                         header_rows=1,
                                                         start_row=1)
    hit = library_tracking_spreadsheet_df.iloc[0]
    logger.debug(f"First record: {hit}")
    for column_name in column_names:
        logger.debug(f"Checking for column name {column_name}...")
        if column_name not in hit:
            logger.error(
                f"Could not find column {column_name}. The file is not structured as expected! Aborting."
            )
            exit(-1)
    logger.info(
        f"Loaded {len(library_tracking_spreadsheet_df.index)} records from library tracking sheet."
    )
Example #26
def main():
    print('loading source files...')
    main_doc = Spread(spread=DOCID)
    datapoints = main_doc.sheet_to_df(sheet='datapoints', index=None)
    topics = main_doc.sheet_to_df(sheet='topics', index=None)
    concepts = main_doc.sheet_to_df(sheet='concepts', index=None)
    datapoint_docs = dict()   # we can reuse the spreadsheet object to download multiple sheets
    downloaded = set()
    env = {
        'downloaded': downloaded,
        'datapoint_docs': datapoint_docs
    }
    print('saving datapoints into etl/source/datapoints...')
    for _, row in datapoints.iterrows():
        process(row, env)

    datapoints.to_csv(osp.join(SOURCE_DIR, 'datapoints.csv'), index=False)
    topics.to_csv(osp.join(SOURCE_DIR, 'topics.csv'), index=False)
    concepts.to_csv(osp.join(SOURCE_DIR, 'concepts.csv'), index=False)
Example #27
    def pd_to_gsheet(self,
                     df,
                     spreadsheet_key,
                     worksheet_name,
                     value_input_option='USER_ENTERED',
                     clean=True,
                     use_df2gspread=True):
        """
        :param df: pandas DataFrame
        :param spreadsheet_key: id for Spreadsheet taken from URL
        :param worksheet_name: name as visible in worksheet
        :param value_input_option: 'USER_ENTERED' if scope is to maintain column types from pandas DataFrame
        :param clean: whether to clear the target worksheet first
        :param use_df2gspread: upload via df2gspread if True, otherwise via gspread_pandas
        :return:
        """
        if use_df2gspread:
            if clean is True:
                self.delete_cells(spreadsheet_key, worksheet_name)
            return d2g.upload(df,
                              spreadsheet_key,
                              worksheet_name,
                              credentials=self.creds,
                              row_names=False,
                              value_input_option=value_input_option,
                              clean=clean)

        else:
            if clean is True:
                self.delete_cells(spreadsheet_key, worksheet_name)
            x = Spread(spreadsheet_key,
                       worksheet_name,
                       creds=self.creds,
                       create_sheet=True)
            return x.df_to_sheet(df,
                                 index=False,
                                 sheet=worksheet_name,
                                 replace=clean)
Example #28
def zd_api(request):
    config = conf.get_config("./gspread_pandas/")

    articles = []
    url = "https://collegetrack.zendesk.com//api/v2/help_center/categories/360000085163/articles.json"

    # also not used, but if we wanted to add in users the api URL is: /api/v2/users.json

    while url:
        response = requests.get(url, auth=(ZD_USERNAME, ZD_PASSWORD))
        if response.status_code != 200:
            print("Status:", response.status_code, "Problem with the request. Exiting.")
            exit()
        data = json.loads(response.text)
        for article in data["articles"]:
            articles.append(
                [
                    article["id"],
                    article["title"],
                    article["html_url"],
                    article["updated_at"],
                ]
            )
        url = data["next_page"]

    # converting the date column into a usable form
    # to_write_updated = [datetime.date(parse(i[3])) for i in to_write]

    df = pd.DataFrame(articles)

    df.columns = ["ID", "Title", "URL", "Date"]
    df.Date = pd.to_datetime(df.Date).dt.date

    df = df.sort_values(by="Date", ascending=True)
    df.loc[:, "ID"] = df["ID"].astype("int").astype("str")
    # df = df.set_index("ID")

    # articles.sort(key = lambda item: item[3], reverse=False)

    # articles.sort(key = lambda item: item[3], reverse=True)

    spread = Spread(SPREADSHEET_ID, config=config)
    spread.open_sheet(0)
    existing_sheet = spread.sheet_to_df(sheet="Sheet1")
    existing_sheet_subset = existing_sheet[
        ["Internal / Staff Facing", "Person Responsible"]
    ].reset_index()

    merged_df = df.merge(existing_sheet_subset, on="ID", how="left")

    spread.df_to_sheet(
        merged_df, index=False, sheet="Sheet1", start="A1", replace=False
    )
Example #29
def move_dashboards():
    client = Client()
    # Get Dashboard links:
    dashboard_links_sheet = Spread(
        spread='1geNkTULCutp7PgcqiuMKaNkH7Ynp1nGu3oX1NqoXHBA',
        client=client,
        sheet='Dashboard').sheet_to_df()
    sheet_ids = dashboard_links_sheet['Link dashboard'].str.extract(
        '/d/([^/]+)', expand=False).unique().tolist()
    for sheet_id in sheet_ids:
        try:
            client.move_file(sheet_id, '/New Dashboards')
        except Exception as e:
            print('Error with sheet id {}: {}'.format(sheet_id, e))
Example #30
def create_and_update_all_sheets(dfs, spreadsheet_name):
    dfu = dfs['users']
    dfp = dfs['posts']
    # dfv = dfs['votes']

    s = Spread(spread=spreadsheet_name,
               sheet=None,
               create_spread=True,
               create_sheet=True,
               user=get_config_field('GSHEETS', 'user'))
    _ = create_and_update_user_sheet(dfu, s)
    _ = create_and_update_posts_sheet(dfp, s)
    # _ = create_and_update_votes_sheet(dfv, s)  # we never use this

    return s