Example #1
def gsc_with_filters(site,
                     creds,
                     start_date,
                     end_date=default_end,
                     storage='authorizedcreds.dat'):

    scDict = defaultdict(list)  # Create a dict to populate with extraction
    webmasters_service = authorize_creds(creds, storage)  # Authorize the API
    request = {
        'startDate': start_date,
        'endDate': end_date,
        'dimensions': ['date', 'page', 'query'],  # country, device, page, query, searchAppearance
        'dimensionFilterGroups': [{
            'filters': [{
                'dimension': 'query',
                'operator': 'contains',  # contains, equals, notEquals, notContains
                'expression': 'chouinard'
            }]
        }]
    }

    response = execute_request(webmasters_service, site, request)

    try:
        for row in response['rows']:
            scDict['date'].append(row['keys'][0] or 0)
            scDict['page'].append(row['keys'][1] or 0)
            scDict['query'].append(row['keys'][2] or 0)
            scDict['clicks'].append(row['clicks'] or 0)
            #scDict['ctr'].append(row['ctr'] or 0)
            #scDict['impressions'].append(row['impressions'] or 0)
            #scDict['position'].append(row['position'] or 0)
    except Exception as e:
        print(f'An error occurred: {e}')

    # Add response to dataframe
    df = pd.DataFrame(data=scDict)
    return df
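
A minimal usage sketch for gsc_with_filters, assuming authorize_creds and execute_request come from the author's oauth module (as in Example #3 below), that default_end is defined elsewhere in the script, and that the property URL, credentials file and date range are placeholders:

# Hypothetical call; the property URL and dates are placeholders
site = 'https://www.example.com/'
creds = 'client_secrets.json'
df = gsc_with_filters(site, creds, '2020-01-01', '2020-01-31')
print(df.head())  # Columns: date, page, query, clicks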
Example #2
def gsc_by_url(site,
               list_of_urls,
               creds,
               start_date,
               end_date=default_end,
               storage='authorizedcreds.dat'):
    webmasters_service = authorize_creds(creds, storage)  # Get credentials to log in to the API
    scDict = defaultdict(list)
    for url in list_of_urls:
        request = {
            'startDate': date_to_str(start_date),
            'endDate': date_to_str(end_date),
            'dimensions': ['page'],  # country, device, page, query, searchAppearance
            'dimensionFilterGroups': [{
                'filters': [{
                    'dimension': 'page',
                    'operator': 'equals',  # contains, equals, notEquals, notContains
                    'expression': url
                }]
            }]
        }

        response = execute_request(webmasters_service, site, request)

        scDict['page'].append(url)

        try:
            for row in response['rows']:
                scDict['clicks'].append(row['clicks'] or 0)
                scDict['impressions'].append(row['impressions'] or 0)
        except Exception as e:
            print(f'An error occurred while extracting {url}: {e}')
            scDict['clicks'].append(0)  # Keep columns the same length when a URL returns no rows
            scDict['impressions'].append(0)
    # Add response to dataframe
    df = pd.DataFrame(data=scDict)
    return df
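
A minimal usage sketch for gsc_by_url under the same assumptions, additionally assuming date_to_str accepts datetime.date objects; the property and page URLs are placeholders:

import datetime

site = 'https://www.example.com/'
urls = ['https://www.example.com/page-1/', 'https://www.example.com/page-2/']
df = gsc_by_url(site, urls, 'client_secrets.json',
                start_date=datetime.date(2020, 1, 1),
                end_date=datetime.date(2020, 1, 31))
print(df)  # One row per URL with clicks and impressions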
Example #3
'''
Retrieve a list of validated properties from Google Search Console.
@author:    Jean-Christophe Chouinard. 
@role:      Sr. SEO Specialist at SEEK.com.au
@website:   jcchouinard.com
@LinkedIn:  linkedin.com/in/jeanchristophechouinard/ 
@Twitter:   twitter.com/@ChouinardJC

Learn Python for SEO
jcchouinard.com/python-for-seo

Get API Keys
jcchouinard.com/how-to-get-google-search-console-api-keys/

How to format your request
jcchouinard.com/what-is-google-search-console-api/
'''

from oauth import authorize_creds, execute_request

creds = 'client_secrets.json'

webmasters_service = authorize_creds(creds) 

site_list = webmasters_service.sites().list().execute()

# Filter for verified websites
verified_sites_urls = [s['siteUrl'] for s in site_list['siteEntry']
                       if s['permissionLevel'] != 'siteUnverifiedUser'
                          and s['siteUrl'][:4] == 'http']
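
To see why both conditions are needed, it can help to print the raw entries before filtering; the sketch below only relies on the siteUrl and permissionLevel fields already used above (domain properties are reported as 'sc-domain:example.com', which the 'http' check excludes):

for entry in site_list['siteEntry']:
    print(entry['siteUrl'], '->', entry['permissionLevel'])

print(verified_sites_urls)  # Only verified URL-prefix (http/https) properties remain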

Example #4
def gsc_to_csv(site,
               output,
               creds,
               start_date,
               end_date=default_end,
               storage='authorizedcreds.dat'):
    get_path = fm.get_full_path(site, output, start_date)
    domain_name = get_path[1]  # Get Domain From URL
    output_path = get_path[3]  # Folder created with your domain name
    fm.create_project(domain_name)  # Create a new project folder
    csv_dt = fm.get_dates_csvs(output_path, site, output)  # Read existing CSV
    webmasters_service = authorize_creds(creds, storage)  # Get credentials to log in to the API

    # Set up Dates
    dates = dm.get_dates(start_date)
    start_date = dates[1]  # Start on the first day of the month
    # end_date defaults to 3 days in the past, since GSC doesn't expose the latest data
    delta = datetime.timedelta(days=1)  # This will let us loop one day at a time
    scDict = defaultdict(list)  # Initialize an empty dict to store the data
    while start_date <= end_date:  # Loop through all dates until start_date is equal to end_date.
        curr_month = dm.date_to_YM(start_date)
        full_path = os.path.join(output_path, curr_month + '_' + output)
        print(full_path)
        # If a GSC csv file exists from previous extraction
        # and dates in the file match to dates we are extracting...
        if csv_dt is not None and \
            dm.date_to_str(start_date) in csv_dt:
            print('Existing Date: %s' % start_date)  #... Print the date
            start_date += delta  #... and increment without extraction
        else:  # If the file doesn't exist, or the dates don't match...
            # ... Print and start the extraction
            print('Start date at beginning: %s' % start_date)

            maxRows = 25000  # Maximum 25K per call
            numRows = 0  # Start at Row Zero
            status = ''  # Initialize status of extraction

            while status != 'Finished':  # As long as this day's data has not been fully extracted
                # Extract this information from GSC
                dt = dm.date_to_str(start_date)
                print(f'date = {dt}')
                request = {
                    'startDate': dt,  # Current date in the while loop
                    'endDate': dt,  # Same day: extract one date at a time
                    'dimensions': ['date', 'page', 'query'],  # Dimensions to extract
                    'rowLimit': maxRows,  # Number of rows to extract per call (max 25k)
                    'startRow': numRows  # Start at row 0, then 25k, then 50k... until all rows are read
                }
                response = execute_request(webmasters_service, site, request)
                #Process the response
                try:
                    for row in response['rows']:
                        scDict['date'].append(row['keys'][0] or 0)
                        scDict['page'].append(row['keys'][1] or 0)
                        scDict['query'].append(row['keys'][2] or 0)
                        scDict['clicks'].append(row['clicks'] or 0)
                        scDict['ctr'].append(row['ctr'] or 0)
                        scDict['impressions'].append(row['impressions'] or 0)
                        scDict['position'].append(row['position'] or 0)
                    print('successful at %i' % numRows)

                except Exception as e:
                    print('error occurred at %i: %s' % (numRows, e))

                # Add response to dataframe
                df = pd.DataFrame(data=scDict)
                df['clicks'] = df['clicks'].astype('int')
                df['ctr'] = df['ctr'] * 100
                df['impressions'] = df['impressions'].astype('int')
                df['position'] = df['position'].round(2)

                # Increment the 'start_row'
                print('Numrows at the start of loop: %i' % numRows)
                try:
                    numRows = numRows + len(response['rows'])
                except KeyError:  # No 'rows' key left in the response
                    status = 'Finished'  # If no rows are left, change status
                print('Numrows at the end of loop: %i' % numRows)
                if numRows % maxRows != 0:  # If numRows not divisible by 25k...
                    status = 'Finished'  # change status, you have covered all lines.
            #print(f'DF to write {df.head()}')
            to_write = df[df['date'].str.contains(dm.date_to_str(start_date))]
            fm.write_to_csv(to_write, full_path)
            start_date += delta  # Increment start_date to continue the loop
    print(f'Done extracting {site}')
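
A minimal way to run the export above, assuming the fm (file helpers) and dm (date helpers) modules from the author's scripts are importable, that dm.get_dates accepts a datetime.date, and that the output file name and dates are placeholders:

import datetime

site = 'https://www.example.com/'
output = 'gsc_data.csv'
creds = 'client_secrets.json'
start_date = datetime.date(2020, 1, 1)  # Extraction starts from the month containing this date
gsc_to_csv(site, output, creds, start_date)  # Writes one CSV per month inside the domain's project folder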