def run_gsc_cron():
    sites = list_sites()
    error = False
    message = ""
    try:
        # Audit sites
        audit = bigq.audit_tables(sites)
        log.info('Tables Audited')
        # Load site data to BigQuery
        for site in sites:
            end_date = cfg.END_OFFSET_DATE
            start_date = cfg.START_OFFSET_DATE
            if start_date > end_date and db.last_date(site) is not None:
                start_date = end_date
            for x in range(end_date, start_date + 1):
                loaded = load_site_data(site, x)
                if loaded:
                    log.info('Site Data Loaded for {0}'.format(site))
                else:
                    log.error('Could not load data for {0}'.format(site))
        message = str(sites)
    except HttpError as e:
        log.error("GSC Cron Error, {0}".format(e.content))
        message = "GSC Cron Error, {0}".format(e.content)
        error = True
    return error, message
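# run_gsc_cron() iterates day offsets between cfg.END_OFFSET_DATE and
# cfg.START_OFFSET_DATE, and the loaders below call get_offset_date() to turn
# an offset into the date string the Search Analytics API expects. That helper
# is not defined in this section. A minimal sketch, assuming the offset counts
# days back from today and that the no-arg call falls back to a default
# (the value 3 is a hypothetical stand-in, since GSC data lags by a few days;
# the real code likely reads its default from config.py):
import datetime

def get_offset_date(offset=3):
    day = datetime.date.today() - datetime.timedelta(days=int(offset))
    return day.strftime('%Y-%m-%d')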
def get(self):
    sites = None
    sitelist = []
    reset = self.request.get('reset', '0')
    if reset == '1':
        db.delete_entries()
        message = "Cron database deleted. <a href='/'>Refresh page</a>."
    elif not check_credentials():
        message = ("Upload service credentials JSON from Google App Engine "
                   "and update the filename in the config.py file.")
    elif cfg.HIDE_HOMEPAGE:
        message = "Nothing to see here."
    else:
        try:
            message = ("Add <strong>{0}</strong> to each Google Search Console "
                       "account that you want to manage.").format(svcdata['client_email'])
            sites = gsc.list_sites()
            for site in sites:
                sitelist.append("{0} (Last Save: {1}, {2} rows)".format(
                    site, db.last_date(site), db.last_count(site)))
        except HttpError as e:
            log.error("GSC Error, {0}".format(e.content))
            message = "GSC Error, {0}. Check log.".format(e.content)
    variables = {
        'message': message,
        'sites': sitelist
    }
    self.render_response('root.html', **variables)
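# The handler above depends on the uploaded service-account credentials, and
# the loaders below call get_gsc_service(), which is not defined in this
# section. A minimal sketch using the classic oauth2client pattern for App
# Engine, where cfg.CREDENTIALS_JSON (the uploaded keyfile name) is an
# assumption:
import httplib2
from googleapiclient.discovery import build
from oauth2client.service_account import ServiceAccountCredentials

def get_gsc_service():
    # The read-only Webmasters scope is sufficient for searchanalytics queries.
    credentials = ServiceAccountCredentials.from_json_keyfile_name(
        cfg.CREDENTIALS_JSON,
        scopes=['https://www.googleapis.com/auth/webmasters.readonly'])
    return build('webmasters', 'v3', http=credentials.authorize(httplib2.Http()))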
def load_site_data(site):
    data = None
    loaded = False
    allSiteRows = 0
    if db.last_date(site) == get_offset_date():
        # Already loaded
        log.info('Ignoring. Already run this day for site {0}.'.format(site))
        return False
    service = get_gsc_service()
    for query in cfg.GSC_QUERY_LIST:
        query['startDate'] = get_offset_date()
        query['endDate'] = get_offset_date()
        query['startRow'] = 0
        rowsSent = 0
        while True:
            data = execute_request(service, site, query)
            if data and 'rows' in data:
                rows = data['rows']
                numRows = len(rows)
                try:
                    result = bigq.stream_row_to_bigquery(site, rows)
                    log.info('Added {0} rows to {1}'.format(numRows, site))
                    rowsSent += numRows
                    # startRow is zero-based, so the next page begins at the
                    # number of rows already fetched.
                    query['startRow'] = rowsSent
                    loaded = True
                except HttpError as e:
                    log.error("Stream to Bigquery Error. \n" + e.content)
                    return False
            else:
                allSiteRows += rowsSent
                log.info("No rows left. Start row of last request was " +
                         str(query['startRow']) + ".")
                break
    if allSiteRows > 0:
        db.add_entry(site, get_offset_date(), allSiteRows)
    else:
        log.info("No rows found for this site.")
    return loaded
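# execute_request() is called by both loaders but not defined in this section.
# A minimal sketch, assuming it is a thin wrapper over the Search Analytics
# query endpoint of the Webmasters v3 API (the real version may add retries):
def execute_request(service, site, query):
    # siteUrl identifies the Search Console property; the query dict carries
    # startDate, endDate, dimensions, rowLimit and startRow.
    return service.searchanalytics().query(siteUrl=site, body=query).execute()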
def load_site_data(site, offset):
    data = None
    loaded = False
    query = cfg.GSC_QUERY
    if db.last_date(site) == get_offset_date(offset):
        # Already loaded
        log.info('Ignoring. Already run this day for site {0}.'.format(site))
        return False
    query['startDate'] = get_offset_date(offset)
    query['endDate'] = get_offset_date(offset)
    query['startRow'] = 0
    service = get_gsc_service()
    rowsSent = 0
    while True:
        data = execute_request(service, site, query)
        if data and 'rows' in data:
            rows = data['rows']
            numRows = len(rows)
            try:
                result = bigq.stream_row_to_bigquery(site, rows)
                log.info('Added {0} rows to {1}'.format(numRows, site))
                rowsSent += numRows
                loaded = True
                if numRows == 10000:
                    # A full page means more rows may remain; startRow is
                    # zero-based, so continue from the rows already fetched.
                    query['startRow'] = rowsSent
                    continue
                else:
                    if numRows > 0:
                        db.add_entry(site, get_offset_date(offset), rowsSent)
                    break
            except HttpError as e:
                log.error("Stream to Bigquery Error. \n" + e.content)
                break
        else:
            break
    return loaded
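# The numRows == 10000 check above only works if cfg.GSC_QUERY requests
# 10,000 rows per page, so that a full page signals more may remain. The
# config is not shown in this section; a plausible shape, with the dimensions
# purely illustrative:
GSC_QUERY = {
    'startDate': '',    # filled in per run by load_site_data()
    'endDate': '',
    'dimensions': ['query', 'page', 'device', 'country'],
    'rowLimit': 10000,  # must match the pagination check in load_site_data()
    'startRow': 0,
}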