def check_failed_repos(self): """Check repository watchers on GitHub.""" # Get all of the repositories from Carto all_repos = self.get_all_repos() repos = {} headers = { 'User-Agent': 'VertNet', 'Accept': 'application/vnd.github.v3+json', 'Authorization': 'token {0}'.format(apikey('ghb')) } for repo in all_repos: orgname = repo[0] reponame = repo[1] if orgname is None or reponame is None: self.failed_repos.append(repo) continue # API URL https://api.github.com/repos/[orgname]/[reponame]/subscribers rpc = urlfetch.create_rpc() url = '/'.join([GH_URL, 'repos', orgname, reponame, 'subscribers']) urlfetch.set_default_fetch_deadline(60) urlfetch.make_fetch_call(rpc, url, headers=headers) repos[repo] = rpc # Wait 0.1 second to avoid GitHub abuse triggers time.sleep(0.1) # temporarily hard code the watcher to look for for repo in repos: rpc = repos[repo] result = rpc.get_result() content = json.loads(result.content) s = 'Version: %s' % __version__ s += '\nGot {0} watchers for {1}'.format(len(content), repo[0]) logging.info(s) watcher_list = [x['login'] for x in content] if watcher_list is None or self.watcher not in watcher_list: orgname = repo[0] reponame = repo[1] if orgname is None and reponame is None: self.failed_repos.append(repo) else: s = 'http://github.com/' if orgname is not None: s += '%s' % orgname s += '/' if reponame is not None: s += '%s' % reponame s += '/' self.failed_repos.append(s) return
def store_models(models, key, testing=False):
    try:
        model_urls = json.loads(open(model_url_path, 'r').read().rstrip())
    except IOError:
        model_urls = {}

    if testing is True:
        org = 'jotegui'
        repo = 'statReports'
    else:
        # TODO: Update this block
        from util import apikey  # Remove when repo changed to VertNet
        key = apikey(True)  # Remove when repo changed to VertNet
        org = 'jotegui'  # Change to VertNet org
        repo = 'statReports'  # Change to VertNet repo

    for model in models:
        message = 'Putting JSON data on {0} for {1}, {2}'.format(
            models[model]['report_month'],
            models[model]['github_org'],
            models[model]['github_repo'])
        committer = {'name': 'VertNet', 'email': '*****@*****.**'}
        content = base64.b64encode(json.dumps(models[model]))
        path = 'data/{0}_{1}.json'.format(
            model.replace(' ', '_'),
            models[model]['report_month'].replace('/', '_'))
        headers = {'User-Agent': 'VertNet',
                   'Authorization': 'token {0}'.format(key)}
        request_url = 'https://api.github.com/repos/{0}/{1}/contents/{2}'.format(
            org, repo, path)
        json_input = json.dumps({"message": message,
                                 "committer": committer,
                                 "content": content})

        r = requests.put(request_url, data=json_input, headers=headers)

        status_code = r.status_code
        if status_code == 201:
            logging.info('SUCCESS - Data model stored for resource {0}'.format(repo))
        else:
            logging.error('DATA MODEL CREATION FAILED for resource {0}'.format(repo))

        # Wait 2 seconds between inserts to avoid 409 conflicts
        time.sleep(2)

        if model not in model_urls:
            model_urls[model] = [request_url]
        else:
            model_urls[model].append(request_url)

    # Store URLs of the generated models in a local file
    f = open(model_url_path, 'w')
    f.write(json.dumps(model_urls))
    f.close()
    logging.info('MODEL URLs stored in local file modelURLs.json')

    return
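# A standalone sketch of the GitHub Contents API PUT that store_models()
# relies on, handy for checking a token outside the app. The token, org,
# repo, path, and committer email below are hypothetical placeholders; the
# encode/decode dance keeps base64.b64encode happy on both Python 2 and 3.
import base64
import json

import requests


def put_file_sketch(token, org, repo, path, payload):
    """Create `path` in org/repo with JSON-serializable `payload` as content."""
    url = 'https://api.github.com/repos/{0}/{1}/contents/{2}'.format(
        org, repo, path)
    headers = {'User-Agent': 'VertNet',
               'Authorization': 'token {0}'.format(token)}
    body = json.dumps({
        'message': 'Adding {0}'.format(path),
        'committer': {'name': 'VertNet', 'email': 'dev@example.org'},  # placeholder
        # The Contents API requires the file body as a base64 string
        'content': base64.b64encode(
            json.dumps(payload).encode('utf-8')).decode('ascii')
    })
    r = requests.put(url, data=body, headers=headers)
    return r.status_code == 201  # 201 Created on success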
def load_previous_model(model):
    key = apikey(testing=False)

    # If it's the first time, take 2013 and 2014/01-03 values from files
    if model['last_report_url'] == "":
        model = add_initial_year(model, month="03")
        model = add_initial_year(model, month="02")
        model = add_initial_year(model, month="01")
        model = add_initial_history(model)
    # Else, take values from last month's JSON
    else:
        retries = 5
        cont = 0
        success = False
        url = model['last_report_url']
        headers = {'User-Agent': 'VertNet',
                   'Authorization': 'token {0}'.format(key)}

        while cont < retries:
            r = requests.get(url, headers=headers)
            if r.status_code == 200:
                prev_model = json.loads(
                    base64.b64decode(json.loads(r.content)['content']))
                model['year'] = prev_model['year']
                model['history'] = prev_model['history']
                success = True
                break
            else:
                cont += 1
                logging.warning("Attempt failed with status {0}".format(r.status_code))
                logging.warning(json.loads(r.content)['message'])
                logging.warning("Will retry in 5 seconds")
                time.sleep(5)

        if success is False:
            logging.error("Something went wrong retrieving past data"
                          " for {0} in {1}".format(model["url"], url))
            model['year'] = {"downloads": 0, "records": 0,
                             "downloads_period": 0, "records_period": 0}
            model['history'] = {"downloads": 0, "records": 0,
                                "downloads_period": 0, "records_period": 0}

    return model
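# The retrieval half of the round trip above, sketched with requests: the
# Contents API returns the file body base64-encoded under 'content', which is
# exactly what load_previous_model() unwraps. 'url' and 'token' are
# caller-supplied placeholders.
import base64
import json

import requests


def fetch_stored_json_sketch(url, token):
    """Fetch a JSON file previously stored through the Contents API."""
    headers = {'User-Agent': 'VertNet',
               'Authorization': 'token {0}'.format(token)}
    r = requests.get(url, headers=headers)
    r.raise_for_status()
    # Unwrap the base64-encoded 'content' field, then parse the stored JSON
    return json.loads(base64.b64decode(json.loads(r.content)['content']))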
def check_failed_repos(self): """Check repository name consistency between Carto and GitHub.""" all_repos = self.get_all_repos() repos = {} headers = { 'User-Agent': 'VertNet', 'Accept': 'application/vnd.github.v3+json', 'Authorization': 'token {0}'.format(apikey('ghb')) } for repo in all_repos: orgname = repo[0] reponame = repo[1] if orgname is None or reponame is None: self.failed_repos.append(repo) continue rpc = urlfetch.create_rpc() url = '/'.join([GH_URL, 'orgs', orgname, 'repos?per_page=100']) urlfetch.set_default_fetch_deadline(60) urlfetch.make_fetch_call(rpc, url, headers=headers) repos[repo] = rpc for repo in repos: rpc = repos[repo] result = rpc.get_result() content = json.loads(result.content) s = 'Version: %s' % __version__ s += '\nGot {0} repos for {1}'.format(len(content), repo[0]) logging.info(s) repo_list = [x['name'] for x in content] if repo_list is None or repo[1] not in repo_list: self.failed_repos.append(repo) return
def check_failed_repos(self): """Check repository name consistency between CartoDB and GitHub.""" all_repos = self.get_all_repos() repos = {} headers = { 'User-Agent': 'VertNet', 'Accept': 'application/vnd.github.v3+json', 'Authorization': 'token {0}'.format(apikey('ghb')) } for repo in all_repos: orgname = repo[0] reponame = repo[1] if orgname is None or reponame is None: self.failed_repos.append(repo) continue rpc = urlfetch.create_rpc() url = '/'.join([GH_URL, 'orgs', orgname, 'repos']) urlfetch.set_default_fetch_deadline(60) urlfetch.make_fetch_call(rpc, url, headers=headers) repos[repo] = rpc for repo in repos: rpc = repos[repo] result = rpc.get_result() content = json.loads(result.content) logging.info("Got {0} repos for {1}".format(len(content), repo[0])) repo_list = [x['name'] for x in content] if repo_list is None or repo[1] not in repo_list: self.failed_repos.append(repo) return
def send_issue(self, report_entity): """.""" report_key = report_entity.key logging.info("Ready to send issue to %s" % report_key.id()) gbifdatasetid = report_entity.reported_resource.id() logging.info("Sending issue for dataset {0}".format(gbifdatasetid)) # Build variables dataset_key = report_entity.reported_resource period_key = report_entity.reported_period dataset_entity, period_entity = ndb.get_multi([dataset_key, period_key]) # Check that dataset exists if not dataset_entity: self.error(500) resp = { "status": "error", "message": "Missing dataset in datastore." " Please run /setup_datasets to fix", "data": { "missing_dataset_key": dataset_key } } logging.error(resp) self.response.write(json.dumps(resp)+"\n") return # GitHub stuff org = dataset_entity.github_orgname repo = dataset_entity.github_reponame logging.info(org) logging.info(repo) key = apikey('ghb') user_agent = 'VertNet' # Testing block if self.testing: logging.info("Using testing repositories in jotegui") org = 'jotegui' repo = 'statReports' user_agent = 'jotegui' key = apikey('jot') # GitHub request headers headers = { 'User-Agent': user_agent, 'Authorization': 'token {0}'.format(key), "Accept": "application/vnd.github.v3+json" } # Issue creation, only if issue not previously created if report_entity.issue_sent is False: link = "http://" + MODULE + "/reports/" + gbifdatasetid + \ "/" + self.period + "/" link_all = "http://" + MODULE + "/reports/" + gbifdatasetid + "/" title = 'Monthly VertNet data use report for %s-%s, resource %s' \ % (period_entity.year, period_entity.month, dataset_entity.ccode) body = """Your monthly VertNet data use report is ready! You can see the HTML rendered version of the reports with this link: {0} Raw text and JSON-formatted versions of the report are also available for download from this link. In addition, a copy of the text version has been uploaded to your GitHub repository, under the "Reports" folder. Also, a full list of all reports can be accessed here: {1} You can find more information on the reporting system, along with an explanation of each metric, here: http://www.vertnet.org/resources/usagereportingguide.html Please post any comments or questions to: http://www.vertnet.org/feedback/contact.html Thank you for being a part of VertNet. """.format(link, link_all) labels = ['report'] request_url = '{0}/{1}/{2}/issues'.format(GH_REPOS, org, repo) json_input = json.dumps({ 'title': title, 'body': body, 'labels': labels }) # Make GitHub call r = urlfetch.fetch( url=request_url, method=urlfetch.POST, headers=headers, payload=json_input ) # Check output logging.info(r.status_code) # HTTP 201 = Success if r.status_code == 201: logging.info("Issue %s successfully sent" % report_key.id()) report_entity.issue_sent = True # Other generic problems else: logging.error("Issue %s couldn't be sent" % report_key.id()) logging.error(r.content) resp = { "status": "failed", "message": "Got uncaught error code when uploading" " report to GitHub. Aborting issue creation.", "source": "send_to_github", "data": { "report_key": report_key, "period": self.period, "testing": self.testing, "error_code": r.status_code, "error_content": r.content } } logging.error(resp) return # This 'else' should NEVER happen else: logging.warning("Issue for %s was already sent. This call" " shouldn't have happened" % report_key.id()) # Store updated version of Report entity report_entity.put() # Wait 2 seconds to avoid GitHub abuse triggers time.sleep(2) return
def send_issue(self, report_entity): """.""" report_key = report_entity.key gbifdatasetid = report_entity.reported_resource.id() s = "Version: %s\n" % __version__ s += "Storing issue for dataset %s" % gbifdatasetid logging.info(s) # Build variables dataset_key = report_entity.reported_resource period_key = report_entity.reported_period dataset_entity, period_entity = ndb.get_multi([dataset_key, period_key]) # Check that dataset exists if not dataset_entity: self.error(500) resp = { "status": "error", "message": "Missing dataset in datastore. Please run /setup_datasets " "or remove associated Period entity from data store to fix.", "data": { "missing_dataset_key": gbifdatasetid } } s = "Version: %s\n" % __version__ s += "Response: %s" % resp logging.error(s) self.response.write(json.dumps(resp)+"\n") # Set 'issue_sent' to True to avoid endless loop in the case a dataset does # not exist in the datastore. # TODO: Better if the Report entity had a flag for 'issue_skipped' # with default None. But, for now... report_entity.issue_sent = True # Store updated version of Report entity report_entity.put() return # GitHub stuff org = dataset_entity.github_orgname repo = dataset_entity.github_reponame user_agent = 'VertNet' key = apikey('ghb') # Testing block if self.testing: org = 'VertNet' repo = 'statReports' user_agent = 'VertNet' key = apikey('ghb') s = "Version: %s\n" % __version__ s += "Using GitHub repository %s/%s " % (org, repo) s += "as user_agent %s" % user_agent logging.info(s) # GitHub request headers headers = { 'User-Agent': user_agent, 'Authorization': 'token {0}'.format(key), "Accept": "application/vnd.github.v3+json" } # Issue creation, only if issue not previously created if report_entity.issue_sent == False: link_all = "http://%s/reports/%s/" % (MODULE, gbifdatasetid) link = "http://%s/reports/%s/%s/" % (MODULE, gbifdatasetid, self.period) link_gh = "https://github.com/%s/%s/tree/master/reports" % (org, repo) title = 'Monthly VertNet data use report for %s-%s, resource %s' \ % (period_entity.year, period_entity.month, dataset_entity.ccode) body = """Your monthly VertNet data use report is ready! You can see the HTML rendered version of this report at: {0} Raw text and JSON-formatted versions of the report are also available for download from this link. A copy of the text version has also been uploaded to your GitHub repository under the "reports" folder at: {1} A full list of all available reports can be accessed from: {2} You can find more information on the reporting system, along with an explanation of each metric, at: http://www.vertnet.org/resources/usagereportingguide.html Please post any comments or questions to: http://www.vertnet.org/feedback/contact.html Thank you for being a part of VertNet. """.format(link, link_gh, link_all) labels = ['report'] request_url = '{0}/{1}/{2}/issues'.format(GH_REPOS, org, repo) json_input = json.dumps({ 'title': title, 'body': body, 'labels': labels }) # Make GitHub call r = urlfetch.fetch( url=request_url, method=urlfetch.POST, headers=headers, payload=json_input ) # Check output # HTTP 201 = Success if r.status_code == 201: s = "Version: %s\n" % __version__ s += "Status: %s. Issue %s sent." % (r.status_code, report_key.id()) logging.info(s) report_entity.issue_sent = True # Other generic problems else: resp = { "status": "failed", "message": "Got uncaught error code when uploading" " report to GitHub. 
Aborting issue creation.", "source": "send_to_github", "data": { "report_key": report_key, "period": self.period, "testing": self.testing, "error_code": r.status_code, "error_content": r.content } } s = "Version: %s\n" % __version__ s += "Response: %s. " % resp logging.error(s) return # This 'else' should NEVER happen else: s = "Version: %s\n" % __version__ s += "Issue for %s was already sent, " % report_key.id() s += "but 'issue_sent' property was 'False'. " s += "This call should not have happened." logging.error(s) # Store updated version of Report entity report_entity.put() # Wait 2 seconds to avoid GitHub abuse triggers, 1 isn't sufficient time.sleep(2) return
def store_report(self, report_entity): """.""" report_key = report_entity.key logging.info("Ready to store %s" % report_key.id()) gbifdatasetid = report_entity.reported_resource.id() logging.info("Storing report for dataset {0}".format(gbifdatasetid)) # Build variables dataset_key = report_entity.reported_resource period_key = report_entity.reported_period dataset_entity, period_entity = ndb.get_multi([dataset_key, period_key]) # Check that dataset exists if not dataset_entity: self.error(500) resp = { "status": "error", "message": "Missing dataset in datastore." " Please run /setup_datasets to fix", "data": { "missing_dataset_key": dataset_key } } logging.error(resp) self.response.write(json.dumps(resp)+"\n") return # GitHub stuff org = dataset_entity.github_orgname repo = dataset_entity.github_reponame logging.info(org) logging.info(repo) key = apikey('ghb') user_agent = 'VertNet' # Testing block if self.testing: logging.info("Using testing repositories in jotegui") org = 'jotegui' repo = 'statReports' user_agent = 'jotegui' key = apikey('jot') # GitHub request headers headers = { 'User-Agent': user_agent, 'Authorization': 'token {0}'.format(key), "Accept": "application/vnd.github.v3+json" } # Upload txt report to GitHub, only if not previously stored if report_entity.stored is False: # Load template template = JINJA_ENVIRONMENT.get_template('report.txt') # Render template with values from Report content = template.render( dataset=dataset_entity, report=report_entity, period=period_entity ) # Build GitHub request parameters: message message = content.split("\n")[1] # 2nd line of txt report # Build GitHub request parameters: committer committer = GH_COMMITTER # Build GitHub request parameters: content content_enc = base64.b64encode(content.encode('utf-8')) # Build GitHub request parameters json_input = json.dumps({ "message": message, "committer": committer, "content": content_enc }) # Build GitHub request URL: path txt_path = "-".join([dataset_entity.icode, dataset_entity.ccode, "-".join([self.period[:4], self.period[4:]])]) path = "reports/{0}.txt".format(txt_path) logging.info(path) # Build GitHub request URL request_url = '{0}/{1}/{2}/contents/{3}'.format(GH_REPOS, org, repo, path) logging.info(request_url) # Make GitHub call r = urlfetch.fetch( url=request_url, method=urlfetch.PUT, headers=headers, payload=json_input ) # Check output logging.info(r.status_code) # HTTP 201 = Success if r.status_code == 201: logging.info("Report %s successfully stored" % report_key.id()) report_entity.stored = True # HTTP 422 = 'SHA' missing, meaning report was already there elif r.status_code == 422: logging.warning("Report %s was already stored, but 'stored'" " property was stored as 'False'. This call" " shouldn't have happened" % report_key.id()) logging.error(r.content) report_entity.stored = True # Other generic problems else: logging.error("Report %s couldn't be stored" % report_key.id()) logging.error(r.content) resp = { "status": "failed", "message": "Got uncaught error code when uploading" " report to GitHub. Aborting issue creation.", "source": "send_to_github", "data": { "report_key": report_key, "period": self.period, "testing": self.testing, "error_code": r.status_code, "error_content": r.content } } logging.error(resp) return # This 'else' should NEVER happen else: logging.warning("Report %s was already stored. 
This call" " shouldn't have happened" % report_key.id()) # Store updated version of Report entity report_entity.put() # Wait 2 seconds to avoid GitHub abuse triggers time.sleep(2) return
import requests
import json
import logging
from util import apikey

__author__ = '@jotegui'

ghb_url = 'https://api.github.com'
cdb_url = "https://vertnet.cartodb.com/api/v2/sql"

testing = False
key = apikey(testing)

headers = {
    'User-Agent': 'VertNet',  # Authenticate as VertNet
    'Accept': 'application/vnd.github.v3+json',  # Require version 3 of the API (for stability)
    'Authorization': 'token {0}'.format(key)  # Provide the API key
}


class ConsistencyError(Exception):
    def __init__(self, value):
        self.value = value

    def __str__(self):
        return repr(self.value)


def get_all_repos():
    """Extract a list of all github_orgnames and github_reponames from CartoDB."""
    query = ("select github_orgname, github_reponame from resource_staging "
             "where ipt is true and networks like '%VertNet%';")
    params = {'q': query}
    # The original snippet ends here; assuming the standard CartoDB SQL API
    # response, which wraps the result set in a 'rows' list, a plausible
    # completion is:
    r = requests.get(cdb_url, params=params)
    rows = json.loads(r.content)['rows']
    return [(row['github_orgname'], row['github_reponame']) for row in rows]
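# Hypothetical usage sketch: how get_all_repos() and ConsistencyError could
# combine to flag resources whose GitHub coordinates are missing, mirroring
# the None-checks in the check_failed_repos() methods above.
def assert_no_missing_names():
    """Raise ConsistencyError if any CartoDB row lacks GitHub names."""
    missing = [r for r in get_all_repos()
               if r[0] is None or r[1] is None]
    if missing:
        raise ConsistencyError(
            '{0} resources lack GitHub org/repo names: {1}'.format(
                len(missing), missing))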
def store_report(self, report_entity): """Write report file to GitHub.""" # Build variables dataset_key = report_entity.reported_resource period_key = report_entity.reported_period dataset_entity, period_entity = ndb.get_multi( [dataset_key, period_key]) report_key = report_entity.key gbifdatasetid = report_entity.reported_resource.id() # Check that dataset exists if not dataset_entity: self.error(500) resp = { "status": "error", "message": "Missing dataset in datastore. Please run /setup_datasets " "or remove associated Period entity from data store to fix.", "data": { "missing_dataset_key": gbifdatasetid } } s = "Version: %s\n" % __version__ s += "Response: %s" % resp logging.error(s) self.response.write(json.dumps(resp) + "\n") # Set 'stored' to True to avoid endless loop in the case a dataset does # not exist in the datastore. # TODO: Better if the Report entity had a flag for 'storage_skipped' # with default None. But, for now... report_entity.stored = True # Store updated version of Report entity report_entity.put() return # GitHub stuff org = dataset_entity.github_orgname repo = dataset_entity.github_reponame user_agent = 'VertNet' key = apikey('ghb') # Testing block if self.testing: org = 'VertNet' repo = 'statReports' user_agent = 'VertNet' key = apikey('ghb') # GitHub request headers headers = { 'User-Agent': user_agent, 'Authorization': 'token {0}'.format(key), "Accept": "application/vnd.github.v3+json" } # Load template template = JINJA_ENVIRONMENT.get_template('report.txt') # Render template with values from Report content = template.render(dataset=dataset_entity, report=report_entity, period=period_entity) # Build GitHub request parameters: message message = content.split("\n")[1] # 2nd line of txt report # Build GitHub request parameters: committer committer = GH_COMMITTER # Build GitHub request parameters: content content_enc = base64.b64encode(content.encode('utf-8')) # Build GitHub request parameters json_input = json.dumps({ "message": message, "committer": committer, "content": content_enc }) # Build GitHub request URL: path txt_path = "-".join([ dataset_entity.icode, dataset_entity.ccode, "-".join([self.period[:4], self.period[4:]]) ]) path = "reports/{0}.txt".format(txt_path) s = "Storing at: %s/%s/%s/%s" % (GH_REPOS, org, repo, path) logging.info(s) # Build GitHub request URL request_url = '{0}/{1}/{2}/contents/{3}'.format( GH_REPOS, org, repo, path) # Make GitHub call r = urlfetch.fetch(url=request_url, method=urlfetch.PUT, headers=headers, payload=json_input) # Check output # HTTP 201 = Success if r.status_code == 201: report_entity.stored = True s = "Report %s sent " % report_key.id() s += "for gbifdatasetid %s to %s " % (gbifdatasetid, path) logging.info(s) # HTTP 422 = 'SHA' missing, meaning report was already there elif r.status_code == 422: report_entity.stored = True s = "Report %s was already stored " % report_key.id() s += "for gbifdatasetid %s at %s " % (gbifdatasetid, path) logging.warning(s) # Other generic problems else: resp = { "status": "failed", "message": "Got uncaught error code when uploading report to GitHub.", "source": "send_to_github", "data": { "report_key": report_key, "period": self.period, "gbifdatasetid": gbifdatasetid, "github_issue": self.github_issue, "testing": self.testing, "error_code": r.status_code, "error_content": r.content } } s = "Version: %s\n" % __version__ s += "Response: " % resp logging.error(s) return # Store updated version of Report entity report_entity.put() # Wait 2 seconds to avoid GitHub abuse triggers. 
1 isn't sufficient. time.sleep(2) return