def simulate(cache, trace):
    global misses
    global hits
    for line in trace:
        splitLine = line.split()
        if len(splitLine) == 3:
            trash, op, address = splitLine
            if op == 'R':
                result = cache.read(address)
                if result == 0:
                    misses += 1
                    cache.load(address)
                    cache.read(address)
                else:
                    hits += 1
            else:
                result = cache.write(address)
                if result == 0:
                    misses += 1
                    cache.load(address)
                    cache.write(address)
                else:
                    hits += 1
    print_results(misses, hits)
def getStandings(teams):
    key = "standings_" + "_".join(teams)
    standings = load(key)
    now = datetime.now()
    if standings is None:
        data = urllib2.urlopen("http://mlb.mlb.com/lookup/json/named.standings_schedule_date.bam?season=%d&schedule_game_date.game_date='%s'&sit_code='h0'&league_id=103&league_id=104&all_star_sw='N'&version=2" % (now.year, now.strftime("%Y/%m/%d")))
        data = json.load(data)["standings_schedule_date"]["standings_all_date_rptr"]["standings_all_date"]
        rows = data[0]["queryResults"]["row"] + data[1]["queryResults"]["row"]
        standings = []
        for row in rows:
            found = getTeam(row["team_abbrev"])
            if found is not None and row["team_abbrev"] in teams:
                standings.append({
                    "name": found.name,
                    "abbrev": found.code,
                    "subreddit": found.subreddit,
                    "wins": int(row["w"]),
                    "losses": int(row["l"]),
                    "percent": row["pct"],
                    "games_back": row["gb"]
                })
        standings.sort(lambda a, b: -1 if a["percent"] > b["percent"] else
                       1 if a["percent"] < b["percent"] else
                       -1 if a["wins"] > b["wins"] else
                       1 if a["wins"] < b["wins"] else 0)
        save(key, standings, 30)
    return standings
def connFromId(id, hafas=None):
    if hafas is None:
        hafas = Hafas
    if cache.in_cache(id):
        c = cache.load(id)
        return (c.sections[0].train, c)
    else:
        sid = "{}".format(int(id, 16))
        sourceId = sid[1:10]
        destinationId = sid[10:19]
        date = datetime.strptime(sid[19:], '%Y%m%d%H%M')
        source = HafasStation('dummy', sourceId, [])
        destination = HafasStation('dummy', destinationId, [])
        cl = hafas.searchConnections(source, destination, date, 1)
        I = 10
        if datetime.combine(cl[-1].date, cl[-1].departure.time) != date:
            while datetime.combine(cl[-1].date, cl[-1].departure.time) < date:
                next(cl)
                if I == 0:
                    break
                I -= 1
        c = cl[-1]
        c.queryRelation(hafas)
        cache.save(c, id)
        return (c.sections[0].train, c)
def load_from_path(self, path, project_root):
    self.clear()
    # Tables
    longpath = os.path.join(path, 'tables/*.yml')
    for filename in glob.glob(longpath):
        t = cache.load(project_root, filename, Table)
        self.add_table(t)
def load_matrix(path):
    '''
    path: `str`
        Path to .mtx file.

    Returns:
    matrix: `numpy.ndarray(np.float32)`, (N,N)
        Number of people traveling from zone `i` to zone `j` in `matrix[i,j]`.
    zones: `numpy.ndarray(str)`, (N)
        Name of zone `i` in `zones[i]`.
    '''
    cachename = path
    r = cache.load(cachename)
    if r is not None:
        return r
    m = ReadPTVMatrix(filename=path)
    matrix = m['matrix'].astype(np.float32)
    ids = [int(z.coords['zone_no'].data) for z in m['zone_name']]
    origins = [int(v.data) for v in matrix['origins']]
    destinations = [int(v.data) for v in matrix['destinations']]
    assert origins == ids, \
        "different order in matrix['origins'] and zone_name"
    assert destinations == ids, \
        "different order in matrix['destinations'] and zone_name"
    zonenames = np.array([str(z.data) for z in m['zone_name']])
    r = matrix.data, zonenames
    return cache.save(cachename, r)
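# A minimal usage sketch for load_matrix (not part of the original module): the
# 'demand.mtx' path is hypothetical, and it assumes cache.save() returns the value
# it stores, as the `return cache.save(cachename, r)` above suggests.
import numpy as np

matrix, zones = load_matrix('demand.mtx')

# matrix[i, j] holds trips from zone i to zone j; zones[i] names zone i.
outbound = matrix.sum(axis=1)
busiest = int(np.argmax(outbound))
print(zones[busiest], outbound[busiest])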
def get_version_by_name(self, name):
    filename = "/versions/%s.yml" % name
    if os.path.exists(self.project_root + filename):
        version = cache.load(self.project_root, filename, Version)
        return version
    else:
        return None
def get_job_bugzilla_suggestions_json(args, repo, job_id, include_related_bugs=False, update_cache=False):
    """get_job_bugzilla_suggestions_json

    Retrieve job_bugzilla_suggestions given args, repo and job_id.
    """
    cache_attributes = ['treeherder', repo, 'bugzilla_suggestions']

    suggestions_data = cache.load(cache_attributes, job_id)
    if suggestions_data and not update_cache:
        suggestions = json.loads(suggestions_data)
    else:
        bugzilla_suggestions_url = '%s/api/project/%s/jobs/%s/bug_suggestions/' % (
            URL, repo, job_id)
        suggestions = utils.get_remote_json(bugzilla_suggestions_url)
        cache.save(cache_attributes, job_id, json.dumps(suggestions, indent=2))

    if args.test_failure_pattern:
        bugzilla_suggestions = [
            suggestion for suggestion in suggestions
            if args.test_failure_pattern.search(suggestion['search'])
        ]
    else:
        bugzilla_suggestions = suggestions

    if not include_related_bugs:
        for bug_data in bugzilla_suggestions:
            del bug_data['bugs']

    return bugzilla_suggestions
def __init__(self): self.sparql = SPARQLWrapper("http://etree.linkedmusic.org/sparql") self.sparql.setReturnFormat(JSON) self.sparql.setMethod("POST") performances = cache.load('list_all_performances') # performances = self.get_all_performances() # cache.save(performances, 'list_all_performances') print('Got perm') self.examine_tracklists(performances)
def test_save_cache(self):
    with fixtures.TestFixture("head_test1test2", clean=True) as tf:
        project_path = tf.temp_path
        filename = "/versions/head.yml"
        expected = schema.Version()
        content = fixtures.read_from_file(project_path + filename)
        expected.load_from_str(content)
        cached = cache.load(project_path, filename, schema.Version)
        assert cached == expected
        print(project_path + "/.cache" + filename)
        assert os.path.exists(project_path + "/.cache" + filename + ".pickle")
def check_page():
    page = crawl(configuration['targetURL'])  # .decode("utf8")
    page_hash = md5(page)
    c = load()
    if not c['hash'] == page_hash:
        print("HASH CHANGED! (" + page_hash + ")")

        # Run a background thread to archive the page in the web archive
        start_new_thread(crawl, ("https://web.archive.org/save/" + configuration['targetURL'], False))

        # Check if the file is online and we didn't send the mail already (if so, send it)
        match = parse(page.decode('utf8'))
        if match is not None and not c['mailSent']:
            print("FILE IS ONLINE! Sending mails ... (and we didn't send them already)")
            docx = crawl(match)
            for person_details in configuration['details']:
                variables = {
                    "name": person_details['name'],
                    "year": person_details['targetYear'],
                    "quarter": person_details['quarter'],
                    "mail": person_details['mail'],
                    "streetAndCity": person_details['streetAndCity'],
                    "phone": person_details['phone'],
                    "matrikelnr": person_details['matrikelnr']
                }
                res = parser.update_document_contents(docx, person_details)
                res_filename = "Antrag Wohnheimzimmer " + variables['quarter'] + " " + variables['year'] + ".docx"
                mail.send(configuration['mail'], variables, res, res_filename)
            c['mailSent'] = True

        # Send a mail regardless of the above that there is a change
        notification_conf = {
            "body": "Something changed! Go and visit " + configuration['targetURL'],
            "subject": "IMPORTANT | The watched website has changed! Go check it immediately!",
            "recipient": configuration['mail']['notificationRecipient'],
            "server": configuration['mail']['server']
        }
        if c['mailSent']:
            notification_conf['body'] += "\n\n Oh and btw I already sent your reservation request ;)\n\n Have a good one!\n - AccommodationBot"
        mail.send(notification_conf)
        c['hash'] = page_hash
    else:
        print("Boring old same page...")
    save(c)
def request(self, request_body, verbose=1, update_cache=False):
    rsp = cache.load(self.uri + request_body)
    if rsp is None or update_cache:
        req = urllib2.Request(url=self.uri)
        req.add_data(data=request_body)
        headers = [
            ('Content-Type', 'application/json; charset=utf-8'),
            ('Accept-Encoding', 'text'),
            ('Accept', 'application/json,application/json,application/jsonrequest')]
        for t, v in headers:
            req.add_header(t, v)
        rsp = urllib2.urlopen(req).read()
        cache.store(self.uri + request_body, rsp)
    return simplejson.loads(rsp)
def __init__(self):
    self.geoCache = cache.load('geoCache')
    self.locations = cache.load('locationList')
    self.latlng = cache.load('locationLatLng')
    self.notGeolocated = cache.load('failedToGeolocate')
    self.geolocator = geocoders.GoogleV3(
        api_key="AIzaSyBnR6mRCbJ3yPsmhY-btGpfHpHJ_H6fZLI")
    # AIzaSyBnR6mRCbJ3yPsmhY-btGpfHpHJ_H6fZLI
    # AIzaSyChlf0VSGWPD3tbp2fbCWOGoniICT_2owc
    # AIzaSyDE3JOGCJJvG7OWo4BIfgW-6bmp5icH840

    # Get latitude and longitude for each place name
    self.get_lat_lng()
    # Group on these retrieved lat / lng values
    self.group_on_lat_lng()
    # Calculate the new default key
    self.add_default_keys()
    # Evaluate and provide statistics
    self.evaluate_processing()
def get_item(url=None, path=None):
    if path:
        url = path2url(path)
    src = cache.load(url)
    if src:
        d = json.loads(src)
        response = Response(*d)
    else:
        response = _get_request(url)
        cache.store(url, json.dumps(response))
    if not response.ok:
        logging.error('Status {} on {}'.format(response.status, response.url))
        return None
    return json.loads(response.content)
def __init__(self, cache):
    """
    Initializes an instance of the Calma class.

    The Calma class is used for all interfacing with the feature extraction
    tools available at the end-point provided by Sean Bechhofer's research
    and work.
    """
    self.sparql = SPARQLWrapper("https://etree.linkedmusic.org/sparql")
    self.sparql.setReturnFormat(JSON)
    self.sparql.setMethod("POST")
    self.keyInfo = None
    self.loudnessValues = None
    self.segmentInfo = None
    self.cache = cache
    self.calmaCache = cache.load('calmaCache')
def get_instances(file):
    """
    Input: file
    Output: dictionary
        moduleName:
            path: full_path
            instances: set()
    """
    if cache.refreshCache():
        cache.clean()
    cached = cache.load(file)
    if cached:
        return cached['parsed']
    preprDict = vpreprocessor.Preprocessor(file).result
    res = vparser.Parser(preprDict).result
    cache.dump(res)
    return res['parsed']
def get_pushes_jobs_json(args, repo, update_cache=False):
    """get_pushes_jobs_json

    Retrieve nested pushes, jobs matching args set via push_args
    parser and job_args parser.
    """
    if hasattr(args, 'update_cache'):
        update_cache = args.update_cache

    cache_attributes_push_jobs = ['treeherder', repo, 'push_jobs']

    pushes = get_pushes_json(args, repo, update_cache=update_cache)

    for push in pushes:
        push_jobs_data = cache.load(cache_attributes_push_jobs, push['id'])
        if push_jobs_data and not update_cache:
            jobs = json.loads(push_jobs_data)
        else:
            jobs = retry_client_request(CLIENT.get_jobs, 3, repo,
                                        push_id=push['id'], count=None)
            cache.save(cache_attributes_push_jobs, push['id'],
                       json.dumps(jobs, indent=2))

        if not args.job_filters:
            push['jobs'] = jobs
        else:
            push['jobs'] = []
            for job in jobs:
                include = True
                for filter_name in args.job_filters:
                    include &= args.job_filters[filter_name].search(
                        job[filter_name]) is not None
                if include:
                    push['jobs'].append(job)

        if args.add_bugzilla_suggestions:
            for job in push['jobs']:
                if job['result'] != 'testfailed':
                    job['bugzilla_suggestions'] = []
                    continue
                job['bugzilla_suggestions'] = get_job_bugzilla_suggestions_json(
                    args, repo, job['id'], update_cache=update_cache)

    return pushes
def send_smtp_test():
    c = load()
    if not c['smtpTestSent']:
        c['smtpTestSent'] = True
        save(c)
        notification_conf = {
            "body": "This is a test of your smtp settings.\nYour final mail will be sent to " +
                    ", ".join(configuration["mail"]["recipient"]) + ".\n\n- Accommodation Bot",
            "subject": "SMTP Settings Test!",
            "recipient": configuration['mail']['notificationRecipient'],
            "server": configuration['mail']['server']
        }
        mail.send(notification_conf)
def get_job_by_repo_job_id_json(args, repo, job_id, update_cache=False):
    """get_job_by_repo_job_id_json

    Retrieve job given args, repo and job_id.
    """
    cache_attributes = ['treeherder', repo, 'jobs']

    job_data = cache.load(cache_attributes, job_id)
    if job_data and not update_cache:
        jobs = [json.loads(job_data)]
    else:
        jobs = retry_client_request(CLIENT.get_jobs, 3, repo, id=job_id)
        if jobs:
            for job in jobs:
                cache.save(cache_attributes, job['id'], json.dumps(job, indent=2))

    return jobs[0]
def load_cache(self, mode):
    self.cache = cache.load('{}_threshold_{}_{}'.format(
        self.c.dataset, self.c.threshold, mode), mode)
    self.p_size = len(self.cache['foreground'])
    self.n_size = len(self.cache['background'])
    self.p_shuffle_counter = self.p_size
    self.n_shuffle_counter = self.n_size
    self.p_virtual = list(range(self.p_size))
    self.p_physical = list(range(self.p_size))
    self.n_virtual = list(range(self.n_size))
    self.n_physical = list(range(self.n_size))
def get_bug_job_map_json(args, repo, job_id, update_cache=False):
    """get_bug_job_map_json

    Retrieve bug_job_map given args, repo and job_id.
    """
    cache_attributes = ['treeherder', repo, 'bug-job-map']

    bug_job_map_url = '%s/api/project/%s/bug-job-map/?job_id=%s' % (
        URL, repo, job_id)

    bug_job_map_data = cache.load(cache_attributes, job_id)
    if bug_job_map_data and not update_cache:
        bug_job_map = json.loads(bug_job_map_data)
        bug_job_map_data = None
    else:
        bug_job_map = utils.get_remote_json(bug_job_map_url)
        cache.save(cache_attributes, job_id, json.dumps(bug_job_map, indent=2))

    return bug_job_map
def add_default_keys(self):
    # dict = {}
    dict = cache.load('newReversedGroupedLocations')
    self.prev_validated = []  # cache.load('previously_validated')
    for key in self.grouped.keys():
        newKey = self.generateKey(key)
        if newKey is not None:
            if newKey not in dict.keys():
                newKey = newKey.strip()
                dict[newKey] = {}
                dict[newKey]['latlng'] = key
                dict[newKey]['locations'] = self.grouped[key]
            else:
                # Append contents of conflicting key to existing key
                dict[newKey]['locations'] = dict[newKey]['locations'] + self.grouped[key]
    cache.save(dict, 'redo_dict_grouped_locations')
    cache.save(self.prev_validated, 'redo_previously_validated')
def get_push_json(args, repo, push_id, update_cache=False):
    """get_push_json

    Retrieve push by push_id.
    """
    cache_attributes = ['treeherder', repo, 'push']

    push_params = get_treeherder_push_params(args)
    push_params['id'] = push_id

    push = None
    if not update_cache:
        push_data = cache.load(cache_attributes, push_params['id'])
        if push_data:
            push = json.loads(push_data)
            return push

    pushes = retry_client_request(CLIENT.get_pushes, 3, repo, **push_params)
    if pushes:
        return pushes[0]
    return None
def findNewEd2k(self, url):
    ''' find and cache all ed2k links on a page, but only return new links '''
    links = self.findEd2k(url)
    print 'found %i ed2k links' % len(links)
    self.ed2k.extend(links)
    cache_id = hash(url)
    if cache.has_cache(cache_id):
        cacheList = cache.load(cache_id)
        if cacheList == self.ed2k:
            print 'nothing changed. ' + url
        else:
            print 'you have new links ' + url
            newLinks = zip(*self.ed2k)[0]
            oldLinks = zip(*cacheList)[0]
            diff = list(set(newLinks).difference(set(oldLinks)))  # lists difference
            for link in diff:
                print link
                pyperclip.copy(link)  # TODO
    else:
        print 'just cache the links ' + url
    cache.cache(self.ed2k, cache_id)
def load_zones(path):
    '''
    path: str
        Path to .gpkg file.

    Returns:
    zone_to_canton: `dict`
        Mapping from zone name to canton code (e.g. 'Dietlikon' -> 'ZH')
    '''
    cachename = path
    r = cache.load(cachename)
    if r is not None:
        return r
    gdf = gpd.read_file(path)
    zonenames = list(map(str, gdf.N_Gem))
    zonecantons = list(map(str, gdf.N_KT))
    zone_to_canton = {}
    for name, canton in zip(zonenames, zonecantons):
        zone_to_canton[name] = canton
    r = zone_to_canton
    return cache.save(cachename, r)
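# Usage sketch for load_zones (not part of the original module): the 'zones.gpkg'
# path is hypothetical and, as above, it assumes cache.save() returns the stored value.
zone_to_canton = load_zones('zones.gpkg')

# Look up the canton code for a zone, e.g. 'Dietlikon' -> 'ZH' per the docstring.
print(zone_to_canton.get('Dietlikon'))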
def __init__(self):
    self.cache = load()
    self.endpoint = '127.0.0.1', 53
    self.origin = '8.8.8.8', 53
import parse, cache, itertools, files

link = parse.base + 'classical/sort=0,start='
prev = cache.load_prev()
codes_old = []
for i in itertools.count(start=prev + 1):
    src = parse.get_source(link + str(i * 50))
    codes = parse.get_codes(src)
    if codes == codes_old:
        break
    codes_old = codes
    names, probs, users = cache.load()
    for p, code in enumerate(codes):
        parse.print_progress(i, p + 1)
        parse.problem(code, names, probs, users)
    if len(codes) == 50:
        cache.dump(i, names, probs, users)

for code, name in names.items():
    names[code] = name.encode('utf-8')
files.clear()
files.write(names, probs, users)
def set_image_over(self, img):
    self.image_over = cache.load(img)
            comment_author, diff_author, colorama.Fore.CYAN, comment_text,
        )
        counts[comment_author] += 1
        if args.just_tally:
            sys.stdout.write('.')
            sys.stdout.flush()

    if args.just_tally:
        print ''

    print '=== Counts ==='
    for key, value in counts.items():
        print '%s: %s' % (key, value)
    print 'Total: %s comments on %s diffs' % (sum(counts.values()), total_diffs)


if __name__ == '__main__':
    colorama.init(autoreset=True)
    cache.load()
    parser = argparse.ArgumentParser(prog='differential-comments')
    parser.add_argument('--team', help='Which team from settings to use',
                        **kwargs_or_default(settings.DEFAULT_TEAM))
    parser.add_argument('--days', help='How many days back to go', default=30)
    parser.add_argument('--comment-days',
                        help='How many days back to go for the comments')
    parser.add_argument('--just-tally', help='Just print the final tally',
                        action='store_true')
    parser.add_argument('--just-email', help='Just one user by email address')
    args = parser.parse_args()
    list(args)
    cache.update()
def plugin_loaded():
    cache.load()
    style_parser.init_file_loading()
import editdistance
import sys
import cache

# Load list of artists
artistList = cache.load('artistList')
possibleDuplicates = []

# For each artist
for artist in artistList:
    # Compare against each other artist
    for otherArtist in artistList:
        # If we can reach otherArtist from artist in 1-2 letter changes
        if 0 < editdistance.eval(artist, otherArtist) < 2:
            # Add to possible duplicates
            possibleDuplicates.append([artist, otherArtist])

correct = 0
incorrect = 0
processed = []
for artist, otherArtist in possibleDuplicates:
    # If artist, otherArtist have not been compared against each-other yet
    if [artist, otherArtist] not in processed and [otherArtist, artist] not in processed:
        isCorrect = input(str(artist) + ', ' + str(otherArtist) + '\n')
        # If user says this is a correct assumption
        if isCorrect == '1':
            correct += 1
        # If user says this is a false positive
        else:
def summarize_isolation_pushes_jobs_json(args):

    pushes = []

    test_isolation_bugzilla_data = get_test_isolation_bugzilla_data(args)

    for revision_url in test_isolation_bugzilla_data:
        revision_data = test_isolation_bugzilla_data[revision_url]
        new_args = copy.deepcopy(args)
        new_args.revision_url = revision_url
        (new_args.repo, _,
         new_args.revision) = new_args.revision_url.split('/')[-3:]
        new_args.add_bugzilla_suggestions = True
        new_args.state = 'completed'
        new_args.result = 'success|testfailed'
        new_args.job_type_name = '^test-'
        new_args.test_failure_pattern = TEST_FAILURE_PATTERN
        jobs_args.compile_filters(new_args)

        # Load the pushes/jobs data from cache if it exists.
        cache_attributes = ['test-isolation', new_args.repo]
        pushes_jobs_data = cache.load(cache_attributes, new_args.revision)
        if pushes_jobs_data and not args.update_cache:
            new_pushes = json.loads(pushes_jobs_data)
        else:
            new_pushes = get_pushes_jobs_json(new_args, new_args.repo,
                                              update_cache=args.update_cache)
            cache.save(cache_attributes, new_args.revision,
                       json.dumps(new_pushes, indent=2))

        pushes.extend(new_pushes)

        for revision_bug_data in revision_data:
            if args.bugs and revision_bug_data['bug_id'] not in args.bugs:
                # Skip if we requested a specific bug and this is not it.
                continue
            if args.bugs and args.override_bug_summary:
                revision_bug_data['bug_summary'] = bugzilla_summary_munge_failure(
                    args.override_bug_summary)

    pushes_jobs_data = None

    data = convert_pushes_to_test_isolation_bugzilla_data(args, pushes)
    #logger.info('convert_pushes_to_test_isolation_bugzilla_data\n{}'.format(
    #    json.dumps(data, indent=2)))

    summary = {}

    for revision_url in data:
        (repo, _, revision) = revision_url.split('/')[-3:]
        if revision_url not in summary:
            summary[revision_url] = {}
        summary_revision = summary[revision_url]

        job_type_names = sorted(data[revision_url].keys())
        for job_type_name in job_type_names:
            if job_type_name not in summary_revision:
                summary_revision[job_type_name] = dict(
                    notes=[],
                    isolation_job="{}/#/jobs?repo={}&tier=1%2C2%2C3&revision={}&searchStr={}".format(
                        args.treeherder_url, repo, revision, job_type_name),
                )
            summary_revision_job_type = summary_revision[job_type_name]

            job_type = data[revision_url][job_type_name]

            if 'bugzilla_data' not in summary_revision_job_type:
                summary_revision_job_type['bugzilla_data'] = copy.deepcopy(
                    test_isolation_bugzilla_data[revision_url])
                for bug_data in summary_revision_job_type['bugzilla_data']:
                    # bug_data['failure_reproduced'][section_name] counts the
                    # number of times the original bug_summary failure
                    # was seen in that section of jobs.
                    bug_data['failure_reproduced'] = dict(
                        original=0,
                        repeated=0,
                        id=0,
                        it=0,
                    )
                    # bug_data['test_reproduced'][section_name] counts the
                    # number of times the original bug_summary test
                    # was seen in that section of jobs.
                    bug_data['test_reproduced'] = dict(
                        original=0,
                        repeated=0,
                        id=0,
                        it=0,
                    )

            for section_name in (ORIGINAL_SECTIONS + ISOLATION_SECTIONS):
                if section_name not in summary_revision_job_type:
                    summary_revision_job_type[section_name] = dict(
                        failures={},
                        tests={},
                        failure_reproduced=0,
                        test_reproduced=0,
                    )
                    if section_name == 'original':
                        summary_revision_job_type[section_name]['bug_job_map'] = []

                summary_revision_job_type_section = summary_revision_job_type[section_name]

                job_type_section = job_type[section_name]

                run_time = 0
                jobs_testfailed_count = 0
                bugzilla_suggestions_count = 0

                for job in job_type_section:
                    if section_name == 'original':
                        summary_revision_job_type_section['bug_job_map'].extend(
                            job['bug_job_map'])
                    run_time += job['end_timestamp'] - job['start_timestamp']
                    jobs_testfailed_count += 1 if job['result'] == 'testfailed' else 0
                    bugzilla_suggestions_count += len(job['bugzilla_suggestions'])

                    for bugzilla_suggestion in job['bugzilla_suggestions']:
                        #failure = bugzilla_summary_munge_failure(bugzilla_suggestion['search'])
                        failure = bugzilla_suggestion['search']
                        if failure not in summary_revision_job_type_section['failures']:
                            summary_revision_job_type_section['failures'][failure] = dict(
                                count=0,
                                failure_reproduced=0,
                            )
                        summary_revision_job_type_section['failures'][failure]['count'] += 1

                        for bug_data in summary_revision_job_type['bugzilla_data']:
                            if args.bugs and args.override_bug_summary:
                                #pattern = convert_failure_to_pattern(bugzilla_summary_munge_failure(args.override_bug_summary))
                                pattern = convert_failure_to_pattern(
                                    args.override_bug_summary)
                            else:
                                pattern = bug_data['pattern']
                            if re.compile(pattern).search(failure):
                                bug_data['failure_reproduced'][section_name] += 1
                                summary_revision_job_type_section['failures'][failure]['failure_reproduced'] += 1
                                summary_revision_job_type_section['failure_reproduced'] += 1

                                test = get_test(failure)
                                if test:
                                    if test not in summary_revision_job_type_section['tests']:
                                        summary_revision_job_type_section['tests'][test] = dict(
                                            count=0,
                                            test_reproduced=0,
                                        )
                                    summary_revision_job_type_section['tests'][test]['count'] += 1

                                    if args.bugs and args.override_bug_summary:
                                        bug_data_test = get_test(args.override_bug_summary)
                                    else:
                                        bug_data_test = bug_data['test']
                                    if bug_data_test and test in bug_data_test:
                                        bug_data['test_reproduced'][section_name] += 1
                                        summary_revision_job_type_section['tests'][test]['test_reproduced'] += 1
                                        summary_revision_job_type_section['test_reproduced'] += 1

                summary_revision_job_type_section['run_time'] = run_time
                summary_revision_job_type_section['jobs_testfailed'] = jobs_testfailed_count
                summary_revision_job_type_section['jobs_total'] = len(job_type_section)
                summary_revision_job_type_section['bugzilla_suggestions_count'] = bugzilla_suggestions_count

    return summary
def get_test_isolation_bugzilla_data(args):
    """Query Bugzilla for bugs marked with [test isolation] in the whiteboard.

    Return a dictionary keyed by revision url containing the bug id and summary.
    """
    cache_attributes = ['test-isolation']

    bugzilla_data = cache.load(cache_attributes, 'bugzilla.json')
    if bugzilla_data and not args.update_cache:
        return json.loads(bugzilla_data)

    now = datetime.datetime.now()

    data = {}

    re_logview = re.compile(
        r'https://treeherder.mozilla.org/logviewer.html#\?job_id=([0-9]+)&repo=([a-z-]+)')
    re_pushlog_url = re.compile(r'(https://.*)$\n', re.MULTILINE)

    query = BUGZILLA_URL + 'bug?'
    query_terms = {
        'include_fields': 'id,creation_time,whiteboard',
        'creation_time': args.bug_creation_time,
        'whiteboard': args.whiteboard,
        'limit': 100,
        'offset': 0,
    }
    if args.bugs:
        query_terms['id'] = ','.join([str(id) for id in args.bugs])
    else:
        query_terms['creation_time'] = args.bug_creation_time

    while True:
        response = utils.get_remote_json(query, params=query_terms)
        if 'error' in response:
            logger.error('Bugzilla({}, {}): {}'.format(query, query_terms, response))
            return

        if len(response['bugs']) == 0:
            break

        # update query terms for next iteration of the loop.
        query_terms['offset'] += query_terms['limit']

        for bug in response['bugs']:
            #https://bugzilla.mozilla.org/rest/bug/1559260/comment
            if args.bugs_after and bug['id'] <= args.bugs_after:
                continue
            if args.whiteboard not in bug['whiteboard']:
                # The query performs an all words not substring
                # query, so restrict to the substring.
                continue
            if args.bugs and bug['id'] not in args.bugs:
                continue

            query2 = BUGZILLA_URL + 'bug/%s' % bug['id']
            response2 = utils.get_remote_json(query2)
            if 'error' in response2:
                logger.error('Bugzilla({}): {}'.format(query2, response2))
                return

            bug_summary = response2['bugs'][0]['summary']
            munged_bug_summary = bugzilla_summary_munge_failure(bug_summary)

            query3 = BUGZILLA_URL + 'bug/%s/comment' % bug['id']
            response3 = utils.get_remote_json(query3)
            if 'error' in response3:
                logger.error('Bugzilla({}): {}'.format(query, response3))
                return

            raw_text = response3['bugs'][str(bug['id'])]['comments'][0]['raw_text']

            match = re_logview.search(raw_text)
            if match:
                # Get push associated with this failed job.
                job_id = int(match.group(1))
                repo = match.group(2)
                job = get_job_by_repo_job_id_json(
                    args, repo, job_id, update_cache=args.update_cache)
                push_id = job['push_id']
                push = get_push_json(args, repo, push_id,
                                     update_cache=args.update_cache)
                repository = get_repository_by_id(
                    push['revisions'][0]['repository_id'])
                revision = push['revisions'][0]['revision']
                revision_url = '%s/rev/%s' % (repository['url'], revision)

                new_args = copy.deepcopy(args)
                new_args.revision_url = revision_url
                (new_args.repo, _,
                 new_args.revision) = new_args.revision_url.split('/')[-3:]
                new_args.add_bugzilla_suggestions = True
                new_args.state = 'completed'
                new_args.result = 'success|testfailed'
                #new_args.job_type_name = '^test-'
                new_args.job_type_name = job['job_type_name']
                new_args.test_failure_pattern = TEST_FAILURE_PATTERN
                pushes_args.compile_filters(new_args)
                jobs_args.compile_filters(new_args)

                if revision_url not in data:
                    data[revision_url] = []

                mozharness_failure = match_bug_summary_to_mozharness_failure(
                    bug_summary, raw_text)

                test = None
                if mozharness_failure:
                    test = get_test(mozharness_failure)
                    pattern = convert_failure_to_pattern(mozharness_failure)
                if not test:
                    test = get_test(munged_bug_summary)
                    pattern = convert_failure_to_pattern(munged_bug_summary)
                if not test:
                    logger.warning('Unable to obtain test for '
                                   'bug {} {} failure {}'.format(
                                       bug['id'], bug_summary, mozharness_failure))

                bug_data = {
                    'bug_id': bug['id'],
                    'bug_summary': bug_summary,
                    'munged_bug_summary': munged_bug_summary,
                    'job_type_name': job['job_type_name'],
                    'test': test,
                    'mozharness_failure': mozharness_failure,
                    'job_id': job_id,
                    'push_id': push_id,
                    'repository': repository['name'],
                    'revision_url': revision_url,
                    'bugzilla_suggestions': get_job_bugzilla_suggestions_json(
                        new_args, new_args.repo, job_id,
                        update_cache=args.update_cache),
                    'bug_job_map': get_bug_job_map_json(
                        new_args, new_args.repo, job_id,
                        update_cache=args.update_cache),
                    'pattern': pattern,
                }
                data[revision_url].append(bug_data)

                # Get failure counts for trunk for this bug for the two weeks following
                # the creation of the bug. Ignore failure counts for bugs that are less
                # than 2 weeks old.
                # TODO: Allow in place updating of bugzilla.json so that we can reprocess
                # the failure counts without having to query the full set of bugs.
                start_date = datetime.datetime.strptime(
                    bug['creation_time'].rstrip('Z'),
                    '%Y-%m-%dT%H:%M:%S') - datetime.timedelta(days=1)
                end_date = start_date + datetime.timedelta(days=15)
                failure_count_json = get_failure_count_json(
                    args, 'trunk', bug['id'], start_date, end_date)
                if now - start_date < datetime.timedelta(days=15):
                    failure_count = None
                else:
                    failure_count = 0
                    for failures in failure_count_json:
                        failure_count += failures['failure_count']
                bug_data['failure_count'] = failure_count

            elif args.whiteboard and False:  # Disable this as it is buggy.
                # This run has specified the test or this is a bug
                # that is not a Treeherder filed bug. If it was marked
                # via the whiteboard then we are interested in the
                # pushes for this bug. Since we can't really tell
                # which is which, we can include all of the pushes
                # since only those with test isolation jobs will
                # matter. The problem is this bug does not
                # necessarily have a bug_summary referencing a test
                # failure...
                test = None  # We don't have a failure in this case.
                comments = response3['bugs'][str(bug['id'])]['comments']
                for comment in comments:
                    if not comment['raw_text'].startswith('Pushed by'):
                        continue

                    # Get the last revision in the comment as the head of the push.
                    revision_url = None
                    pushlog_url_match = re_pushlog_url.search(comment['raw_text'])
                    while pushlog_url_match:
                        revision_url = pushlog_url_match.group(1)
                        pushlog_url_match = re_pushlog_url.search(
                            comment['raw_text'], pushlog_url_match.end(1))
                    if revision_url:
                        # revision_url from Bugzilla has the 12 character revision.
                        new_args = copy.deepcopy(args)
                        new_args.revision_url = revision_url
                        (new_args.repo, _,
                         new_args.revision) = new_args.revision_url.split('/')[-3:]
                        new_args.add_bugzilla_suggestions = True
                        new_args.state = 'completed'
                        new_args.job_type_name = '^test-'
                        new_args.test_failure_pattern = TEST_FAILURE_PATTERN
                        pushes_args.compile_filters(new_args)
                        jobs_args.compile_filters(new_args)

                        pushes = get_pushes_jobs_json(
                            new_args, new_args.repo,
                            update_cache=args.update_cache)
                        if len(pushes):
                            # Convert the revision url to 40 characters.
                            push = pushes[0]
                            repository = get_repository_by_id(
                                push['revisions'][0]['repository_id'])
                            revision = push['revisions'][0]['revision']
                            revision_url = '%s/rev/%s' % (repository['url'], revision)
                            new_args.revision_url = revision_url
                            (new_args.repo, _,
                             new_args.revision) = new_args.revision_url.split('/')[-3:]

                            if revision_url not in data:
                                data[revision_url] = []

                            push_id = push['id']
                            repository = get_repository_by_id(
                                push['revisions'][0]['repository_id'])

                            # Only the original job is of interest for collecting the bugzilla data.
                            # The others are the retriggers.
                            # There shouldn't be a bug_job_map or bugzilla_suggestions for non-classified bugs.
                            job_id = push['jobs'][0]

                            bug_data = {
                                'bug_id': bug['id'],
                                'bug_summary': bug_summary,
                                'test': test,
                                'job_id': job_id,
                                'push_id': push_id,
                                'repository': repository['name'],
                                'revision_url': revision_url,
                                'bugzilla_suggestions': [],
                                'bug_job_map': [],
                                'pattern': convert_failure_to_pattern(bug_summary),
                            }
                            data[revision_url].append(bug_data)

                            # Get failure counts for trunk for this bug for the two weeks following
                            # the creation of the bug. Ignore failure counts for bugs that are less
                            # than 2 weeks old. Use the previous day for the start date and 15 days
                            # to account for timezone issues.
                            # TODO: Allow in place updating of bugzilla.json so that we can reprocess
                            # the failure counts without having to query the full set of bugs.
                            start_date = datetime.datetime.strptime(
                                bug['creation_time'].rstrip('Z'),
                                '%Y-%m-%dT%H:%M:%S') - datetime.timedelta(days=1)
                            end_date = start_date + datetime.timedelta(days=15)
                            failure_count_json = get_failure_count_json(
                                args, 'trunk', bug['id'], start_date, end_date)
                            if now - start_date < datetime.timedelta(days=15):
                                failure_count = None
                            else:
                                failure_count = 0
                                for failures in failure_count_json:
                                    failure_count += failures['failure_count']
                            bug_data['failure_count'] = failure_count

    cache.save(cache_attributes, 'bugzilla.json', json.dumps(data, indent=2))

    return data
def draw_image(self, img):
    self.image = cache.load(img)
    self.rect.size = self.image.get_size()
from SPARQLWrapper import SPARQLWrapper, JSON
import collections, re
from fuzzywuzzy import fuzz

finalFrequency = {}

import string
import numpy as np
import sklearn.cluster
from nltk.corpus import stopwords
import distance
import enchant

global groups

s = set(stopwords.words('english'))

from rdflib.namespace import SKOS
import cache

artistList = sorted(cache.load('artistList'))
genreList = sorted(cache.load('genreList'))
locationList = sorted(cache.load('locationList'))

words = np.asarray(list(genreList))  # So that indexing with a list will work

print(str('Calculating levenstein similarity'))
lev_similarity = -1 * np.array([[distance.levenshtein(w1, w2) for w1 in words]
                                for w2 in words])

print(str('Clustering begin'))
affprop = sklearn.cluster.AffinityPropagation(affinity="precomputed", damping=0.5)
affprop.fit(lev_similarity)

groups = {}
for cluster_id in np.unique(affprop.labels_):
    print(str(cluster_id))
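# The snippet above stops inside the cluster loop. A sketch of how each cluster's
# members could be collected into `groups`, using scikit-learn's documented
# cluster_centers_indices_ and labels_ attributes; the exemplar-keyed layout is an
# assumption, not the original author's code.
for cluster_id in np.unique(affprop.labels_):
    # The exemplar genre affinity propagation picked as this cluster's centre.
    exemplar = words[affprop.cluster_centers_indices_[cluster_id]]
    # All genres assigned to this cluster.
    members = np.unique(words[np.nonzero(affprop.labels_ == cluster_id)])
    groups[exemplar] = list(members)
    print(exemplar, '->', ', '.join(groups[exemplar]))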
def __init__(self):
    self.locations = cache.load('newReversedGroupedLocations')

    countries = []
    for key in self.locations.keys():
        countries.append(key.split(',')[-1].rstrip())
    countries = list(set(countries))

    translation = {
        'Slovaka': 'Slovakia',
        'Trinidad and Tobao': 'Trinidad and Tobago',
        'Luxemboug': 'Luxembourg',
        'Icelad': 'Iceland',
        'Cua': 'Cuba',
        'Brazl': 'Brazil',
        'Belgim': 'Belgium',
        'Portugl': 'Portugal',
        'Pakistn': 'Pakistan',
        'Moroco': 'Morroco',
        'Swedn': 'Sweden',
        'Costa Ria': 'Costa Rica',
        'Ecuadr': 'Eduador',
        'Canaa': 'Canada',
        'Greee': 'Greece',
        #' K' : 'UK',
        'Austra': 'Austria',
        'Australa': 'Australia',
        'Czechna': 'Czechnia',
        'Iceld': 'Iceland',
        'Peu': 'Peru',
        'Itay': 'Italy',
        'The Bahams': 'The Bahamas',
        'Netherlans': 'Netherlands',
        'Span': 'Spain',
        'Denmak': 'Denmark',
        'Hong Kog': 'Hong Kong',
        'Isral': 'Israel',
        'Lithuana': 'Lithuania',
        'Germay': 'Germany',
        'Norwy': 'Norway',
        'Jamaia': 'Jamaica',
        'Polad': 'Poland',
        'Nicaraga': 'Nicaragra',
        'Frane': 'France',
        'Serba': 'Serbia',
        'UA': 'USA',
        'Hungay': 'Hungry',
        'Switzerlad': 'Switzerland',
        'Austriala': 'Australia',
        'SSolomon Islans': 'Solomon Islands',
        'Boliva': 'Bolivia'
    }

    new_dict = {}
    for key in self.locations.keys():
        oldCountry = key[key.rfind(',') + 2:]
        newCountry = oldCountry
        if newCountry == 'K':
            newCountry = 'UK'
        for country_key in translation.keys():
            newCountry = newCountry.replace(
                country_key, translation[country_key]).rstrip()
        newKey = key[:key.rfind(',') + 2] + newCountry
        new_dict[newKey] = self.locations[key]

    cache.save(new_dict, 'newReversedGroupedLocations')
def get_pushes_jobs_job_details_json(args, repo, update_cache=False):
    """get_pushes_jobs_job_details_json

    Retrieve nested pushes, jobs, job details matching args set via
    push_args parser and job_args parser.
    """
    if hasattr(args, 'update_cache'):
        update_cache = args.update_cache

    cache_attributes = ['treeherder', repo, 'job_details']

    pushes = get_pushes_jobs_json(args, repo, update_cache=update_cache)

    for push in pushes:
        for job in push['jobs']:
            # job['job_guid'] contains a slash followed by the run number.
            # Convert this into a value which can be used as a file name
            # by replacing / with _.
            job_guid_path = job['job_guid'].replace('/', '_')
            job_details_data = cache.load(cache_attributes, job_guid_path)
            if job_details_data and not update_cache:
                job['job_details'] = json.loads(job_details_data)
            else:
                job['job_details'] = []
                # We can get all of the job details from CLIENT.get_job_details while
                # get_job_log_url only gives us live_backing.log and live.log.
                job['job_details'] = retry_client_request(
                    CLIENT.get_job_details, 3, job_guid=job['job_guid'])
                if job['job_details'] is None:
                    logger.warning("Unable to get job_details for job_guid %s",
                                   job['job_guid'])
                    continue
                cache.save(cache_attributes, job_guid_path,
                           json.dumps(job['job_details'], indent=2))

            if hasattr(args, 'add_resource_usage') and args.add_resource_usage:
                for attempt in range(3):
                    try:
                        for job_detail in job['job_details']:
                            if job_detail['value'] == 'resource-usage.json':
                                resource_usage_name = job_guid_path + '-' + job_detail['value']
                                job_detail_resource_usage_data = cache.load(
                                    cache_attributes, resource_usage_name)
                                if job_detail_resource_usage_data and not update_cache:
                                    job['resource_usage'] = json.loads(
                                        job_detail_resource_usage_data)
                                    job_detail_resource_usage_data = None
                                else:
                                    job['resource_usage'] = utils.get_remote_json(
                                        job_detail['url'])
                                    cache.save(
                                        cache_attributes, resource_usage_name,
                                        json.dumps(job['resource_usage'], indent=2))
                                break
                        break
                    except requests.HTTPError as e:
                        if '503 Server Error' not in str(e):
                            raise
                        logger.exception('get_job_details resource %s attempt %s', attempt)
                    except requests.ConnectionError:
                        logger.exception('get_job_details resource %s attempt %s', attempt)
                    if attempt != 2:
                        time.sleep(30)
                if attempt == 2:
                    logger.warning("Unable to get job_details for job_guid %s",
                                   job['job_guid'])
                    continue

    return pushes
def set_image(self, img):
    self.image = cache.load(img)