Example #1
def simulate(cache, trace):
    global misses
    global hits
    for line in trace:
        splitLine = line.split()
        if (len(splitLine) == 3):
            trash, op, address = splitLine
            if op == 'R':
                result = cache.read(address)
                if (result == 0):
                    misses += 1
                    cache.load(address)
                    cache.read(address)
                else:
                    hits += 1

            else:
                result = cache.write(address)
                if (result == 0):
                    misses += 1
                    cache.load(address)
                    cache.write(address)
                else:
                    hits += 1
    print_results(misses, hits)
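
Example #1 only exercises the cache object's interface: read() and write() return 0 on a miss and a non-zero value on a hit, and load() brings the address into the cache. Below is a minimal sketch of a compatible cache; the class name, capacity and FIFO eviction policy are assumptions, and only the read/write/load contract is taken from the example.

# A minimal sketch of a cache object usable with simulate() above.
# Capacity and FIFO eviction are assumptions; only the interface
# (read/write return 0 on a miss, load inserts the address) comes
# from Example #1.
from collections import OrderedDict

class SimpleCache:
    def __init__(self, capacity=64):
        self.capacity = capacity
        self.lines = OrderedDict()  # address -> True, in insertion order

    def read(self, address):
        return 1 if address in self.lines else 0

    def write(self, address):
        # In this sketch a write hits or misses exactly like a read.
        return self.read(address)

    def load(self, address):
        if len(self.lines) >= self.capacity:
            self.lines.popitem(last=False)  # evict the oldest line (FIFO)
        self.lines[address] = True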
Example #2
def getStandings(teams):
	key = "standings_" + "_".join(teams)
	standings = load(key)
	now = datetime.now()

	if standings is None:
		data = urllib2.urlopen("http://mlb.mlb.com/lookup/json/named.standings_schedule_date.bam?season=%d&schedule_game_date.game_date='%s'&sit_code='h0'&league_id=103&league_id=104&all_star_sw='N'&version=2" % (now.year, now.strftime("%Y/%m/%d")))
		data = json.load(data)["standings_schedule_date"]["standings_all_date_rptr"]["standings_all_date"]

		rows = data[0]["queryResults"]["row"] + data[1]["queryResults"]["row"]

		standings = []

		for row in rows:
			found = getTeam(row["team_abbrev"])

			if found is not None and row["team_abbrev"] in teams:
				standings.append({
					"name": found.name,
					"abbrev": found.code,
					"subreddit": found.subreddit,
					"wins": int(row["w"]),
					"losses": int(row["l"]),
					"percent": row["pct"],
					"games_back": row["gb"]
				})

		standings.sort(lambda a, b: -1 if a["percent"] > b["percent"] else 1 if a["percent"] < b["percent"] else -1 if a["wins"] > b["wins"] else 1 if a["wins"] < b["wins"] else 0)

		save(key, standings, 30)

	return standings
Example #3
	def connFromId(id, hafas = None):
		if hafas is None:
			hafas = Hafas
		if cache.in_cache(id):
			c = cache.load(id)
			return (c.sections[0].train, c)
		else:
			sid = "{}".format(int(id, 16))
			sourceId = sid[1:10]
			destinationId = sid[10:19]
			date = datetime.strptime(sid[19:], '%Y%m%d%H%M')
			source = HafasStation('dummy', sourceId, [])
			destination = HafasStation('dummy', destinationId, [])
			cl = hafas.searchConnections(source, destination, date,1)
	
			I = 10

			if datetime.combine(cl[-1].date, cl[-1].departure.time) != date:
				while datetime.combine(cl[-1].date, cl[-1].departure.time) < date:
		
					next(cl)
	
					if I == 0:
						break
					I -= 1
	
			c = cl[-1]
			c.queryRelation(hafas)

			cache.save(c, id)

			return (c.sections[0].train, c)
Example #4
 def load_from_path(self, path, project_root):
     self.clear()
     # Tables
     longpath = os.path.join(path, 'tables/*.yml')
     for filename in glob.glob(longpath):
         t = cache.load(project_root, filename, Table)
         self.add_table(t)
Example #5
def load_matrix(path):
    '''
    path: `str`
        Path to .mtx file.

    Returns:
    matrix: `numpy.ndarray(np.float32)`, (N,N)
        Number of people traveling from zone `i` to zone `j` in `matrix[i,j]`.
    zones: `numpy.ndarray(str)`, (N)
        Name of zone `i` in `zones[i]`.
    '''
    cachename = path
    r = cache.load(cachename)
    if r is not None: return r

    m = ReadPTVMatrix(filename=path)
    matrix = m['matrix'].astype(np.float32)
    ids = [int(z.coords['zone_no'].data) for z in m['zone_name']]

    origins = [int(v.data) for v in matrix['origins']]
    destinations = [int(v.data) for v in matrix['destinations']]
    assert origins == ids, \
            "different order in matrix['origins'] and zone_name"
    assert destinations == ids, \
            "different order in matrix['destinations'] and zone_name"

    zonenames = np.array([str(z.data) for z in m['zone_name']])

    r = matrix.data, zonenames
    return cache.save(cachename, r)
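
Examples #5 and #24 share the same load-or-compute idiom: cache.load(key) returns None on a miss, and cache.save(key, value) persists the value and returns it, so the function can end with return cache.save(...). A generic sketch of that idiom as a decorator follows; the names cached_by_key and key_fn are assumptions, and only the load/save contract is taken from the examples.

# A sketch of the load-or-compute idiom from Examples #5 and #24.
# `cache` stands for any object with load(key) -> value-or-None and
# save(key, value) -> value; cached_by_key and key_fn are assumed names.
import functools

def cached_by_key(cache, key_fn=lambda *a, **kw: repr((a, sorted(kw.items())))):
    def decorator(fn):
        @functools.wraps(fn)
        def wrapper(*args, **kwargs):
            key = key_fn(*args, **kwargs)
            hit = cache.load(key)
            if hit is not None:
                return hit  # cache hit: reuse the stored result
            return cache.save(key, fn(*args, **kwargs))  # miss: compute, persist, return
        return wrapper
    return decorator

With such a helper, the body of load_matrix above could be wrapped as @cached_by_key(cache, key_fn=lambda path: path) instead of repeating the load/save boilerplate.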
Example #6
 def get_version_by_name(self, name):
     filename = "/versions/%s.yml" % name
     if os.path.exists(self.project_root + filename):
         version = cache.load(self.project_root, filename, Version)
         return version
     else:
         return None
Example #7
def get_job_bugzilla_suggestions_json(args,
                                      repo,
                                      job_id,
                                      include_related_bugs=False,
                                      update_cache=False):
    """get_job_bugzilla_suggestions_json

    Retrieve job_bugzilla_suggestions given args, repo and job_id

    """
    cache_attributes = ['treeherder', repo, 'bugzilla_suggestions']

    suggestions_data = cache.load(cache_attributes, job_id)
    if suggestions_data and not update_cache:
        suggestions = json.loads(suggestions_data)
    else:
        bugzilla_suggestions_url = '%s/api/project/%s/jobs/%s/bug_suggestions/' % (
            (URL, repo, job_id))

        suggestions = utils.get_remote_json(bugzilla_suggestions_url)
        cache.save(cache_attributes, job_id, json.dumps(suggestions, indent=2))

    if args.test_failure_pattern:
        bugzilla_suggestions = [
            suggestion for suggestion in suggestions
            if args.test_failure_pattern.search(suggestion['search'])
        ]
    else:
        bugzilla_suggestions = suggestions

    if not include_related_bugs:
        for bug_data in bugzilla_suggestions:
            del bug_data['bugs']

    return bugzilla_suggestions
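
The Treeherder helpers (Examples #7, #16, #18, #20, #22 and #36) all call the cache with an attribute list plus a name: cache.load(cache_attributes, name) returns a previously saved JSON string or a falsy value, and cache.save(cache_attributes, name, data) stores the string. The file-backed sketch below matches that calling convention; CACHE_ROOT and the on-disk layout are assumptions, and only the string-in/string-out contract is taken from the examples.

# A file-backed sketch matching cache.load(attributes, name) /
# cache.save(attributes, name, data) as used in the Treeherder examples.
# CACHE_ROOT and the directory layout are assumptions.
import os

CACHE_ROOT = os.path.expanduser('~/.cache/treeherder-examples')  # assumed location

def load(attributes, name):
    path = os.path.join(CACHE_ROOT, *attributes, str(name))
    try:
        with open(path) as f:
            return f.read()
    except OSError:
        return None  # callers treat a falsy result as a cache miss

def save(attributes, name, data):
    directory = os.path.join(CACHE_ROOT, *attributes)
    os.makedirs(directory, exist_ok=True)
    with open(os.path.join(directory, str(name)), 'w') as f:
        f.write(data)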
Example #8
    def __init__(self):
        self.sparql = SPARQLWrapper("http://etree.linkedmusic.org/sparql")
        self.sparql.setReturnFormat(JSON)
        self.sparql.setMethod("POST")

        performances = cache.load('list_all_performances')
        # performances = self.get_all_performances()
        # cache.save(performances, 'list_all_performances')
        print('Got perm')
        self.examine_tracklists(performances)
Example #9
 def test_save_cache(self):
     with fixtures.TestFixture("head_test1test2", clean=True) as tf:
         project_path = tf.temp_path
         filename = "/versions/head.yml"
         expected = schema.Version()
         content = fixtures.read_from_file(project_path + filename)
         expected.load_from_str(content)
         cached = cache.load(project_path, filename, schema.Version)
         assert cached == expected
         print(project_path + "/.cache" + filename)
         assert os.path.exists(project_path + "/.cache" + filename + ".pickle")
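
The test in Example #9 pins down the behaviour behind Examples #4 and #6: cache.load(project_root, filename, cls) parses the source file on first use and leaves a pickled copy at project_root + "/.cache" + filename + ".pickle". Below is a sketch consistent with that assertion; the pickle-first lookup and the directory creation are assumptions, while cls().load_from_str mirrors the schema objects used in the test.

# A sketch consistent with Example #9's assertion that loading leaves
# <project_root>/.cache/<filename>.pickle behind. The pickle-first
# lookup is an assumption; cls().load_from_str mirrors the schema
# objects used in the test.
import os
import pickle

def load(project_root, filename, cls):
    pickled = project_root + "/.cache" + filename + ".pickle"
    if os.path.exists(pickled):
        with open(pickled, 'rb') as f:
            return pickle.load(f)  # fast path: reuse the cached object
    obj = cls()
    with open(project_root + filename) as f:
        obj.load_from_str(f.read())  # slow path: parse the source file
    os.makedirs(os.path.dirname(pickled), exist_ok=True)
    with open(pickled, 'wb') as f:
        pickle.dump(obj, f)  # persist for the next call
    return obj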
Example #10
def check_page():
    page = crawl(configuration['targetURL'])  # .decode("utf8")
    page_hash = md5(page)
    c = load()
    if not c['hash'] == page_hash:
        print("HASH CHANGED! (" + page_hash + ")")

        # Run a background thread to archive the page in the web archive
        start_new_thread(crawl, ("https://web.archive.org/save/" +
                                 configuration['targetURL'], False))

        # Check if the file is online and we didn't send the mail already (if so, send it)
        match = parse(page.decode('utf8'))
        if match is not None and not c['mailSent']:
            print(
                "FILE IS ONLINE! Sending mails ... (and we didn't send them already)"
            )
            docx = crawl(match)
            for person_details in configuration['details']:
                variables = {
                    "name": person_details['name'],
                    "year": person_details['targetYear'],
                    "quarter": person_details['quarter'],
                    "mail": person_details['mail'],
                    "streetAndCity": person_details['streetAndCity'],
                    "phone": person_details['phone'],
                    "matrikelnr": person_details['matrikelnr']
                }
                res = parser.update_document_contents(docx, person_details)
                res_filename = "Antrag Wohnheimzimmer " + variables[
                    'quarter'] + " " + variables['year'] + ".docx"
                mail.send(configuration['mail'], variables, res, res_filename)
            c['mailSent'] = True

        # Send a mail regardless of the above that there is a change
        notification_conf = {
            "body":
            "Something changed! Go and visit " + configuration['targetURL'],
            "subject":
            "IMPORTANT | The watched website has changed! Go check it immediately!",
            "recipient": configuration['mail']['notificationRecipient'],
            "server": configuration['mail']['server']
        }
        if c['mailSent']:
            notification_conf[
                'body'] += "\n\n Oh and btw I already sent your reservation request ;)\n\n Have a good one!\n - AccommodationBot"
        mail.send(notification_conf)

        c['hash'] = page_hash
    else:
        print("Boring old same page...")

    save(c)
Example #11
 def request(self, request_body, verbose=1,update_cache=False):
     rsp = cache.load(self.uri+request_body)
     if rsp == None or update_cache:
         req = urllib2.Request(url=self.uri)
         req.add_data(data=request_body)
         headers =  [ ('Content-Type', 'application/json; charset=utf-8'),
         ('Accept-Encoding', 'text'),
         ('Accept', 'application/json,application/json,application/jsonrequest')]
         for t,v in headers:
             req.add_header(t,v)
         rsp = urllib2.urlopen(req).read()
         cache.store(self.uri+request_body,rsp)
     return simplejson.loads(rsp)
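
Examples #11 and #13 cache raw HTTP responses under a string key via cache.load(key) and cache.store(key, value). A minimal sketch of such a pair backed by shelve is shown below; the shelve backing store and the file name are assumptions, since the examples only require load to return the stored string or None.

# A minimal sketch of the load/store pair used by Examples #11 and #13.
# Backing the cache with shelve and the _DB_PATH name are assumptions.
import shelve

_DB_PATH = 'http_cache.db'  # assumed file name

def load(key):
    with shelve.open(_DB_PATH) as db:
        return db.get(key)  # None when the key has never been stored

def store(key, value):
    with shelve.open(_DB_PATH) as db:
        db[key] = value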
Example #12
    def __init__(self):
        self.geoCache = cache.load('geoCache')
        self.locations = cache.load('locationList')
        self.latlng = cache.load('locationLatLng')
        self.notGeolocated = cache.load('failedToGeolocate')
        self.geolocator = geocoders.GoogleV3(
            api_key="AIzaSyBnR6mRCbJ3yPsmhY-btGpfHpHJ_H6fZLI")
        #AIzaSyBnR6mRCbJ3yPsmhY-btGpfHpHJ_H6fZLI")
        # AIzaSyChlf0VSGWPD3tbp2fbCWOGoniICT_2owc")
        # AIzaSyDE3JOGCJJvG7OWo4BIfgW-6bmp5icH840

        # Get latitude and longitude for each place name
        self.get_lat_lng()

        # Group on these retrieved lat / lng values
        self.group_on_lat_lng()

        # Calculate the new default key
        self.add_default_keys()

        # Evaluate and provide statistics
        self.evaluate_processing()
Example #13
def get_item(url=None, path=None):
    if path:
        url = path2url(path)
    src = cache.load(url)
    if src:
        d = json.loads(src)
        response = Response(*d)
    else:
        response = _get_request(url)
        cache.store(url, json.dumps(response))
    if not response.ok:
        logging.error('Status {} on {}'.format(response.status, response.url))
        return None
    return json.loads(response.content)
Example #14
    def __init__(self, cache):
        """
    Initializes an instance of the Calma class.

    The Calma class is used for all interfacing with the feature extraction tools available at the end-point provided by Sean Bechhofer's research and work.

    """
        self.sparql = SPARQLWrapper("https://etree.linkedmusic.org/sparql")
        self.sparql.setReturnFormat(JSON)
        self.sparql.setMethod("POST")
        self.keyInfo = None
        self.loudnessValues = None
        self.segmentInfo = None
        self.cache = cache
        self.calmaCache = cache.load('calmaCache')
Example #15
def get_instances(file):
  """
  Input: file
  Output: dictionary
  moduleName: path      : full_path
              instances : set()
  """
  if cache.refreshCache():
    cache.clean()
  cached = cache.load(file)
  if cached:
    return cached['parsed']
  preprDict = vpreprocessor.Preprocessor(file).result
  res = vparser.Parser(preprDict).result
  cache.dump(res)
  return res['parsed']
Example #16
def get_pushes_jobs_json(args, repo, update_cache=False):
    """get_pushes_jobs_json

    Retrieve nested pushes, jobs matching args set via push_args
    parser and job_args parser.

    """
    if hasattr(args, 'update_cache'):
        update_cache = args.update_cache

    cache_attributes_push_jobs = ['treeherder', repo, 'push_jobs']

    pushes = get_pushes_json(args, repo, update_cache=update_cache)

    for push in pushes:
        push_jobs_data = cache.load(cache_attributes_push_jobs, push['id'])
        if push_jobs_data and not update_cache:
            jobs = json.loads(push_jobs_data)
        else:
            jobs = retry_client_request(CLIENT.get_jobs,
                                        3,
                                        repo,
                                        push_id=push['id'],
                                        count=None)
            cache.save(cache_attributes_push_jobs, push['id'],
                       json.dumps(jobs, indent=2))

        if not args.job_filters:
            push['jobs'] = jobs
        else:
            push['jobs'] = []
            for job in jobs:
                include = True
                for filter_name in args.job_filters:
                    include &= args.job_filters[filter_name].search(
                        job[filter_name]) is not None
                if include:
                    push['jobs'].append(job)
        if args.add_bugzilla_suggestions:
            for job in push['jobs']:
                if job['result'] != 'testfailed':
                    job['bugzilla_suggestions'] = []
                    continue
                job['bugzilla_suggestions'] = get_job_bugzilla_suggestions_json(
                    args, repo, job['id'], update_cache=update_cache)
    return pushes
Example #17
def send_smtp_test():
    c = load()
    if not c['smtpTestSent']:
        c['smtpTestSent'] = True
        save(c)
        notification_conf = {
            "body":
            "This is a test of your smtp settings.\nYour final mail will be sent to "
            + ", ".join(configuration["mail"]["recipient"]) +
            ".\n\n- Accommodation Bot",
            "subject":
            "SMTP Settings Test!",
            "recipient":
            configuration['mail']['notificationRecipient'],
            "server":
            configuration['mail']['server']
        }
        mail.send(notification_conf)
Example #18
def get_job_by_repo_job_id_json(args, repo, job_id, update_cache=False):
    """get_job_by_repo_job_id_json

    Retrieve job given args, repo and job_id

    """
    cache_attributes = ['treeherder', repo, 'jobs']

    job_data = cache.load(cache_attributes, job_id)
    if job_data and not update_cache:
        jobs = [json.loads(job_data)]
    else:
        jobs = retry_client_request(CLIENT.get_jobs, 3, repo, id=job_id)
        if jobs:
            for job in jobs:
                cache.save(cache_attributes, job['id'],
                           json.dumps(job, indent=2))

    return jobs[0]
Example #19
    def load_cache(self, mode):
        self.cache = cache.load('{}_threshold_{}_{}'.format(self.c.dataset, self.c.threshold, mode), mode)

        self.p_size = len(self.cache['foreground'])
        self.n_size = len(self.cache['background'])

        self.p_shuffle_counter = self.p_size
        self.n_shuffle_counter = self.n_size

        self.p_virtual = list(range(self.p_size))
        self.p_physical = list(range(self.p_size))
        self.n_virtual = list(range(self.n_size))
        self.n_physical = list(range(self.n_size))
Example #20
def get_bug_job_map_json(args, repo, job_id, update_cache=False):
    """get_bug_job_map_json

    Retrieve bug_job_map given args, repo and job_id

    """
    cache_attributes = ['treeherder', repo, 'bug-job-map']

    bug_job_map_url = '%s/api/project/%s/bug-job-map/?job_id=%s' % (
        (URL, repo, job_id))

    bug_job_map_data = cache.load(cache_attributes, job_id)
    if bug_job_map_data and not update_cache:
        bug_job_map = json.loads(bug_job_map_data)
        bug_job_map_data = None
    else:
        bug_job_map = utils.get_remote_json(bug_job_map_url)
        cache.save(cache_attributes, job_id, json.dumps(bug_job_map, indent=2))

    return bug_job_map
Example #21
    def add_default_keys(self):
        # dict = {}
        dict = cache.load('newReversedGroupedLocations')

        self.prev_validated = []  # cache.load('previously_validated')

        for key in self.grouped.keys():
            newKey = self.generateKey(key)
            if newKey is not None:
                if newKey not in dict.keys():
                    newKey = newKey.strip()
                    dict[newKey] = {}
                    dict[newKey]['latlng'] = key
                    dict[newKey]['locations'] = self.grouped[key]
                else:
                    # Append contents of conflicting key to existing key
                    dict[newKey]['locations'] = dict[newKey][
                        'locations'] + self.grouped[key]
                cache.save(dict, 'redo_dict_grouped_locations')
                cache.save(self.prev_validated, 'redo_previously_validated')
Example #22
def get_push_json(args, repo, push_id, update_cache=False):
    """get_pushes_json

    Retrieve push by push_id.
    """
    cache_attributes = ['treeherder', repo, 'push']

    push_params = get_treeherder_push_params(args)
    push_params['id'] = push_id

    push = None
    if not update_cache:
        push_data = cache.load(cache_attributes, push_params['id'])
        if push_data:
            push = json.loads(push_data)
            return push

    pushes = retry_client_request(CLIENT.get_pushes, 3, repo, **push_params)
    if pushes:
        return pushes[0]
    return None
Example #23
 def findNewEd2k(self, url):
     '''
     find and cache all ed2k links on a page, but only return new links
     '''
     links = self.findEd2k(url)
     print 'found %i ed2k links' % len(links)
     self.ed2k.extend(links)
     cache_id = hash(url)
     if cache.has_cache(cache_id):
         cacheList = cache.load(cache_id)
         if cacheList == self.ed2k:
             print 'nothing changed. ' + url
         else: 
             print 'you have new links ' + url
             newLinks = zip(*self.ed2k)[0]
             oldLinks = zip(*cacheList)[0]
             diff = list(set(newLinks).difference( set(oldLinks) )) # lists difference
             for link in diff:
                 print link
                 pyperclip.copy(link) # TODO
     else:
         print 'just cache the links ' + url
     cache.cache(self.ed2k, cache_id)
Example #24
def load_zones(path):
    '''
    path: str
        Path to .gpkg file.

    Returns:
    zone_to_canton: `dict`
        Mapping from zone name to canton code (e.g. 'Dietlikon' -> 'ZH')
    '''
    cachename = path
    r = cache.load(cachename)
    if r is not None: return r

    gdf = gpd.read_file(path)
    zonenames = list(map(str, gdf.N_Gem))
    zonecantons = list(map(str, gdf.N_KT))

    zone_to_canton = {}

    for name, canton in zip(zonenames, zonecantons):
        zone_to_canton[name] = canton

    r = zone_to_canton
    return cache.save(cachename, r)
Example #25
 def __init__(self):
     self.cache = load()
     self.endpoint = '127.0.0.1', 53
     self.origin = '8.8.8.8', 53
Example #26
import parse, cache, itertools, files

link = parse.base + 'classical/sort=0,start='

prev = cache.load_prev()
codes_old = []
for i in itertools.count(start=prev + 1):
    src = parse.get_source(link + str(i*50))
    codes = parse.get_codes(src)
    if codes == codes_old:
        break
    codes_old = codes
    names, probs, users = cache.load()
    for p, code in enumerate(codes):
        parse.print_progress(i, p + 1)
        parse.problem(code, names, probs, users)
    if len(codes) == 50:
        cache.dump(i, names, probs, users)

for code, name in names.items():
    names[code] = name.encode('utf-8')

files.clear()
files.write(names, probs, users)
Example #27
 def set_image_over(self, img):
     self.image_over = cache.load(img)
Example #28
                    comment_author,
                    diff_author,
                    colorama.Fore.CYAN,
                    comment_text,
                )
            counts[comment_author] += 1
            if args.just_tally:
                sys.stdout.write('.')
                sys.stdout.flush()
    if args.just_tally:
        print ''
    print '=== Counts ==='
    for key, value in counts.items():
        print '%s: %s' % (key, value)
    print 'Total: %s comments on %s diffs' % (sum(counts.values()), total_diffs)


if __name__ == '__main__':
    colorama.init(autoreset=True)
    cache.load()
    parser = argparse.ArgumentParser(prog='differential-comments')
    parser.add_argument('--team', help='Which team from settings to use',
        **kwargs_or_default(settings.DEFAULT_TEAM))
    parser.add_argument('--days', help='How many days back to go', default=30)
    parser.add_argument('--comment-days', help='How many days back to go for the comments')
    parser.add_argument('--just-tally', help='Just print the final tally', action='store_true')
    parser.add_argument('--just-email', help='Just one user by email address')
    args = parser.parse_args()
    list(args)
    cache.update()
Example #29
def plugin_loaded():
    cache.load()
    style_parser.init_file_loading()
Example #30
import editdistance
import sys
import cache

# Load list of artists
artistList = cache.load('artistList')

possibleDuplicates = []

# For each artist
for artist in artistList:
  # Compare against each other artist
  for otherArtist in artistList:
    # If we can reach otherArtist from artist in 1-2 letter changes
    if 0 < editdistance.eval(artist, otherArtist) < 2:
      # Add to possible duplicates
      possibleDuplicates.append([artist, otherArtist])

correct = 0
incorrect = 0

processed = []
for artist, otherArtist in possibleDuplicates:
  # If artist, otherArtist have not been compared against each-other yet
  if [artist, otherArtist] not in processed and [otherArtist, artist] not in processed:
    isCorrect = input(str(artist) + ',  ' + str(otherArtist) + '\n')
    # If user says this is a correct assumption
    if isCorrect == '1':
      correct += 1
    # If user says this is a false positive
    else:
      incorrect += 1
def summarize_isolation_pushes_jobs_json(args):

    pushes = []

    test_isolation_bugzilla_data = get_test_isolation_bugzilla_data(args)
    for revision_url in test_isolation_bugzilla_data:
        revision_data = test_isolation_bugzilla_data[revision_url]
        new_args = copy.deepcopy(args)
        new_args.revision_url = revision_url
        (new_args.repo, _,
         new_args.revision) = new_args.revision_url.split('/')[-3:]
        new_args.add_bugzilla_suggestions = True
        new_args.state = 'completed'
        new_args.result = 'success|testfailed'
        new_args.job_type_name = '^test-'
        new_args.test_failure_pattern = TEST_FAILURE_PATTERN
        jobs_args.compile_filters(new_args)

        # Load the pushes/jobs data from cache if it exists.
        cache_attributes = ['test-isolation', new_args.repo]
        pushes_jobs_data = cache.load(cache_attributes, new_args.revision)
        if pushes_jobs_data and not args.update_cache:
            new_pushes = json.loads(pushes_jobs_data)
        else:
            new_pushes = get_pushes_jobs_json(new_args,
                                              new_args.repo,
                                              update_cache=args.update_cache)
            cache.save(cache_attributes, new_args.revision,
                       json.dumps(new_pushes, indent=2))

        pushes.extend(new_pushes)

        for revision_bug_data in revision_data:
            if args.bugs and revision_bug_data['bug_id'] not in args.bugs:
                # Skip if we requested a specific bug and this is not it.
                continue
            if args.bugs and args.override_bug_summary:
                revision_bug_data[
                    'bug_summary'] = bugzilla_summary_munge_failure(
                        args.override_bug_summary)

    pushes_jobs_data = None
    data = convert_pushes_to_test_isolation_bugzilla_data(args, pushes)

    #logger.info('convert_pushes_to_test_isolation_bugzilla_data\n{}'.format(
    #    json.dumps(data, indent=2)))

    summary = {}

    for revision_url in data:

        (repo, _, revision) = revision_url.split('/')[-3:]

        if revision_url not in summary:
            summary[revision_url] = {}
        summary_revision = summary[revision_url]

        job_type_names = sorted(data[revision_url].keys())

        for job_type_name in job_type_names:
            if job_type_name not in summary_revision:
                summary_revision[job_type_name] = dict(
                    notes=[],
                    isolation_job=
                    "{}/#/jobs?repo={}&tier=1%2C2%2C3&revision={}&searchStr={}"
                    .format(args.treeherder_url, repo, revision,
                            job_type_name),
                )
            summary_revision_job_type = summary_revision[job_type_name]

            job_type = data[revision_url][job_type_name]

            if 'bugzilla_data' not in summary_revision_job_type:
                summary_revision_job_type['bugzilla_data'] = copy.deepcopy(
                    test_isolation_bugzilla_data[revision_url])
                for bug_data in summary_revision_job_type['bugzilla_data']:
                    # bug_data['failure_reproduced'][section_name] counts the
                    # number of times the original bug_summary failure
                    # was seen in that section of jobs.
                    bug_data['failure_reproduced'] = dict(
                        original=0,
                        repeated=0,
                        id=0,
                        it=0,
                    )
                    # bug_data['test_reproduced'][section_name] counts the
                    # number of times the original bug_summary test
                    # was seen in that section of jobs.
                    bug_data['test_reproduced'] = dict(
                        original=0,
                        repeated=0,
                        id=0,
                        it=0,
                    )

            for section_name in (ORIGINAL_SECTIONS + ISOLATION_SECTIONS):
                if section_name not in summary_revision_job_type:
                    summary_revision_job_type[section_name] = dict(
                        failures={},
                        tests={},
                        failure_reproduced=0,
                        test_reproduced=0,
                    )
                    if section_name == 'original':
                        summary_revision_job_type[section_name][
                            'bug_job_map'] = []

                summary_revision_job_type_section = summary_revision_job_type[
                    section_name]

                job_type_section = job_type[section_name]

                run_time = 0
                jobs_testfailed_count = 0
                bugzilla_suggestions_count = 0

                for job in job_type_section:
                    if section_name == 'original':
                        summary_revision_job_type_section[
                            'bug_job_map'].extend(job['bug_job_map'])
                    run_time += job['end_timestamp'] - job['start_timestamp']
                    jobs_testfailed_count += 1 if job[
                        'result'] == 'testfailed' else 0
                    bugzilla_suggestions_count += len(
                        job['bugzilla_suggestions'])

                    for bugzilla_suggestion in job['bugzilla_suggestions']:

                        #failure = bugzilla_summary_munge_failure(bugzilla_suggestion['search'])
                        failure = bugzilla_suggestion['search']
                        if failure not in summary_revision_job_type_section[
                                'failures']:
                            summary_revision_job_type_section['failures'][
                                failure] = dict(
                                    count=0,
                                    failure_reproduced=0,
                                )

                        summary_revision_job_type_section['failures'][failure][
                            'count'] += 1
                        for bug_data in summary_revision_job_type[
                                'bugzilla_data']:
                            if args.bugs and args.override_bug_summary:
                                #pattern = convert_failure_to_pattern(bugzilla_summary_munge_failure(args.override_bug_summary))
                                pattern = convert_failure_to_pattern(
                                    args.override_bug_summary)
                            else:
                                pattern = bug_data['pattern']
                            if re.compile(pattern).search(failure):
                                bug_data['failure_reproduced'][
                                    section_name] += 1
                                summary_revision_job_type_section['failures'][
                                    failure]['failure_reproduced'] += 1
                                summary_revision_job_type_section[
                                    'failure_reproduced'] += 1

                            test = get_test(failure)
                            if test:
                                if test not in summary_revision_job_type_section[
                                        'tests']:
                                    summary_revision_job_type_section['tests'][
                                        test] = dict(
                                            count=0,
                                            test_reproduced=0,
                                        )

                                summary_revision_job_type_section['tests'][
                                    test]['count'] += 1
                                if args.bugs and args.override_bug_summary:
                                    bug_data_test = get_test(
                                        args.override_bug_summary)
                                else:
                                    bug_data_test = bug_data['test']
                                if bug_data_test and test in bug_data_test:
                                    bug_data['test_reproduced'][
                                        section_name] += 1
                                    summary_revision_job_type_section['tests'][
                                        test]['test_reproduced'] += 1
                                    summary_revision_job_type_section[
                                        'test_reproduced'] += 1

                summary_revision_job_type_section['run_time'] = run_time
                summary_revision_job_type_section[
                    'jobs_testfailed'] = jobs_testfailed_count
                summary_revision_job_type_section['jobs_total'] = len(
                    job_type_section)
                summary_revision_job_type_section[
                    'bugzilla_suggestions_count'] = bugzilla_suggestions_count

    return summary
def get_test_isolation_bugzilla_data(args):
    """Query Bugzilla for bugs marked with [test isolation] in the
    whiteboard.  Return a dictionary keyed by revision url containing
    the bug id and summary.

    """
    cache_attributes = ['test-isolation']

    bugzilla_data = cache.load(cache_attributes, 'bugzilla.json')
    if bugzilla_data and not args.update_cache:
        return json.loads(bugzilla_data)

    now = datetime.datetime.now()

    data = {}

    re_logview = re.compile(
        r'https://treeherder.mozilla.org/logviewer.html#\?job_id=([0-9]+)&repo=([a-z-]+)'
    )
    re_pushlog_url = re.compile(r'(https://.*)$\n', re.MULTILINE)

    query = BUGZILLA_URL + 'bug?'
    query_terms = {
        'include_fields': 'id,creation_time,whiteboard',
        'creation_time': args.bug_creation_time,
        'whiteboard': args.whiteboard,
        'limit': 100,
        'offset': 0,
    }
    if args.bugs:
        query_terms['id'] = ','.join([str(id) for id in args.bugs])
    else:
        query_terms['creation_time'] = args.bug_creation_time

    while True:
        response = utils.get_remote_json(query, params=query_terms)
        if 'error' in response:
            logger.error('Bugzilla({}, {}): {}'.format(query, query_terms,
                                                       response))
            return

        if len(response['bugs']) == 0:
            break

        # update query terms for next iteration of the loop.
        query_terms['offset'] += query_terms['limit']

        for bug in response['bugs']:
            #https://bugzilla.mozilla.org/rest/bug/1559260/comment

            if args.bugs_after and bug['id'] <= args.bugs_after:
                continue

            if args.whiteboard not in bug['whiteboard']:
                # The query performs an all words not substring
                # query, so restrict to the substring.
                continue

            if args.bugs and bug['id'] not in args.bugs:
                continue

            query2 = BUGZILLA_URL + 'bug/%s' % bug['id']
            response2 = utils.get_remote_json(query2)
            if 'error' in response2:
                logger.error('Bugzilla({}): {}'.format(query2, response2))
                return

            bug_summary = response2['bugs'][0]['summary']
            munged_bug_summary = bugzilla_summary_munge_failure(bug_summary)

            query3 = BUGZILLA_URL + 'bug/%s/comment' % bug['id']
            response3 = utils.get_remote_json(query3)
            if 'error' in response3:
                logger.error('Bugzilla({}): {}'.format(query, response3))
                return

            raw_text = response3['bugs'][str(
                bug['id'])]['comments'][0]['raw_text']

            match = re_logview.search(raw_text)
            if match:
                # Get push associated with this failed job.
                job_id = int(match.group(1))
                repo = match.group(2)
                job = get_job_by_repo_job_id_json(
                    args, repo, job_id, update_cache=args.update_cache)
                push_id = job['push_id']
                push = get_push_json(args,
                                     repo,
                                     push_id,
                                     update_cache=args.update_cache)
                repository = get_repository_by_id(
                    push['revisions'][0]['repository_id'])
                revision = push['revisions'][0]['revision']
                revision_url = '%s/rev/%s' % (repository['url'], revision)

                new_args = copy.deepcopy(args)
                new_args.revision_url = revision_url
                (new_args.repo, _,
                 new_args.revision) = new_args.revision_url.split('/')[-3:]
                new_args.add_bugzilla_suggestions = True
                new_args.state = 'completed'
                new_args.result = 'success|testfailed'
                #new_args.job_type_name = '^test-'
                new_args.job_type_name = job['job_type_name']
                new_args.test_failure_pattern = TEST_FAILURE_PATTERN
                pushes_args.compile_filters(new_args)
                jobs_args.compile_filters(new_args)

                if revision_url not in data:
                    data[revision_url] = []

                mozharness_failure = match_bug_summary_to_mozharness_failure(
                    bug_summary, raw_text)

                test = None
                if mozharness_failure:
                    test = get_test(mozharness_failure)
                    pattern = convert_failure_to_pattern(mozharness_failure)
                if not test:
                    test = get_test(munged_bug_summary)
                    pattern = convert_failure_to_pattern(munged_bug_summary)
                if not test:
                    logger.warning('Unable to obtain test for '
                                   'bug {} {} failure {}'.format(
                                       bug['id'], bug_summary,
                                       mozharness_failure))

                bug_data = {
                    'bug_id':
                    bug['id'],
                    'bug_summary':
                    bug_summary,
                    'munged_bug_summary':
                    munged_bug_summary,
                    'job_type_name':
                    job['job_type_name'],
                    'test':
                    test,
                    'mozharness_failure':
                    mozharness_failure,
                    'job_id':
                    job_id,
                    'push_id':
                    push_id,
                    'repository':
                    repository['name'],
                    'revision_url':
                    revision_url,
                    'bugzilla_suggestions':
                    get_job_bugzilla_suggestions_json(
                        new_args,
                        new_args.repo,
                        job_id,
                        update_cache=args.update_cache),
                    'bug_job_map':
                    get_bug_job_map_json(new_args,
                                         new_args.repo,
                                         job_id,
                                         update_cache=args.update_cache),
                    'pattern':
                    pattern,
                }

                data[revision_url].append(bug_data)

                # Get failure counts for trunk for this bug for the two weeks following
                # the creation of the bug. Ignore failure counts for bugs that are less
                # than 2 weeks old.
                # TODO: Allow in-place updating of bugzilla.json so that we can reprocess
                # the failure counts without having to query the full set of bugs.
                start_date = datetime.datetime.strptime(
                    bug['creation_time'].rstrip('Z'),
                    '%Y-%m-%dT%H:%M:%S') - datetime.timedelta(days=1)
                end_date = start_date + datetime.timedelta(days=15)
                failure_count_json = get_failure_count_json(
                    args, 'trunk', bug['id'], start_date, end_date)
                if now - start_date < datetime.timedelta(days=15):
                    failure_count = None
                else:
                    failure_count = 0
                    for failures in failure_count_json:
                        failure_count += failures['failure_count']
                bug_data['failure_count'] = failure_count

            elif args.whiteboard and False:  # Disabled as it is buggy.
                # Either this run has specified the test, or this is a bug
                # that was not filed from Treeherder. If it was marked
                # via the whiteboard then we are interested in the
                # pushes for this bug. Since we can't really tell
                # which is which, we can include all of the pushes,
                # since only those with test isolation jobs will
                # matter. The problem is that this bug does not
                # necessarily have a bug_summary referencing a test
                # failure...
                test = None  # We don't have a failure in this case.
                comments = response3['bugs'][str(bug['id'])]['comments']
                for comment in comments:
                    if not comment['raw_text'].startswith('Pushed by'):
                        continue
                    # Get the last revision in the comment as the head of the push.
                    revision_url = None
                    pushlog_url_match = re_pushlog_url.search(
                        comment['raw_text'])
                    while pushlog_url_match:
                        revision_url = pushlog_url_match.group(1)
                        pushlog_url_match = re_pushlog_url.search(
                            comment['raw_text'], pushlog_url_match.end(1))
                    if revision_url:
                        # revision_url from Bugzilla has the 12 character revision.
                        new_args = copy.deepcopy(args)
                        new_args.revision_url = revision_url
                        (new_args.repo, _, new_args.revision
                         ) = new_args.revision_url.split('/')[-3:]
                        new_args.add_bugzilla_suggestions = True
                        new_args.state = 'completed'
                        new_args.job_type_name = '^test-'
                        new_args.test_failure_pattern = TEST_FAILURE_PATTERN
                        pushes_args.compile_filters(new_args)
                        jobs_args.compile_filters(new_args)

                        pushes = get_pushes_jobs_json(
                            new_args,
                            new_args.repo,
                            update_cache=args.update_cache)
                        if len(pushes):
                            # Convert the revision url to 40 characters.
                            push = pushes[0]
                            repository = get_repository_by_id(
                                push['revisions'][0]['repository_id'])
                            revision = push['revisions'][0]['revision']
                            revision_url = '%s/rev/%s' % (repository['url'],
                                                          revision)
                            new_args.revision_url = revision_url
                            (new_args.repo, _, new_args.revision
                             ) = new_args.revision_url.split('/')[-3:]

                            if revision_url not in data:
                                data[revision_url] = []

                            push_id = push['id']
                            repository = get_repository_by_id(
                                push['revisions'][0]['repository_id'])
                            # Only the original job is of interest for collecting the bugzilla data.
                            # The others are the retriggers.
                            #  There shouldn't be a bug_job_map or bugzilla_suggestions for non-classified bugs.
                            job_id = push['jobs'][0]

                            bug_data = {
                                'bug_id': bug['id'],
                                'bug_summary': bug_summary,
                                'test': test,
                                'job_id': job_id,
                                'push_id': push_id,
                                'repository': repository['name'],
                                'revision_url': revision_url,
                                'bugzilla_suggestions': [],
                                'bug_job_map': [],
                                'pattern':
                                convert_failure_to_pattern(bug_summary),
                            }
                            data[revision_url].append(bug_data)

                            # Get failure counts for trunk for this bug for the two weeks following
                            # the creation of the bug. Ignore failure counts for bugs that are less
                            # than 2 weeks old. Use the previous day for the start date and 15 days
                            # to account for timezone issues.
                            # TODO: Allow in-place updating of bugzilla.json so that we can reprocess
                            # the failure counts without having to query the full set of bugs.
                            start_date = datetime.datetime.strptime(
                                bug['creation_time'].rstrip('Z'),
                                '%Y-%m-%dT%H:%M:%S') - datetime.timedelta(
                                    days=1)
                            end_date = start_date + datetime.timedelta(days=15)
                            failure_count_json = get_failure_count_json(
                                args, 'trunk', bug['id'], start_date, end_date)
                            if now - start_date < datetime.timedelta(days=15):
                                failure_count = None
                            else:
                                failure_count = 0
                                for failures in failure_count_json:
                                    failure_count += failures['failure_count']
                            bug_data['failure_count'] = failure_count

    cache.save(cache_attributes, 'bugzilla.json', json.dumps(data, indent=2))

    return data
Example #33
 def draw_image(self, img):
     self.image = cache.load(img)
     self.rect.size = self.image.get_size()
Example #34
from SPARQLWrapper import SPARQLWrapper, JSON
import collections, re
from fuzzywuzzy import fuzz
finalFrequency = {}
import string
import numpy as np
import sklearn.cluster
from nltk.corpus import stopwords
import distance
import enchant
global groups
s = set(stopwords.words('english'))
from rdflib.namespace import SKOS
import cache

artistList = sorted(cache.load('artistList'))
genreList = sorted(cache.load('genreList'))
locationList = sorted(cache.load('locationList'))

words = np.asarray(list(genreList))  # So that indexing with a list will work

print(str('Calculating Levenshtein similarity'))
lev_similarity = -1 * np.array([[distance.levenshtein(w1, w2) for w1 in words]
                                for w2 in words])
print(str('Clustering begin'))
affprop = sklearn.cluster.AffinityPropagation(affinity="precomputed",
                                              damping=0.5)
affprop.fit(lev_similarity)
groups = {}
for cluster_id in np.unique(affprop.labels_):
    print(str(cluster_id))
    def __init__(self):
        self.locations = cache.load('newReversedGroupedLocations')

        countries = []
        for key in self.locations.keys():
            countries.append(key.split(',')[-1].rstrip())
        countries = list(set(countries))

        translation = {
            'Slovaka': 'Slovakia',
            'Trinidad and Tobao': 'Trinidad and Tobago',
            'Luxemboug': 'Luxembourg',
            'Icelad': 'Iceland',
            'Cua': 'Cuba',
            'Brazl': 'Brazil',
            'Belgim': 'Belgium',
            'Portugl': 'Portugal',
            'Pakistn': 'Pakistan',
            'Moroco': 'Morocco',
            'Swedn': 'Sweden',
            'Costa Ria': 'Costa Rica',
            'Ecuadr': 'Ecuador',
            'Canaa': 'Canada',
            'Greee': 'Greece',
            #' K' : 'UK',
            'Austra': 'Austria',
            'Australa': 'Australia',
            'Czechna': 'Czechia',
            'Iceld': 'Iceland',
            'Peu': 'Peru',
            'Itay': 'Italy',
            'The Bahams': 'The Bahamas',
            'Netherlans': 'Netherlands',
            'Span': 'Spain',
            'Denmak': 'Denmark',
            'Hong Kog': 'Hong Kong',
            'Isral': 'Israel',
            'Lithuana': 'Lithuania',
            'Germay': 'Germany',
            'Norwy': 'Norway',
            'Jamaia': 'Jamaica',
            'Polad': 'Poland',
            'Nicaraga': 'Nicaragua',
            'Frane': 'France',
            'Serba': 'Serbia',
            'UA': 'USA',
            'Hungay': 'Hungary',
            'Switzerlad': 'Switzerland',
            'Austriala': 'Australia',
            'SSolomon Islans': 'Solomon Islands',
            'Boliva': 'Bolivia'
        }

        new_dict = {}
        for key in self.locations.keys():
            oldCountry = key[key.rfind(',') + 2:]
            newCountry = oldCountry
            if newCountry == 'K':
                newCountry = 'UK'
            for country_key in translation.keys():
                newCountry = newCountry.replace(
                    country_key, translation[country_key]).rstrip()

            newKey = key[:key.rfind(',') + 2] + newCountry
            new_dict[newKey] = self.locations[key]

        cache.save(new_dict, 'newReversedGroupedLocations')
Example #36
def get_pushes_jobs_job_details_json(args, repo, update_cache=False):
    """get_pushes_jobs_job_details_json

    Retrieve nested pushes, jobs, job details matching args set via
    push_args parser and job_args parser.

    """
    if hasattr(args, 'update_cache'):
        update_cache = args.update_cache

    cache_attributes = ['treeherder', repo, 'job_details']

    pushes = get_pushes_jobs_json(args, repo, update_cache=update_cache)

    for push in pushes:
        for job in push['jobs']:
            # job['job_guid'] contains a slash followed by the run number.
            # Convert this into a value which can be used a file name
            # by replacing / with _.
            job_guid_path = job['job_guid'].replace('/', '_')
            job_details_data = cache.load(cache_attributes, job_guid_path)
            if job_details_data and not update_cache:
                job['job_details'] = json.loads(job_details_data)
            else:
                job['job_details'] = []
                # We can get all of the job details from CLIENT.get_job_details while
                # get_job_log_url only gives us live_backing.log and live.log.
                job['job_details'] = retry_client_request(
                    CLIENT.get_job_details, 3, job_guid=job['job_guid'])
                if job['job_details'] is None:
                    logger.warning("Unable to get job_details for job_guid %s",
                                   job['job_guid'])
                    continue
                cache.save(cache_attributes, job_guid_path,
                           json.dumps(job['job_details'], indent=2))

            if hasattr(args, 'add_resource_usage') and args.add_resource_usage:
                for attempt in range(3):
                    try:
                        for job_detail in job['job_details']:
                            if job_detail['value'] == 'resource-usage.json':
                                resource_usage_name = job_guid_path + '-' + job_detail[
                                    'value']
                                job_detail_resource_usage_data = cache.load(
                                    cache_attributes, resource_usage_name)
                                if job_detail_resource_usage_data and not update_cache:
                                    job['resource_usage'] = json.loads(
                                        job_detail_resource_usage_data)
                                    job_detail_resource_usage_data = None
                                else:
                                    job['resource_usage'] = utils.get_remote_json(
                                        job_detail['url'])
                                    cache.save(
                                        cache_attributes, resource_usage_name,
                                        json.dumps(job['resource_usage'],
                                                   indent=2))
                                break
                        break
                    except requests.HTTPError as e:
                        if '503 Server Error' not in str(e):
                            raise
                        logger.exception(
                            'get_job_details resource usage attempt %s', attempt)
                    except requests.ConnectionError:
                        logger.exception(
                            'get_job_details resource usage attempt %s', attempt)
                    if attempt != 2:
                        time.sleep(30)
                if attempt == 2:
                    logger.warning("Unable to get job_details for job_guid %s",
                                   job['job_guid'])
                    continue
    return pushes
Example #37
def plugin_loaded():
    cache.load()
    style_parser.init_file_loading()
Example #38
 def set_image(self, img):
     self.image = cache.load(img)