def update_jobpriorities(to_insert, _priority, _timeout):
    # to_insert contains the current high priority (pri=1) jobs; everything else is pri=5
    changed_jobs = []
    for item in to_insert:
        # NOTE: we ignore JobPriorities with expires as they take precedence
        data = session.query(JobPriorities.id, JobPriorities.priority)\
            .filter(and_(JobPriorities.testtype == item[2],
                         JobPriorities.buildtype == item[1],
                         JobPriorities.platform == item[0],
                         JobPriorities.expires == None)).all()
        if len(data) != 1:
            # TODO: if 0 items, do we add the job? if >1 do we alert and cleanup?
            continue

        if data[0][1] != _priority:
            changed_jobs.append(item)
            conn = engine.connect()
            statement = update(JobPriorities)\
                .where(and_(JobPriorities.testtype == item[2],
                            JobPriorities.buildtype == item[1],
                            JobPriorities.platform == item[0]))\
                .values(priority=_priority, timeout=_timeout)
            conn.execute(statement)

    return changed_jobs
def increase_jobs_priority(high_value_jobs, priority=1, timeout=0):
    """For every high value job, check whether we need to increase its priority.

    Currently, high value jobs have a priority of 1 and a timeout of 0.

    Return the jobs that had their priority increased.
    """
    changed_jobs = []
    for item in high_value_jobs:
        # NOTE: we ignore JobPriorities with expires as they take precedence
        data = session.query(JobPriorities.id, JobPriorities.priority)\
            .filter(and_(JobPriorities.testtype == item[2],
                         JobPriorities.buildtype == item[1],
                         JobPriorities.platform == item[0],
                         JobPriorities.expires == None)).all()  # flake8: noqa
        if len(data) != 1:
            # TODO: if 0 items, do we add the job? if >1 do we alert and cleanup?
            continue

        if data[0][1] != priority:
            changed_jobs.append(item)
            conn = engine.connect()
            statement = update(JobPriorities)\
                .where(and_(JobPriorities.testtype == item[2],
                            JobPriorities.buildtype == item[1],
                            JobPriorities.platform == item[0]))\
                .values(priority=priority, timeout=timeout)
            conn.execute(statement)

    return changed_jobs
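# Illustrative call only: the [platform, buildtype, testtype] triples below are made
# up, but they follow the item ordering this function indexes (item[0]=platform,
# item[1]=buildtype, item[2]=testtype).
#
#   high_value_jobs = [
#       ['linux64', 'opt', 'mochitest-browser-chrome'],
#       ['windows8-64', 'debug', 'reftest'],
#   ]
#   changed = increase_jobs_priority(high_value_jobs, priority=1, timeout=0)
#   LOG.info('%d jobs had their priority increased' % len(changed))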
def reset_preseed():
    data = session.query(JobPriorities.expires, JobPriorities.id)\
        .filter(JobPriorities.expires != None).all()

    now = datetime.datetime.now()
    for item in data:
        try:
            # expires is stored as "YYYY-MM-DD"; %m is the month (%M would be minutes)
            dv = datetime.datetime.strptime(item[0], "%Y-%m-%d")
        except ValueError:
            # TODO: consider updating column to have expires=None?
            continue
        except TypeError:
            dv = datetime.datetime.combine(item[0].today(), datetime.datetime.min.time())

        # reset the expires field if the date is today or in the past
        if dv.date() <= now.date():
            conn = engine.connect()
            statement = update(JobPriorities)\
                .where(JobPriorities.id == item[1])\
                .values(expires=None)
            conn.execute(statement)
def clear_expiration_field_for_expired_jobs():
    data = session.query(JobPriorities.expires, JobPriorities.id).filter(
        JobPriorities.expires != None).all()  # flake8: noqa

    now = datetime.datetime.now()
    for item in data:
        try:
            # expires is stored as "YYYY-MM-DD"; %m is the month (%M would be minutes)
            expiration_date = datetime.datetime.strptime(item[0], "%Y-%m-%d")
        except ValueError:
            # TODO: consider updating column to have expires=None?
            LOG.warning('Failed to downcast to datetime for ({},{})'.format(item[1], item[0]))
            continue
        except TypeError:
            expiration_date = datetime.datetime.combine(item[0].today(),
                                                        datetime.datetime.min.time())

        # reset the expiration field if the date is today or in the past
        if expiration_date.date() <= now.date():
            conn = engine.connect()
            statement = update(JobPriorities)\
                .where(JobPriorities.id == item[1])\
                .values(expires=None)
            conn.execute(statement)
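# A side note on the strptime format used above (standard library behavior, shown as a
# comment so it does not execute at import time):
#
#   datetime.datetime.strptime("2016-10-28", "%Y-%m-%d")  # -> datetime(2016, 10, 28, 0, 0)
#   datetime.datetime.strptime("2016-10-28", "%Y-%M-%d")  # -> datetime(2016, 1, 28, 0, 10)
#
# "%m" parses the month, while "%M" silently parses the middle field as *minutes* and
# leaves the month at its default of January, which is why "%Y-%m-%d" is the right
# directive for reading the "YYYY-MM-DD" expires values.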
def update_preseed():
    """Sync preseed.json to the jobpriorities table on server startup,
    since that is the only time we expect preseed.json to change.
    """
    # get the preseed data first
    preseed_path = os.path.join(os.path.dirname(SCRIPT_DIR), 'src', 'preseed.json')
    preseed = []
    with open(preseed_path, 'r') as fHandle:
        preseed = json.load(fHandle)

    # Preseed data has fields: buildtype, testtype, platform, priority, timeout, expires
    # The expires field defaults to 2 weeks for a new job in the database
    # The expires field holds a date "YYYY-MM-DD", but can be "*" to indicate never
    # Typical priority will be 1, but if we want to force coalescing we can do that
    # One hack is that if we have a '*' in a buildtype/testtype/platform field, then
    # we assume it applies to all flavors of the '*' field: i.e. linux64,pgo,* - all tests
    # assumption - preseed fields are sanitized already - move parse_testtype to utils.py?
    for job in preseed:
        data = session.query(JobPriorities.id,
                             JobPriorities.testtype,
                             JobPriorities.buildtype,
                             JobPriorities.platform,
                             JobPriorities.priority,
                             JobPriorities.timeout,
                             JobPriorities.expires,
                             JobPriorities.buildsystem)

        if job['testtype'] != '*':
            data = data.filter(getattr(JobPriorities, 'testtype') == job['testtype'])

        if job['buildtype'] != '*':
            data = data.filter(getattr(JobPriorities, 'buildtype') == job['buildtype'])

        if job['platform'] != '*':
            data = data.filter(getattr(JobPriorities, 'platform') == job['platform'])

        data = data.all()

        _buildsystem = job["build_system_type"]

        # TODO: edge case: we add future jobs with a wildcard; when jobs show up,
        # remove the wildcard and apply priority/timeout/expires to the new jobs

        # Deal with the case where we have a new entry in preseed
        if len(data) == 0:
            _expires = job['expires']
            if _expires == '*':
                _expires = str(datetime.now().date() + timedelta(days=365))

            LOG.info("adding a new unknown job to the database: %s" % job)
            newjob = JobPriorities(job['testtype'],
                                   job['buildtype'],
                                   job['platform'],
                                   job['priority'],
                                   job['timeout'],
                                   _expires,
                                   _buildsystem)
            session.add(newjob)
            session.commit()
            session.close()
            continue

        # We can have wildcards, so loop over all returned values in data
        for d in data:
            changed = False
            LOG.info("updating existing job %s/%s/%s" % (d[1], d[2], d[3]))
            _expires = job['expires']
            _priority = job['priority']
            _timeout = job['timeout']

            # we have a taskcluster job in the db, and a new job in preseed
            if d[7] != _buildsystem:
                _buildsystem = "*"
                changed = True

            # When we have a defined date to expire a job, parse and use it
            if _expires == '*':
                _expires = str(datetime.now().date() + timedelta(days=365))

            try:
                # expires is "YYYY-MM-DD"; %m is the month (%M would be minutes)
                dv = datetime.strptime(_expires, "%Y-%m-%d").date()
            except ValueError:
                continue

            # When the job has expired, keep the existing priority/timeout and reset expires
            if dv <= datetime.now().date():
                LOG.info(" -- past the expiration date - reset!")
                _expires = ''
                _priority = d[4]
                _timeout = d[5]
                changed = True

            if changed:
                # TODO: do we need try/except/finally with commit/rollback statements?
                conn = engine.connect()
                statement = update(JobPriorities).where(
                    JobPriorities.id == d[0]).values(priority=_priority,
                                                     timeout=_timeout,
                                                     expires=_expires,
                                                     buildsystem=_buildsystem)
                conn.execute(statement)
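# Illustrative only: based on the fields read above, a preseed.json file might look
# like the following (the concrete values are invented, not taken from the real file):
#
#   [
#       {"buildtype": "opt", "testtype": "mochitest-browser-chrome", "platform": "linux64",
#        "priority": 1, "timeout": 0, "expires": "*", "build_system_type": "taskcluster"},
#       {"buildtype": "*", "testtype": "*", "platform": "windows8-64",
#        "priority": 5, "timeout": 5400, "expires": "2017-01-01", "build_system_type": "*"}
#   ]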
def run_seta_details_query():
    buildbot = sanitize_bool(request.args.get("buildbot", 0))
    branch = sanitize_string(request.args.get("branch", 'mozilla-inbound'))
    taskcluster = sanitize_bool(request.args.get("taskcluster", 0))
    priority = int(sanitize_string(request.args.get("priority", '5')))
    jobnames = JOBSDATA.jobnames_query()
    date = str(datetime.now().date())
    retVal = {}
    retVal[date] = []
    jobtype = []

    # we only support the fx-team, autoland, and mozilla-inbound branches in seta
    if str(branch) not in ['fx-team', 'mozilla-inbound', 'autoland'] and str(branch) != '':
        abort(404)

    # For a TaskCluster request we don't care which priority the user requested.
    # We return high value jobs by default, and return all jobs on every 5th push
    # or every 90 minutes for that branch.
    if request.headers.get('User-Agent', '') == 'TaskCluster':
        # force taskcluster to 1 if it's a request from taskcluster; this keeps the
        # request url simple.
        taskcluster = 1

        # we return all jobs every 90 minutes, so the return_all_jobs flag is set to
        # true once the time limit has been reached.
        return_all_jobs = False

        # We should return the full job list as a fallback if it's a request from
        # taskcluster without a head_rev or pushlog_id in it
        try:
            branch_info = session.query(TaskRequests.counter,
                                        TaskRequests.datetime,
                                        TaskRequests.reset_delta).filter(
                                            TaskRequests.branch == branch).all()
        except Exception:
            branch_info = []

        time_of_now = datetime.now()
        # If we got nothing related to that branch, we should create it.
        if len(branch_info) == 0:
            # time_of_lastreset is not a good name anyway :(
            # We treat every branch's reset_delta as 90 minutes (5400 seconds); we
            # should find a better delta for each branch in the future.
            branch_data = TaskRequests(str(branch), 1, time_of_now, RESET_DELTA)
            try:
                session.add(branch_data)
                session.commit()
            except Exception as error:
                LOG.debug(error)
                session.rollback()
            finally:
                session.close()
            counter = 1
            time_string = time_of_now
            reset_delta = RESET_DELTA
            delta = 0  # the branch was just created, so no time has elapsed yet

        # We should update it if that branch has already been stored.
        else:
            counter, time_string, reset_delta = branch_info[0]
            counter += 1
            conn = engine.connect()
            statement = update(TaskRequests).where(
                TaskRequests.branch == branch).values(counter=counter)
            conn.execute(statement)
            delta = (time_of_now - time_string).total_seconds()

            # we should update the time recorder if the elapsed time has reached the
            # time limit of that branch.
            if delta >= reset_delta:
                conn = engine.connect()
                statement = update(TaskRequests).where(
                    TaskRequests.branch == branch).values(datetime=time_of_now)
                conn.execute(statement)
                # we need to set the return_all_jobs flag to true.
                return_all_jobs = True

        # we query all jobs rather than jobs filtered by the requested priority here,
        # because the job returning strategy depends on the job priority.
        query = session.query(JobPriorities.platform,
                              JobPriorities.buildtype,
                              JobPriorities.testtype,
                              JobPriorities.priority,
                              JobPriorities.timeout).all()

        for d in query:
            # we only return a job if it hasn't reached its timeout limit; a timeout
            # of zero means the job should always run.
            if delta < d[4] or d[4] == 0:
                # All high value jobs have a priority of 1, and we need to return all
                # jobs on every 5th push (for now).
                if counter % d[3] != 0:
                    jobtype.append([d[0], d[1], d[2]])
                # we need to return all jobs every 90 minutes, so all jobs are
                # returned if the delta is larger than reset_delta (5400 seconds)
                elif return_all_jobs:
                    jobtype.append([d[0], d[1], d[2]])

    # We don't care about a job's timeout if it's not a taskcluster request.
    else:
        query = session.query(JobPriorities.platform,
                              JobPriorities.buildtype,
                              JobPriorities.testtype,
                              JobPriorities.priority).all()

        # priority = 0: run all the jobs
        if priority != 1 and priority != 5:
            priority = 0

        # Because we store high value jobs in the seta table by default, we return
        # low value jobs (jobs with no related failures) by default.
        if priority == 0:
            jobtype = JOBSDATA.jobtype_query()  # All jobs regardless of priority
        # priority = 5: run all low value jobs
        else:
            joblist = [job for job in query if job[3] == priority]
            for j in joblist:
                jobtype.append([j[0], j[1], j[2]])

    # TODO: filter out based on buildsystem from database, either 'buildbot' or '*'
    if buildbot:
        active_jobs = []
        # pick out buildbot jobs from the job list to speed up the filtering
        buildbot_jobs = [job for job in jobnames if job['buildplatform'] == 'buildbot']
        # find the corresponding job detail information
        for job in jobtype:
            for j in buildbot_jobs:
                if j['name'] == job[2] and j['platform'] == job[0] and j['buildtype'] == job[1]:
                    active_jobs.append(j['ref_data_name'] if branch == 'mozilla-inbound'
                                       else j['ref_data_name'].replace('mozilla-inbound', branch))
        jobtype = active_jobs

    # TODO: filter out based on buildsystem from database, either 'taskcluster' or '*'
    if taskcluster:
        active_jobs = []
        taskcluster_jobs = [job for job in jobnames if job['buildplatform'] == 'taskcluster']
        for job in jobtype:
            # translate the jobtype back into the proper data form.
            job[2] = job[2].replace('e10s-browser-chrome', 'browser-chrome-e10s')
            job[2] = job[2].replace('e10s-devtools-chrome', 'devtools-chrome-e10s')
            job[2] = job[2].replace('gl-', 'webgl-')
            for j in taskcluster_jobs:
                if job[2] in j['name'] and j['platform'] == job[0] and j['buildtype'] == job[1]:
                    active_jobs.append(j['ref_data_name'])
        jobtype = active_jobs

    retVal[date] = jobtype
    return {"jobtypes": retVal}
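# For reference, a sketch of the response shape this endpoint returns (the values are
# illustrative): without the buildbot/taskcluster filtering, jobtype is a list of
# [platform, buildtype, testtype] triples; with filtering it becomes a list of
# ref_data_name strings instead.
#
#   {
#       "jobtypes": {
#           "2016-10-28": [
#               ["linux64", "opt", "mochitest-browser-chrome"],
#               ["windows8-64", "debug", "reftest"]
#           ]
#       }
#   }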
def _update_job_priority_table(data):
    """Add new jobs to the priority table and update the build system if required."""
    LOG.info('Fetch all rows from the job priority table.')
    # Get all rows of job priorities
    db_data = session.query(JobPriorities.id,
                            JobPriorities.testtype,
                            JobPriorities.buildtype,
                            JobPriorities.platform,
                            JobPriorities.priority,
                            JobPriorities.timeout,
                            JobPriorities.expires,
                            JobPriorities.buildsystem).all()

    # TODO: write a test for this
    # When the table is empty it means that we're starting the system for the first time
    # and we're going to use different default values
    map = {}
    if len(db_data) != 0:
        priority = 1
        timeout = 0
        # Using %Y-%m-%d fixes this issue:
        # Warning: Incorrect date value: '2016-10-28 17:36:58.153265' for column 'expires' at row 1
        expiration_date = (datetime.datetime.now() +
                           datetime.timedelta(days=14)).strftime("%Y-%m-%d")
        # This data structure reduces how many times we iterate through the DB rows
        for row in db_data:
            key = tuple(row[1:4])
            # This is guaranteed by a unique composite index for these 3 fields in models.py
            assert key not in map,\
                '"{}" should be a unique row and that is unexpected.'.format(key)
            # (testtype, buildtype, platform)
            map[key] = {'pk': row[0], 'build_system_type': row[7]}
    else:
        priority = 5
        timeout = 5400
        expiration_date = None

    total_jobs = len(data)
    new_jobs = 0
    failed_changes = 0
    updated_jobs = 0
    # Loop through sanitized jobs, add new jobs and update the build system if needed
    for job in data:
        _buildsystem = job["build_system_type"]
        key = _unique_key(job)
        if key in map:
            # We already know about this job; we might need to update the build system
            row_build_system_type = map[key]['build_system_type']

            if row_build_system_type == '*' or _buildsystem == '*':
                # We don't need to update anything
                pass
            else:
                # We're seeing the job again but for another build system (e.g. buildbot vs
                # taskcluster). We need to change it to '*'
                if row_build_system_type != _buildsystem:
                    _buildsystem = "*"
                    # Update the table with the new buildsystem
                    try:
                        conn = engine.connect()
                        statement = update(JobPriorities).where(
                            JobPriorities.id == map[key]['pk']).values(buildsystem=_buildsystem)
                        conn.execute(statement)
                        LOG.info('Updated {}/{} from {} to {}'.format(
                            job['testtype'], job['platform_option'],
                            job['build_system_type'], _buildsystem))
                        updated_jobs += 1
                    except Exception as e:
                        LOG.info("key = %s, buildsystem = %s" % (key, _buildsystem))
                        LOG.info("exception updating jobPriorities: %s" % e)
        else:
            # We have a new job from runnablejobs to add to our master list
            try:
                jobpriority = JobPriorities(str(job["testtype"]),
                                            str(job["platform_option"]),
                                            str(job["platform"]),
                                            priority,
                                            timeout,
                                            expiration_date,
                                            _buildsystem)
                session.add(jobpriority)
                session.commit()
                LOG.info('New job was found ({},{},{},{})'.format(
                    job['testtype'], job['platform_option'], job['platform'], _buildsystem))
                new_jobs += 1
            except Exception as error:
                session.rollback()
                LOG.warning(error)
                failed_changes += 1
            finally:
                session.close()

    LOG.info('We have {} new jobs and {} updated jobs out of {} total jobs processed.'.format(
        new_jobs, updated_jobs, total_jobs))

    if failed_changes != 0:
        LOG.error('We have failed {} changes out of {} total jobs processed.'.format(
            failed_changes, total_jobs))
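# _unique_key() is referenced above but defined elsewhere. Judging from the
# tuple(row[1:4]) keys built from the DB rows, it presumably maps a sanitized job to a
# (testtype, buildtype, platform) tuple. A minimal sketch under that assumption (the
# real helper may differ):
#
#   def _unique_key(job):
#       """Return a key that uniquely identifies a job in the priority table."""
#       return (str(job['testtype']), str(job['platform_option']), str(job['platform']))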
def add_jobs_to_jobpriority(new_data=None, priority=1, timeout=0, set_expired=False):
    added_jobs = []
    if not new_data:
        return

    # TODO: as a perf improvement we can reduce jobs prior to this expensive for loop
    for job in new_data['results']:
        # TODO: potentially ensure no duplicates in new_data and query once outside the loop
        db_data = []
        db_data = session.query(JobPriorities.id,
                                JobPriorities.testtype,
                                JobPriorities.buildtype,
                                JobPriorities.platform,
                                JobPriorities.priority,
                                JobPriorities.timeout,
                                JobPriorities.expires,
                                JobPriorities.buildsystem).all()

        platform = parse_platform(job['build_platform'])
        if platform is None or platform == "":
            continue

        testtype = parse_testtype(job['build_system_type'],
                                  job['ref_data_name'],
                                  job['platform_option'],
                                  job['job_type_name'])
        if testtype is None or testtype == "":
            continue

        _buildsystem = job["build_system_type"]
        found = False
        found_id = None
        for row in db_data:
            if (row[1] == testtype and
                    row[3] == platform and
                    row[2] == job["platform_option"]):
                # TODO: what if we have a race condition with two identical jobs?
                # verify the build system type is the same, or make it '*'
                found = True
                if row[7] != "*" and _buildsystem != row[7]:
                    _buildsystem = "*"
                    found_id = row[0]

        # We have new jobs from runnablejobs to add to our master list
        if not found:
            _expired = None
            if set_expired:
                # set _expired = today + 14 days
                # TODO: write a test for it
                _expired = "%s" % (datetime.datetime.now() + datetime.timedelta(days=14))

            try:
                jobpriority = JobPriorities(str(testtype),
                                            str(job["platform_option"]),
                                            str(job["build_platform"]),
                                            priority,
                                            timeout,
                                            _expired,
                                            _buildsystem)
                session.add(jobpriority)
                session.commit()
                added_jobs.append(job)
            except Exception as error:
                session.rollback()
                logging.warning(error)
            finally:
                session.close()
        elif _buildsystem != job['build_system_type']:
            # update the table with the new buildsystem
            conn = engine.connect()
            statement = update(JobPriorities)\
                .where(JobPriorities.id == found_id)\
                .values(buildsystem=_buildsystem)
            conn.execute(statement)

    return added_jobs
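# For context, a sketch of the new_data payload this function expects. The field names
# come from the loop above; the concrete values are invented:
#
#   new_data = {
#       "results": [
#           {"build_platform": "linux64",
#            "build_system_type": "taskcluster",
#            "platform_option": "opt",
#            "ref_data_name": "desktop-test-linux64/opt-mochitest-browser-chrome-1",
#            "job_type_name": "Mochitest Browser Chrome"},
#       ]
#   }
#   added = add_jobs_to_jobpriority(new_data, priority=1, timeout=0, set_expired=True)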