def get_ts(self):
    """Time series of the review Backlog Management Index (BMI).

    Per period: bmiscr = (abandoned + merged) / submitted, built from the
    Abandoned, Merged and Submitted metric classes over the same filters.

    :return: dict time series with the standard period fields plus 'bmiscr'
    """
    abandoned_reviews = Abandoned(self.db, self.filters)
    merged_reviews = Merged(self.db, self.filters)
    submitted_reviews = Submitted(self.db, self.filters)

    abandoned = abandoned_reviews.get_ts()
    abandoned = completePeriodIds(abandoned, self.filters.period,
                                  self.filters.startdate, self.filters.enddate)
    # casting the type of the variable in order to use numpy
    # faster way to deal with datasets...
    abandoned_array = numpy.array(abandoned["abandoned"])

    merged = merged_reviews.get_ts()
    merged = completePeriodIds(merged, self.filters.period,
                               self.filters.startdate, self.filters.enddate)
    merged_array = numpy.array(merged["merged"])

    submitted = submitted_reviews.get_ts()
    submitted = completePeriodIds(submitted, self.filters.period,
                                  self.filters.startdate, self.filters.enddate)
    submitted_array = numpy.array(submitted["submitted"])

    # NOTE(review): periods with 0 submitted reviews divide by zero here
    # (numpy yields inf/nan rather than raising) -- confirm downstream
    # consumers tolerate that.
    bmi_array = (abandoned_array.astype(float)
                 + merged_array.astype(float)) / submitted_array.astype(float)

    # Reuse the abandoned dict as the result skeleton: it already carries
    # the completed period/date fields.
    bmi = abandoned
    bmi.pop("abandoned")
    bmi["bmiscr"] = list(bmi_array)

    return bmi
def GetSentSummaryCompanies (period, startdate, enddate, identities_db, num_organizations, projects_db):
    """Evolution of emails sent per company: one column per company for the
    first num_organizations companies, the rest summed into 'Others'.

    :return: time-series dict (period fields + one key per company + 'Others')
    """
    count = 1
    first_organizations = {}

    metric = DataSource.get_metrics("organizations", MLS)
    organizations = metric.get_list()
    # NOTE(review): the ITS/SCM variants unpack get_list()['name'] before
    # iterating; here the result is iterated directly -- confirm it yields
    # company names.
    for company in organizations:
        type_analysis = ["company", "'"+company+"'"]
        sent = EvolEmailsSent(period, startdate, enddate, identities_db, type_analysis, projects_db)
        sent = completePeriodIds(sent, period, startdate, enddate)
        # Rename field sent to company name
        sent[company] = sent["sent"]
        del sent['sent']

        if (count <= num_organizations):
            #Case of organizations with entity in the dataset
            # Python 2 dict merge; later keys win on collisions
            first_organizations = dict(first_organizations.items() + sent.items())
        else :
            #Case of organizations that are aggregated in the field Others
            if 'Others' not in first_organizations:
                first_organizations['Others'] = sent[company]
            else:
                first_organizations['Others'] = [a+b for a, b in zip(first_organizations['Others'],sent[company])]
        count = count + 1

    # Normalize/complete period ids for the merged per-company series
    first_organizations = completePeriodIds(first_organizations, period, startdate, enddate)

    return(first_organizations)
def ticketsTimeToResponseByField(self, period, startdate, enddate, closed_condition, field, values_set):
    """Time-to-response series (first action, first comment, time closed),
    split by the values of an issues-table field.

    :param field: column of the issues table (queried as "i.<field>")
    :param values_set: iterable of field values to compute series for
    :return: dict merging, per value, median/avg series keyed tfa_*/tfc_*/ttc_*
    """
    condition = "AND i." + field + " = '%s'"
    evol = {}
    for field_value in values_set:
        field_condition = condition % field_value

        # --- time to first action ---
        fa_alias = 'tfa_%s' % field_value
        data = self.GetTimeToFirstAction(period, startdate, enddate, field_condition, fa_alias)
        if not isinstance(data[fa_alias], (list)):
            # single-row result: normalize scalars to lists
            data[fa_alias] = [data[fa_alias]]
            data['date'] = [data['date']]
        if len(data[fa_alias]) == 0:
            # no tickets at all for this field value: skip it entirely
            continue
        time_to_fa = self.getMedianAndAvg(period, fa_alias, data['date'], data[fa_alias])
        time_to_fa = completePeriodIds(time_to_fa, period, startdate, enddate)

        # --- time to first comment ---
        fc_alias = 'tfc_%s' % field_value
        data = self.GetTimeToFirstComment(period, startdate, enddate, field_condition, fc_alias)
        if not isinstance(data[fc_alias], (list)):
            data[fc_alias] = [data[fc_alias]]
            data['date'] = [data['date']]
        time_to_fc = self.getMedianAndAvg(period, fc_alias, data['date'], data[fc_alias])
        time_to_fc = completePeriodIds(time_to_fc, period, startdate, enddate)

        # --- time to close ---
        tclosed_alias = 'ttc_%s' % field_value
        data = self.GetTimeClosed(period, startdate, enddate, closed_condition, field_condition, tclosed_alias)
        if not isinstance(data[tclosed_alias], (list)):
            data[tclosed_alias] = [data[tclosed_alias]]
            data['date'] = [data['date']]
        time_closed = self.getMedianAndAvg(period, tclosed_alias, data['date'], data[tclosed_alias])
        time_closed = completePeriodIds(time_closed, period, startdate, enddate)

        # Python 2 dict merge; aliases are unique per value so no data is lost
        evol = dict(evol.items() + time_to_fa.items() + time_to_fc.items() + time_closed.items())
    return evol
def result(self, data_source, destdir = None):
    """Generate the SCR community attraction/retention JSON reports.

    Writes new/gone contributor summaries, per-person submission activity
    time series, and the people-intake evolution into destdir. No-ops for
    any data source other than SCR, or when destdir is missing.
    """
    from vizgrimoire.SCR import SCR
    if data_source != SCR or destdir is None: return

    period = self.filters.period
    startdate = self.filters.startdate
    enddate = self.filters.enddate

    # New contributors summary
    code_contrib = {}
    code_contrib["submitters"] = self.GetNewSubmitters()
    code_contrib["mergers"] = self.GetNewMergers()
    code_contrib["abandoners"] = self.GetNewAbandoners()
    createJSON(code_contrib, destdir+"/scr-code-contrib-new.json")

    # Gone contributors summary
    code_contrib = {}
    code_contrib["submitters"] = self.GetGoneSubmitters()
    code_contrib["mergers"] = self.GetGoneMergers()
    code_contrib["abandoners"] = self.GetGoneAbandoners()
    createJSON(code_contrib, destdir+"/scr-code-contrib-gone.json")

    # Per-person submissions of newly arrived submitters
    data = self.GetNewSubmittersActivity()
    evol = {}
    evol['people'] = {}
    for uuid in data['uuid']:
        pdata = self.db.GetPeopleEvolSubmissionsSCR(uuid, period, startdate, enddate)
        pdata = completePeriodIds(pdata, period, startdate, enddate)
        evol['people'][uuid] = {"submissions":pdata['submissions']}
        # Just to have the time series data
        evol = dict(evol.items() + pdata.items())
    if 'changes' in evol:
        del evol['changes'] # closed (metrics) is included in people
    createJSON(evol, destdir+"/new-people-activity-scr-evolutionary.json")

    # Per-person submissions of submitters who left
    data = self.GetGoneSubmittersActivity()
    evol = {}
    evol['people'] = {}
    for uuid in data['uuid']:
        pdata = self.db.GetPeopleEvolSubmissionsSCR(uuid, period, startdate, enddate)
        pdata = completePeriodIds(pdata, period, startdate, enddate)
        evol['people'][uuid] = {"submissions":pdata['submissions']}
        # Just to have the time series data
        evol = dict(evol.items() + pdata.items())
    if 'changes' in evol:
        del evol['changes'] # closed (metrics) is included in people
    createJSON(evol, destdir+"/gone-people-activity-scr-evolutionary.json")

    # data = GetPeopleLeaving()
    # createJSON(data, destdir+"/leaving-people-scr.json")

    # People intake buckets: 1 review, 1-5 reviews, 5-10 reviews
    evol = {}
    data = completePeriodIds(self.db.GetPeopleIntake(0,1), period, startdate, enddate)
    evol[period] = data[period]
    evol['id'] = data['id']
    evol['date'] = data['date']
    evol['num_people_1'] = data['people']
    evol['num_people_1_5'] = completePeriodIds(self.db.GetPeopleIntake(1,5),period, startdate, enddate)['people']
    evol['num_people_5_10'] = completePeriodIds(self.db.GetPeopleIntake(5,10), period, startdate, enddate)['people']
    createJSON(evol, destdir+"/scr-people-intake-evolutionary.json")
def ticketsTimeOpenedByType(self, period, startdate, enddate, closed_condition, result_type):
    """Time series of how long tickets stayed opened, one variant per
    result_type: 'action' (until first action), 'comment' (until first
    comment), anything else the plain time-opened series.
    """
    # Build a set of dates
    dates = completePeriodIds({period : []}, period, startdate, enddate)[period]
    dates.append(dates[-1] + 1) # add one more month

    # Alias selects the output field name for the requested variant
    if result_type == 'action':
        alias = "topened_tfa"
    elif result_type == 'comment':
        alias = "topened_tfc"
    else:
        alias = "topened"

    time_opened = self.getTicketsTimeOpened(period, dates, closed_condition, result_type, alias)
    time_opened = completePeriodIds(time_opened, period, startdate, enddate)
    return time_opened
def create_filter_report(filter_, period, startdate, enddate, destdir, npeople, identities_db):
    """Create the per-item JSON reports (evolutionary + aggregated) for a
    QAForums filter (e.g. repository, company) under destdir."""
    from vizgrimoire.report import Report
    items = Report.get_items()
    if items is None:
        items = QAForums.get_filter_items(filter_, startdate, enddate, identities_db)
        if items == None:
            return
        items = items['name']

    filter_name = filter_.get_name()

    if not isinstance(items, list):
        items = [items]

    file_items = []
    for item in items:
        # Names starting with a dot get a leading "_" so the generated JSON
        # file is not a hidden dotfile
        if re.compile("^\..*").match(item) is not None:
            item = "_"+item
        file_items.append(item)

    # Write the item list for this filter
    fn = os.path.join(destdir, filter_.get_filename(QAForums()))
    createJSON(file_items, fn)

    for item in items:
        logging.info(item)
        filter_item = Filter(filter_.get_name(), item)

        evol_data = QAForums.get_evolutionary_data(period, startdate, enddate, identities_db, filter_item)
        fn = os.path.join(destdir, filter_item.get_evolutionary_filename(QAForums()))
        createJSON(completePeriodIds(evol_data, period, startdate, enddate), fn)

        agg = QAForums.get_agg_data(period, startdate, enddate, identities_db, filter_item)
        fn = os.path.join(destdir, filter_item.get_static_filename(QAForums()))
        createJSON(agg, fn)
def get_ts(self):
    """Time series of unique visitors per month.

    Only the 'month' period is supported because the backing
    ``visits_month`` table is aggregated monthly.

    :return: completed time-series dict with 'uvisitors'
    :raises ValueError: if the configured period is not 'month'
    """
    if self.filters.period != 'month':
        msg = 'Period %s not valid. Currently, only "month" is supported' % \
            self.filters.period
        # Bug fix: the ValueError was previously *returned* instead of
        # raised, so callers silently received an exception object as data.
        raise ValueError(msg)

    fields = Set([])
    tables = Set([])
    filters = Set([])

    fields.add("unique_visitors uvisitors")
    tables.add("visits_month v")

    query = self.db.BuildQuery(self.filters.period, self.filters.startdate,
                               self.filters.enddate, "v.date", fields,
                               tables, filters, True,
                               self.filters.type_analysis, strict=True)
    ts = self.db.ExecuteQuery(query)
    ts = completePeriodIds(ts, self.filters.period,
                           self.filters.startdate, self.filters.enddate)
    return ts
def create_filter_report(filter_, period, startdate, enddate, destdir, npeople, identities_db):
    """Create per-item JSON reports (evolutionary + aggregated) for an IRC
    filter under destdir."""
    from vizgrimoire.report import Report
    items = Report.get_items()
    if items is None:
        items = IRC.get_filter_items(filter_, startdate, enddate, identities_db)
        if (items == None):
            return
    if not isinstance(items, (list)):
        items = [items]

    # Write the item list for this filter
    fn = os.path.join(destdir, filter_.get_filename(IRC()))
    createJSON(items, fn)

    for item in items:
        # item_name = "'"+ item+ "'"
        logging.info(item)
        filter_item = Filter(filter_.get_name(), item)

        evol_data = IRC.get_evolutionary_data(period, startdate, enddate, identities_db, filter_item)
        fn = os.path.join(destdir, filter_item.get_evolutionary_filename(IRC()))
        createJSON( completePeriodIds(evol_data, period, startdate, enddate), fn)

        agg = IRC.get_agg_data(period, startdate, enddate, identities_db, filter_item)
        fn = os.path.join(destdir, filter_item.get_static_filename(IRC()))
        createJSON(agg, fn)
def create_filter_report(filter_, period, startdate, enddate, destdir, npeople, identities_db):
    """Create per-item JSON reports (evolutionary + aggregated) for an IRC
    filter under destdir. (Duplicate of the other IRC variant in this file.)"""
    from vizgrimoire.report import Report
    items = Report.get_items()
    if items is None:
        items = IRC.get_filter_items(filter_, startdate, enddate, identities_db)
        if (items == None):
            return
    if not isinstance(items, (list)):
        items = [items]

    # Write the item list for this filter
    fn = os.path.join(destdir, filter_.get_filename(IRC()))
    createJSON(items, fn)

    for item in items :
        # item_name = "'"+ item+ "'"
        logging.info (item)
        filter_item = Filter(filter_.get_name(), item)

        evol_data = IRC.get_evolutionary_data(period, startdate, enddate, identities_db, filter_item)
        fn = os.path.join(destdir, filter_item.get_evolutionary_filename(IRC()))
        createJSON(completePeriodIds(evol_data, period, startdate, enddate), fn)

        agg = IRC.get_agg_data(period, startdate, enddate, identities_db, filter_item)
        fn = os.path.join(destdir, filter_item.get_static_filename(IRC()))
        createJSON(agg, fn)
def fill_items(items, data, id_field, evol = False, period = None, startdate = None, enddate = None):
    """ Complete data dict items filling with 0 not existing items

    For each id in items missing from data[id_field], append the id and a
    zero (aggregated case) or a zero-filled time series (evol case) to
    every metric column. Time-series bookkeeping fields are left alone.

    :param items: item id or list of item ids expected in the data
    :param data: dict of columns (lists) keyed by field name, mutated in place
    :param id_field: name of the column holding the item ids
    :param evol: if True, fill with zero time series instead of scalar 0
    :return: the (mutated) data dict
    """
    # This fields should not be modified
    ts_fields = [period, 'unixtime', 'date','id']
    if evol:
        # Lazy import: only the evolutionary path needs completePeriodIds
        from vizgrimoire.GrimoireUtils import completePeriodIds
        zero_ts = completePeriodIds({id_field:[],period:[]},
                                    period, startdate, enddate)[id_field]
    # list(...) keeps .remove() working on Python 3 dict views too
    fields = list(data.keys())
    if id_field not in fields:
        logging.info("[fill_items] " + id_field + " not found in " + ",".join(data))
        return data
    fields.remove(id_field)
    # Robustness (mirrors the richer fill_items variant in this module):
    # accept a single item and scalar single-row columns.
    if not isinstance(items, list):
        items = [items]
    if type(data[id_field]) != list:
        data[id_field] = [data[id_field]]
    for id in items:
        if id not in data[id_field]:
            data[id_field].append(id)
            for field in fields:
                if type(data[field]) != list:
                    data[field] = [data[field]]
                if field in ts_fields: continue
                if not evol:
                    data[field].append(0)
                if evol:
                    data[field].append(zero_ts)
    return data
def GetClosedSummaryCompanies (period, startdate, enddate, identities_db, closed_condition, num_organizations):
    """Evolution of closed tickets per company: one column per company for
    the first num_organizations companies, the rest summed into 'Others'."""
    from vizgrimoire.ITS import ITS

    count = 1
    first_organizations = {}

    metric = DataSource.get_metrics("organizations", ITS)
    organizations = metric.get_list()
    organizations = organizations['name']

    for company in organizations:
        type_analysis = ["company", "'"+company+"'"]
        filter_com = MetricFilters(period, startdate, enddate, type_analysis)
        mclosed = ITS.get_metrics("closed", ITS)
        # NOTE(review): the metric's filters are replaced and never restored,
        # unlike the SCM commits variant which saves/restores them -- confirm
        # the metric instance is not shared.
        mclosed.filters = filter_com
        closed = mclosed.get_ts()
        # Rename field closed to company name
        closed[company] = closed["closed"]
        del closed['closed']

        if (count <= num_organizations):
            #Case of organizations with entity in the dataset
            # Python 2 dict merge; later keys win on collisions
            first_organizations = dict(first_organizations.items() + closed.items())
        else :
            #Case of organizations that are aggregated in the field Others
            if 'Others' not in first_organizations:
                first_organizations['Others'] = closed[company]
            else:
                first_organizations['Others'] = [a+b for a, b in zip(first_organizations['Others'],closed[company])]
        count = count + 1

    first_organizations = completePeriodIds(first_organizations, period, startdate, enddate)
    return(first_organizations)
def _complete_period_ids_items(data, id_field, period, startdate, enddate): ts = {} # Complete the time series and share the date series metrics = data.keys() if id_field not in data: raise Exception(id_field + " not in " + str(data)) metrics.remove(id_field) metrics.remove(period) ts[id_field] = data[id_field] for metric in metrics: ts[metric] = [] for i in range (0, len(ts[id_field])): for metric in metrics: # Standard time series for each metric and convert metric_ts = {} metric_ts[metric] = data[metric][i] metric_ts[period] = data[period][i] metric_ts = completePeriodIds(metric_ts, period, startdate, enddate) ts[metric].append(metric_ts[metric]) # Add additional time series fields: unixtime metric_ts.pop(metric) # just time series generic fields ts = dict(ts.items()+metric_ts.items()) return ts
def get_ts (self):
    """Returns a time series of a specific class

    A timeseries consists of a unixtime date, labels, some other fields
    and the data of the specific instantiated class per period. This is
    built on a hash table.

    This also returns a proper timeseries with a 0-filled array if needed.
    """
    query = self._get_sql(True)
    ts = self.db.ExecuteQuery(query)
    if self.filters.type_analysis and self.filters.type_analysis[1] is None:
        # Grouped query (all items of the analysis type at once): split the
        # flat result into per-item time series keyed by the group field.
        id_field = self.db.get_group_field(self.filters.type_analysis[0])
        if 'CONCAT' not in id_field:
            id_field = id_field.split('.')[1] # remove table name
        ts = Metrics._convert_group_to_ts(ts, id_field)
        ts = Metrics._complete_period_ids_items(ts, id_field,
                                                self.filters.period,
                                                self.filters.startdate,
                                                self.filters.enddate)
    else:
        ts = completePeriodIds(ts, self.filters.period,
                               self.filters.startdate, self.filters.enddate)
    return ts
def result(self):
    """Evolution of status changes: one time series per distinct 'Status'
    value found in the changes table."""
    # NOTE(review): fields/tables/filters are built but never used in this
    # method -- presumably leftovers; self._sql(state) builds the real query.
    fields = Set([])
    tables = Set([])
    filters = Set([])

    query = """select distinct(new_value) as states from changes where field = 'Status' """
    states = self.db.ExecuteQuery(query)

    data = {}
    for state in states["states"]:
        query = self._sql(state)
        state_data = self.db.ExecuteQuery(query)
        state_data = completePeriodIds(state_data, self.filters.period,
                                       self.filters.startdate, self.filters.enddate)
        if not data:
            # First state: keep the full time-series skeleton and rename
            # the generic 'changes' column to the state name
            data = state_data
            data[state] = data["changes"]
            data.pop("changes") # remove not needed data
        else:
            data[state] = state_data["changes"]

    # TODO: Hardcoded creation of file
    #createJSON(data, "../../../../json/its-changes.json")
    return data
def result(self):
    """Evolution of status changes restricted to the 'Resolved' state.

    Queries the distinct 'Status' values, computes the time series only
    for 'Resolved', writes it to a (hardcoded) JSON file and returns it.
    """
    # NOTE(review): fields/tables/filters are built but never used in this
    # method -- presumably leftovers; self._sql(state) builds the real query.
    fields = Set([])
    tables = Set([])
    filters = Set([])

    query = """select distinct(new_value) as states from changes where field = 'Status' """
    states = self.db.ExecuteQuery(query)

    data = {}
    for state in states["states"]:
        # Fix: the original used the `<>` operator, which is deprecated in
        # Python 2 and removed in Python 3; `!=` is identical in behavior.
        if state != 'Resolved': continue
        query = self._sql(state)
        state_data = self.db.ExecuteQuery(query)
        state_data = completePeriodIds(state_data, self.filters.period,
                                       self.filters.startdate, self.filters.enddate)
        if not data:
            # First (only) state: keep the full time-series skeleton and
            # rename the generic 'changes' column to the state name
            data = state_data
            data[state] = data["changes"]
            data.pop("changes") # remove not needed data
        else:
            data[state] = state_data["changes"]

    # TODO: Hardcoded creation of file
    createJSON(data, "../../../../json/its-changes.json")
    return data
def get_person_evol(cls, uuid, period, startdate, enddate, identities_db, type_analysis):
    """Evolutionary ITS activity time series for one person (by uuid),
    with period ids completed for the requested date range."""
    raw_ts = GetPeopleEvolITS(uuid, period, startdate, enddate,
                              cls._get_closed_condition())
    return completePeriodIds(raw_ts, period, startdate, enddate)
def create_filter_report(filter_, period, startdate, enddate, destdir, npeople, identities_db):
    """Create the JSON reports for one EventsDS filter: per-item
    evolutionary and aggregated data, top report, and (for repositories)
    a summary list with yearly events/rsvps."""
    from vizgrimoire.report import Report
    items = Report.get_items()
    if items is None:
        items = EventsDS.get_filter_items(filter_, startdate, enddate, identities_db)
        if (items == None):
            return

    filter_name = filter_.get_name()
    items = items['name']

    if not isinstance(items, list):
        items = [items]

    file_items = []
    for item in items:
        # Names starting with a dot get a leading "_" so the generated JSON
        # file is not a hidden dotfile
        if re.compile("^\..*").match(item) is not None:
            item = "_" + item
        file_items.append(item)

    fn = os.path.join(destdir, filter_.get_filename(EventsDS()))
    createJSON(file_items, fn)

    # NOTE(review): `in ("repository")` is a substring test on a plain
    # string, not tuple membership; works for this value but probably
    # meant ("repository",).
    if filter_name in ("repository"):
        # Summary across repositories, filled inside the loop below
        items_list = {'name': [], 'events_365': [], 'rsvps_365': []}
    else:
        items_list = items

    for item in items:
        logging.info(item)
        filter_item = Filter(filter_.get_name(), item)

        evol_data = EventsDS.get_evolutionary_data(period, startdate, enddate, identities_db, filter_item)
        fn = os.path.join(
            destdir, filter_item.get_evolutionary_filename(EventsDS()))
        createJSON(
            completePeriodIds(evol_data, period, startdate, enddate), fn)

        agg = EventsDS.get_agg_data(period, startdate, enddate, identities_db, filter_item)
        fn = os.path.join(destdir, filter_item.get_static_filename(EventsDS()))
        createJSON(agg, fn)

        if filter_name in ("repository"):
            items_list['name'].append(item.replace('/', '_'))
            items_list['events_365'].append(agg['events_365'])
            items_list['rsvps_365'].append(agg['rsvps_365'])

    EventsDS.create_filter_report_top(filter_, period, startdate, enddate, destdir, npeople, identities_db)

    fn = os.path.join(destdir, filter_.get_filename(EventsDS()))
    createJSON(items_list, fn)
def get_current_states(self, states):
    """Time series of tickets currently sitting in each given state.

    :param states: iterable of state names to query
    :return: dict merging one completed series per state (Python 2 dict
             merge; shared time fields are overwritten by the last state)
    """
    current_states = {}
    for state in states:
        query = self.__get_sql_current__(state, True)
        data = self.db.ExecuteQuery(query)
        data = completePeriodIds(data, self.filters.period,
                                 self.filters.startdate, self.filters.enddate)
        current_states = dict(current_states.items() + data.items())
    return current_states
def get_ts(self):
    """Time series of added and removed lines as floats."""
    #Specific needs for Added and Removed lines not considered in meta class Metrics
    query = self._get_sql(True)
    data = self.db.ExecuteQuery(query)
    # Single-row results come back as scalars: normalize to lists first
    if not (isinstance(data['removed_lines'], list)):
        data['removed_lines'] = [data['removed_lines']]
    if not (isinstance(data['added_lines'], list)):
        data['added_lines'] = [data['added_lines']]
    # Counts arrive as strings/ints from SQL; cast to float for consumers
    data['removed_lines'] = [float(lines) for lines in data['removed_lines']]
    data['added_lines'] = [float(lines) for lines in data['added_lines']]
    return completePeriodIds(data, self.filters.period,
                             self.filters.startdate, self.filters.enddate)
def GetCommitsSummaryCompanies(period, startdate, enddate, identities_db, num_organizations):
    """Evolution of commits per company, top N plus 'Others'."""
    # This function returns the following dataframe structrure
    # unixtime, date, week/month/..., company1, company2, ... company[num_organizations -1], others
    # The 3 first fields are used for data and ordering purposes
    # The "companyX" fields are those that provide info about that company
    # The "Others" field is the aggregated value of the rest of the organizations
    # Companies above num_organizations will be aggregated in Others
    from vizgrimoire.SCM import SCM

    metric = DataSource.get_metrics("organizations", SCM)
    organizations = metric.get_list()
    organizations = organizations['name']

    first_organizations = {}
    count = 1

    for company in organizations:
        company_name = "'" + company + "'"
        type_analysis = ['company', company_name]
        mcommits = DataSource.get_metrics("commits", SCM)
        mfilter = MetricFilters(period, startdate, enddate, type_analysis)
        # Swap in the per-company filter, compute, then restore the original
        mfilter_orig = mcommits.filters
        mcommits.filters = mfilter
        commits = mcommits.get_ts()
        mcommits.filters = mfilter_orig
        # commits = EvolCommits(period, startdate, enddate, identities_db, ["company", company_name])
        # commits = completePeriodIds(commits, period, startdate, enddate)
        # Rename field commits to company name
        commits[company] = commits["commits"]
        del commits['commits']

        if (count <= num_organizations):
            #Case of organizations with entity in the dataset
            # Python 2 dict merge; later keys win on collisions
            first_organizations = dict(first_organizations.items() + commits.items())
        else:
            #Case of organizations that are aggregated in the field Others
            if 'Others' not in first_organizations:
                first_organizations['Others'] = commits[company]
            else:
                first_organizations['Others'] = [
                    a + b
                    for a, b in zip(first_organizations['Others'], commits[company])
                ]
        count = count + 1

    #TODO: remove global variables...
    first_organizations = completePeriodIds(first_organizations, period, startdate, enddate)

    return (first_organizations)
def ticketsTimeOpenedByField(self, period, startdate, enddate, closed_condition, field, values_set, result_type):
    """Time opened per period, split by the values of a ticket field.

    result_type selects the variant: 'action' (until first action),
    'comment' (until first comment), anything else the plain series.
    """
    condition = "AND " + field + " = '%s'"
    evol = {}

    # Build a set of dates
    dates = completePeriodIds({period : []}, period, startdate, enddate)[period]
    dates.append(dates[-1] + 1) # add one more month

    for field_value in values_set:
        field_condition = condition % field_value
        # Alias encodes variant and field value so merged keys stay unique
        if result_type == 'action':
            alias = "topened_tfa_%s" % field_value
        elif result_type == 'comment':
            alias = "topened_tfc_%s" % field_value
        else:
            alias = "topened_%s" % field_value
        time_opened = self.getTicketsTimeOpened(period, dates, closed_condition, result_type, alias, field_condition)
        time_opened = completePeriodIds(time_opened, period, startdate, enddate)
        # Python 2 dict merge
        evol = dict(evol.items() + time_opened.items())
    return evol
def GetSentSummaryCompanies(period, startdate, enddate, identities_db, num_organizations, projects_db):
    """Evolution of emails sent per company, top N plus 'Others'.
    (Duplicate of the other GetSentSummaryCompanies in this file.)"""
    count = 1
    first_organizations = {}

    metric = DataSource.get_metrics("organizations", MLS)
    organizations = metric.get_list()
    # NOTE(review): the ITS/SCM variants unpack get_list()['name'] before
    # iterating; here the result is iterated directly -- confirm it yields
    # company names.
    for company in organizations:
        type_analysis = ["company", "'" + company + "'"]
        sent = EvolEmailsSent(period, startdate, enddate, identities_db, type_analysis, projects_db)
        sent = completePeriodIds(sent, period, startdate, enddate)
        # Rename field sent to company name
        sent[company] = sent["sent"]
        del sent['sent']

        if (count <= num_organizations):
            #Case of organizations with entity in the dataset
            # Python 2 dict merge; later keys win on collisions
            first_organizations = dict(first_organizations.items() + sent.items())
        else:
            #Case of organizations that are aggregated in the field Others
            if 'Others' not in first_organizations:
                first_organizations['Others'] = sent[company]
            else:
                first_organizations['Others'] = [
                    a + b
                    for a, b in zip(first_organizations['Others'], sent[company])
                ]
        count = count + 1

    first_organizations = completePeriodIds(first_organizations, period, startdate, enddate)

    return (first_organizations)
def get_ts(self):
    """Monthly count of posts that remain unanswered within each month.

    Only the 'month' period is supported; only the 'repository'
    type_analysis is honoured (anything else returns {}).
    """
    # Get all posts for each month and determine which from those
    # are still unanswered. Returns the number of unanswered
    # posts on each month.
    period = self.filters.period
    if (self.filters.type_analysis and self.filters.type_analysis[0] not in ("repository")):
        return {}
    if (period != "month"):
        logging.error("Period not supported in " + self.id + " " + period)
        return None

    startdate = self.filters.startdate
    enddate = self.filters.enddate
    # Dates are stored quoted; strip the quotes while parsing
    start = datetime.strptime(startdate, "'%Y-%m-%d'")
    end = datetime.strptime(enddate, "'%Y-%m-%d'")

    # Absolute month numbers (year*12 + month - 1) for easy iteration
    start_month = (start.year * 12 + start.month) - 1
    end_month = (end.year * 12 + end.month) - 1
    months = end_month - start_month + 2

    num_unanswered = {'month': [], 'unanswered_posts': []}

    for i in range(0, months):
        # NOTE(review): the unanswered list resets every month, so replies
        # arriving in later months never mark earlier posts as answered --
        # confirm this per-month scoping is intended.
        unanswered = []
        current_month = start_month + i
        from_date = self.__get_date_from_month(current_month)
        to_date = self.__get_date_from_month(current_month + 1)

        messages = self.__get_messages(from_date, to_date)
        for message in messages:
            message_id = message[0]
            response_of = message[1]

            if response_of is None:
                # top-level post: unanswered until a reply shows up
                unanswered.append(message_id)
                continue

            if response_of in unanswered:
                unanswered.remove(response_of)

        num_unanswered['month'].append(current_month)
        num_unanswered['unanswered_posts'].append(len(unanswered))

    return completePeriodIds(num_unanswered, self.filters.period,
                             self.filters.startdate, self.filters.enddate)
def get_ts(self):
    """Monthly count of posts that remain unanswered within each month.
    (Duplicate of the other unanswered-posts get_ts in this file.)

    Only the 'month' period is supported; only the 'repository'
    type_analysis is honoured (anything else returns {}).
    """
    # Get all posts for each month and determine which from those
    # are still unanswered. Returns the number of unanswered
    # posts on each month.
    period = self.filters.period
    if (self.filters.type_analysis and self.filters.type_analysis[0] not in ("repository")):
        return {}
    if (period != "month"):
        logging.error("Period not supported in " + self.id + " " + period)
        return None

    startdate = self.filters.startdate
    enddate = self.filters.enddate
    # Dates are stored quoted; strip the quotes while parsing
    start = datetime.strptime(startdate, "'%Y-%m-%d'")
    end = datetime.strptime(enddate, "'%Y-%m-%d'")

    # Absolute month numbers (year*12 + month - 1) for easy iteration
    start_month = (start.year * 12 + start.month) - 1
    end_month = (end.year * 12 + end.month) - 1
    months = end_month - start_month + 2

    num_unanswered = {'month' : [], 'unanswered_posts' : []}

    for i in range(0, months):
        # Per-month scope: replies in later months never answer earlier posts
        unanswered = []
        current_month = start_month + i
        from_date = self.__get_date_from_month(current_month)
        to_date = self.__get_date_from_month(current_month + 1)

        messages = self.__get_messages(from_date, to_date)
        for message in messages:
            message_id = message[0]
            response_of = message[1]

            if response_of is None:
                # top-level post: unanswered until a reply shows up
                unanswered.append(message_id)
                continue

            if response_of in unanswered:
                unanswered.remove(response_of)

        num_unanswered['month'].append(current_month)
        num_unanswered['unanswered_posts'].append(len(unanswered))

    return completePeriodIds(num_unanswered, self.filters.period,
                             self.filters.startdate, self.filters.enddate)
def create_filter_report(filter_, period, startdate, enddate, destdir, npeople, identities_db):
    """Create the JSON reports for one EventsDS filter.
    (Duplicate of the other EventsDS variant in this file.)"""
    from vizgrimoire.report import Report
    items = Report.get_items()
    if items is None:
        items = EventsDS.get_filter_items(filter_, startdate, enddate, identities_db)
        if (items == None):
            return

    filter_name = filter_.get_name()
    items = items['name']

    if not isinstance(items, list):
        items = [items]

    file_items = []
    for item in items:
        # Names starting with a dot get a leading "_" so the generated JSON
        # file is not a hidden dotfile
        if re.compile("^\..*").match(item) is not None:
            item = "_"+item
        file_items.append(item)

    fn = os.path.join(destdir, filter_.get_filename(EventsDS()))
    createJSON(file_items, fn)

    # NOTE(review): `in ("repository")` is a substring test on a plain
    # string, not tuple membership; works for this value but probably
    # meant ("repository",).
    if filter_name in ("repository"):
        # Summary across repositories, filled inside the loop below
        items_list = {'name' : [], 'events_365' : [], 'rsvps_365' : []}
    else:
        items_list = items

    for item in items:
        logging.info(item)
        filter_item = Filter(filter_.get_name(), item)

        evol_data = EventsDS.get_evolutionary_data(period, startdate, enddate, identities_db, filter_item)
        fn = os.path.join(destdir, filter_item.get_evolutionary_filename(EventsDS()))
        createJSON(completePeriodIds(evol_data, period, startdate, enddate), fn)

        agg = EventsDS.get_agg_data(period, startdate, enddate, identities_db, filter_item)
        fn = os.path.join(destdir, filter_item.get_static_filename(EventsDS()))
        createJSON(agg, fn)

        if filter_name in ("repository"):
            items_list['name'].append(item.replace('/', '_'))
            items_list['events_365'].append(agg['events_365'])
            items_list['rsvps_365'].append(agg['rsvps_365'])

    EventsDS.create_filter_report_top(filter_, period, startdate, enddate, destdir, npeople, identities_db)

    fn = os.path.join(destdir, filter_.get_filename(EventsDS()))
    createJSON(items_list, fn)
def fill_items(items, data, id_field, evol=False, period=None, startdate=None, enddate=None):
    """ Complete data dict items filling with 0 not existing items

    For each id in items missing from data[id_field], append the id and a
    zero (aggregated case) or a zero-filled time series (evol case) to
    every metric column. Time-series bookkeeping fields are left alone.
    Accepts a single item and scalar single-row columns.
    """
    from vizgrimoire.GrimoireUtils import completePeriodIds
    # This fields should not be modified
    ts_fields = [period, 'unixtime', 'date', 'id']
    if evol:
        # zero-filled series used as filler for evolutionary data
        zero_ts = completePeriodIds({
            id_field: [],
            period: []
        }, period, startdate, enddate)[id_field]
    fields = data.keys()
    if id_field not in fields:
        logging.info("[fill_items] " + id_field + " not found in " +
                     ",".join(data))
        return data
    fields.remove(id_field)
    # Accept a single item (scalar) as well as a list of items
    if not isinstance(items, list):
        items_ids = [items]
    else:
        items_ids = items
    # Single-row query results come back as scalars: normalize to lists
    if type(data[id_field]) != list:
        data[id_field] = [data[id_field]]
    for id in items_ids:
        if id not in data[id_field]:
            data[id_field].append(id)
            for field in fields:
                if type(data[field]) != list:
                    data[field] = [data[field]]
                if field in ts_fields: continue
                if not evol:
                    data[field].append(0)
                if evol:
                    data[field].append(zero_ts)
    return data
def get_ts(self):
    """Time series of added and removed lines as floats.
    (Duplicate of the other added/removed-lines get_ts in this file.)"""
    #Specific needs for Added and Removed lines not considered in meta class Metrics
    query = self._get_sql(True)
    data = self.db.ExecuteQuery(query)
    # Single-row results come back as scalars: normalize to lists first
    if not (isinstance(data['removed_lines'], list)):
        data['removed_lines'] = [data['removed_lines']]
    if not (isinstance(data['added_lines'], list)):
        data['added_lines'] = [data['added_lines']]
    # Counts arrive as strings/ints from SQL; cast to float for consumers
    data['removed_lines'] = [
        float(lines) for lines in data['removed_lines']
    ]
    data['added_lines'] = [float(lines) for lines in data['added_lines']]
    return completePeriodIds(data, self.filters.period,
                             self.filters.startdate, self.filters.enddate)
def GetCommitsSummaryCompanies (period, startdate, enddate, identities_db, num_organizations):
    """Evolution of commits per company, top N plus 'Others'.
    (Duplicate of the other GetCommitsSummaryCompanies in this file.)"""
    # This function returns the following dataframe structrure
    # unixtime, date, week/month/..., company1, company2, ... company[num_organizations -1], others
    # The 3 first fields are used for data and ordering purposes
    # The "companyX" fields are those that provide info about that company
    # The "Others" field is the aggregated value of the rest of the organizations
    # Companies above num_organizations will be aggregated in Others
    from vizgrimoire.SCM import SCM

    metric = DataSource.get_metrics("organizations", SCM)
    organizations = metric.get_list()
    organizations = organizations['name']

    first_organizations = {}
    count = 1

    for company in organizations:
        company_name = "'"+company+"'"
        type_analysis = ['company', company_name]
        mcommits = DataSource.get_metrics("commits", SCM)
        mfilter = MetricFilters(period, startdate, enddate, type_analysis)
        # Swap in the per-company filter, compute, then restore the original
        mfilter_orig = mcommits.filters
        mcommits.filters = mfilter
        commits = mcommits.get_ts()
        mcommits.filters = mfilter_orig
        # commits = EvolCommits(period, startdate, enddate, identities_db, ["company", company_name])
        # commits = completePeriodIds(commits, period, startdate, enddate)
        # Rename field commits to company name
        commits[company] = commits["commits"]
        del commits['commits']

        if (count <= num_organizations):
            #Case of organizations with entity in the dataset
            # Python 2 dict merge; later keys win on collisions
            first_organizations = dict(first_organizations.items() + commits.items())
        else :
            #Case of organizations that are aggregated in the field Others
            if 'Others' not in first_organizations:
                first_organizations['Others'] = commits[company]
            else:
                first_organizations['Others'] = [a+b for a, b in zip(first_organizations['Others'],commits[company])]
        count = count + 1

    #TODO: remove global variables...
    first_organizations = completePeriodIds(first_organizations, period, startdate, enddate)

    return(first_organizations)
def genDates(period, startdate, enddate):
    """ This function generates empty timeseries period

    This is typically used for metric classes that need to iterate
    through the several periods and populate the data structure.

    :return: dict with empty period column and the date/unixtime axes
             (zero placeholders stripped)
    """
    dates = createTimeSeries({})
    dates.pop('id')
    dates[period] = []
    dates = completePeriodIds(dates, period, startdate, enddate)
    # Remove zeros
    dates['date'] = [d for d in dates['date'] if d != 0]
    dates['unixtime'] = [d for d in dates['unixtime'] if d != 0]
    return dates
def create_filter_report(filter_, period, startdate, enddate, destdir, npeople, identities_db):
    """Create the per-item JSON reports (evolutionary + aggregated) for a
    QAForums filter. (Duplicate of the other QAForums variant in this file.)"""
    from vizgrimoire.report import Report
    items = Report.get_items()
    if items is None:
        items = QAForums.get_filter_items(filter_, startdate, enddate, identities_db)
        if items == None:
            return
        items = items['name']

    filter_name = filter_.get_name()

    if not isinstance(items, list):
        items = [items]

    file_items = []
    for item in items:
        # Names starting with a dot get a leading "_" so the generated JSON
        # file is not a hidden dotfile
        if re.compile("^\..*").match(item) is not None:
            item = "_" + item
        file_items.append(item)

    fn = os.path.join(destdir, filter_.get_filename(QAForums()))
    createJSON(file_items, fn)

    for item in items:
        logging.info(item)
        filter_item = Filter(filter_.get_name(), item)

        evol_data = QAForums.get_evolutionary_data(period, startdate, enddate, identities_db, filter_item)
        fn = os.path.join(
            destdir, filter_item.get_evolutionary_filename(QAForums()))
        createJSON(
            completePeriodIds(evol_data, period, startdate, enddate), fn)

        agg = QAForums.get_agg_data(period, startdate, enddate, identities_db, filter_item)
        fn = os.path.join(destdir, filter_item.get_static_filename(QAForums()))
        createJSON(agg, fn)
def GetClosedSummaryCompanies(period, startdate, enddate, identities_db, closed_condition, num_organizations):
    """Evolution of closed tickets per company, top N plus 'Others'.
    (Duplicate of the other GetClosedSummaryCompanies in this file.)"""
    from vizgrimoire.ITS import ITS

    count = 1
    first_organizations = {}

    metric = DataSource.get_metrics("organizations", ITS)
    organizations = metric.get_list()
    organizations = organizations['name']

    for company in organizations:
        type_analysis = ["company", "'" + company + "'"]
        filter_com = MetricFilters(period, startdate, enddate, type_analysis)
        mclosed = ITS.get_metrics("closed", ITS)
        # NOTE(review): the metric's filters are replaced and never restored,
        # unlike the SCM commits variant which saves/restores them -- confirm
        # the metric instance is not shared.
        mclosed.filters = filter_com
        closed = mclosed.get_ts()
        # Rename field closed to company name
        closed[company] = closed["closed"]
        del closed['closed']

        if (count <= num_organizations):
            #Case of organizations with entity in the dataset
            # Python 2 dict merge; later keys win on collisions
            first_organizations = dict(first_organizations.items() + closed.items())
        else:
            #Case of organizations that are aggregated in the field Others
            if 'Others' not in first_organizations:
                first_organizations['Others'] = closed[company]
            else:
                first_organizations['Others'] = [
                    a + b
                    for a, b in zip(first_organizations['Others'], closed[company])
                ]
        count = count + 1

    first_organizations = completePeriodIds(first_organizations, period, startdate, enddate)
    return (first_organizations)
def get_ts(self):
    """Time series of unique visitors per month.

    Reads the pre-aggregated visits_month table and returns the series
    completed with period ids.

    Raises ValueError when the configured period is not 'month' (the
    only granularity the visits_month table supports).
    """
    if self.filters.period != 'month':
        msg = 'Period %s not valid. Currently, only "month" is supported' % \
            self.filters.period
        # Bug fix: the original returned the exception instance instead
        # of raising it, so callers never saw the error.
        raise ValueError(msg)

    fields = Set([])
    tables = Set([])
    filters = Set([])

    fields.add("unique_visitors uvisitors")
    tables.add("visits_month v")

    query = self.db.BuildQuery(self.filters.period, self.filters.startdate,
                               self.filters.enddate, "v.date", fields,
                               tables, filters, True,
                               self.filters.type_analysis)
    ts = self.db.ExecuteQuery(query)
    ts = completePeriodIds(ts, self.filters.period,
                           self.filters.startdate, self.filters.enddate)
    return ts
def get_ts (self):
    """Return the evolutionary time series for this metric.

    The series is a hash with unixtime dates, period labels and the
    metric values per period, zero-filled for periods with no data.
    When the filter targets all items of a type (type_analysis whose
    value part is None), one series per item is produced instead.
    """
    mfilters = self.filters
    raw = self.db.ExecuteQuery(self._get_sql(True))

    if mfilters.type_analysis and mfilters.type_analysis[1] is None:
        # One time series per item of the analyzed type
        alias = self.db.get_group_field_alias(mfilters.type_analysis[0])
        raw = Metrics._convert_group_to_ts(raw, alias)
        return Metrics._complete_period_ids_items(raw, alias,
                                                  mfilters.period,
                                                  mfilters.startdate,
                                                  mfilters.enddate)

    # Single aggregated series, 0-filled for empty periods
    return completePeriodIds(raw, mfilters.period,
                             mfilters.startdate, mfilters.enddate)
def get_person_evol(uuid, period, startdate, enddate, identities_db, type_analysis):
    """Mailing-list activity evolution for one person, 0-filled per period."""
    raw = GetEvolPeopleMLS(uuid, period, startdate, enddate)
    return completePeriodIds(raw, period, startdate, enddate)
def _get_ts_all():
    """ Get the metrics for all items at the same time

    Per month, computes for every item the number of pending reviews and
    the median of their accumulated pending days (from submission and
    from last upload, for all pending reviews and for those waiting for
    a reviewer), then reshapes the result so each metric holds one time
    series per item.

    Closure: uses self, get_sql, get_values_median, checkListArray,
    start_month and months from the enclosing scope.
    """
    all_items = self.db.get_all_items(self.filters.type_analysis)
    group_field = self.db.get_group_field(all_items)
    id_field = group_field.split('.')[1] # remove table name
    # Fix: this list was assigned twice with the identical literal; the
    # redundant second assignment has been removed.
    metrics = ["review_time_pending_reviews",
               "review_time_pending_days_acc_median",
               "review_time_pending_upload_reviews",
               "review_time_pending_upload_days_acc_median",
               "review_time_pending_ReviewsWaitingForReviewer_days_acc_median",
               "review_time_pending_ReviewsWaitingForReviewer_reviews",
               "review_time_pending_upload_ReviewsWaitingForReviewer_days_acc_median",
               "review_time_pending_upload_ReviewsWaitingForReviewer_reviews"]

    acc_pending_time_median_month = {"month":[],"name":[]} # Used to store each month all items data
    acc_pending_time_median = {"month":[]} # Used to store the final format

    for metric in metrics:
        acc_pending_time_median_month[metric] = []
        acc_pending_time_median[metric] = []

    # months = 2 # to debug
    for i in range(0, months+1):
        # Complete the skeleton of the data dict
        acc_pending_time_median_month["month"].append(start_month+i)
        acc_pending_time_median["month"].append(start_month+i)
        acc_pending_time_median_month["name"].append([])
        for metric in metrics:
            acc_pending_time_median_month[metric].append([])
            acc_pending_time_median[metric].append([])

    for i in range(0, months+1):
        # First get all data from SQL
        newtime = self.db.ExecuteQuery(get_sql(start_month+i))
        uploadtime = self.db.ExecuteQuery(get_sql(start_month+i, False, True))
        newtime_rev = self.db.ExecuteQuery(get_sql(start_month+i, True)) # This is the slow query
        uploadtime_rev = self.db.ExecuteQuery(get_sql(start_month+i, True, True))

        # Build a common list for all items
        all_items_month_ids = []
        # for data_sql in [newtime, uploadtime, newtime_rev, uploadtime_rev]:
        for data_sql in [newtime, uploadtime, newtime_rev, uploadtime_rev]:
            checkListArray(data_sql)
            all_items_month_ids = list(Set(data_sql[id_field]+all_items_month_ids))
        acc_pending_time_median_month["name"][i] = all_items_month_ids

        # Now add the data in a common dict for all metrics in this month
        # review time
        for item in all_items_month_ids:
            data_item = []
            for j in range(0, len(newtime[id_field])):
                if newtime[id_field][j] == item:
                    data_item.append(newtime['newtime'][j])
            values = get_values_median(data_item)
            # print start_month+i, "newtime", item, values
            nreviews = len(data_item)
            acc_pending_time_median_month['review_time_pending_reviews'][i].append(nreviews)
            acc_pending_time_median_month['review_time_pending_days_acc_median'][i].append(values)
        # upload time
        for item in all_items_month_ids:
            data_item = []
            for j in range(0, len(uploadtime[id_field])):
                if uploadtime[id_field][j] == item:
                    data_item.append(uploadtime['uploadtime'][j])
            values = get_values_median(data_item)
            # print start_month+i, "upload", item, values
            nreviews = len(data_item)
            acc_pending_time_median_month['review_time_pending_upload_reviews'][i].append(nreviews)
            acc_pending_time_median_month['review_time_pending_upload_days_acc_median'][i].append(values)
        # review time reviewers
        for item in all_items_month_ids:
            # Now just for reviews waiting for Reviewer
            data_item = []
            for j in range(0, len(newtime_rev[id_field])):
                if newtime_rev[id_field][j] == item:
                    data_item.append(newtime_rev['newtime'][j])
            values = get_values_median(data_item)
            nreviews = len(data_item)
            acc_pending_time_median_month['review_time_pending_ReviewsWaitingForReviewer_reviews'][i].append(nreviews)
            acc_pending_time_median_month['review_time_pending_ReviewsWaitingForReviewer_days_acc_median'][i].append(values)
        # upload time reviewers
        for item in all_items_month_ids:
            data_item = []
            for j in range(0, len(uploadtime_rev[id_field])):
                if uploadtime_rev[id_field][j] == item:
                    data_item.append(uploadtime_rev['uploadtime'][j])
            values = get_values_median(data_item)
            nreviews = len(data_item)
            acc_pending_time_median_month['review_time_pending_upload_ReviewsWaitingForReviewer_reviews'][i].append(nreviews)
            acc_pending_time_median_month['review_time_pending_upload_ReviewsWaitingForReviewer_days_acc_median'][i].append(values)

    # Now we need to consolidate all names in a single list
    all_items = []
    for lnames in acc_pending_time_median_month['name']:
        all_items = list(Set(lnames+all_items))

    # And now time to create the final version that should be completePeriod
    for item in all_items:
        # Add the ts for the item to the final dict
        for i in range(0, months+1):
            mitems = acc_pending_time_median_month['name'][i]
            found_item = False
            for k in range(0, len(mitems)):
                if mitems[k] == item:
                    # Found the item, get all metrics for this month
                    found_item = True
                    for metric in metrics:
                        item_metric_month_value = acc_pending_time_median_month[metric][i][k]
                        acc_pending_time_median[metric][i].append(item_metric_month_value)
            if not found_item:
                for metric in metrics:
                    # 0 reviews, 0 review time
                    acc_pending_time_median[metric][i].append(0)

    # Now we need to completePeriods to add time series fields
    # All the time series are already complete because the way they are built
    # but we miss some time series fields
    ts_fields = ['unixtime','date','month','id']
    ts_aux = {}
    ts_aux['month'] = acc_pending_time_median['month']
    ts_aux = completePeriodIds(ts_aux, self.filters.period,
                               self.filters.startdate, self.filters.enddate)
    for field in ts_fields:
        acc_pending_time_median[field] = ts_aux[field]

    # After completing the time series, add the name/url series
    if self.filters.type_analysis[0] != "repository":
        acc_pending_time_median["name"] = all_items
    else:
        acc_pending_time_median["url"] = all_items

    # And now we need to adjust the format from
    # month:[M1, M2, M3], url:[URL1, URL2. ...], metric:[[URL1_M1,URL2_M1], [URL1_M2, URL2_M2],[URL1_M3...]...]
    # to
    # month:[M1, M2, M3], url:[URL1, URL2. ...], metric:[[URL1_M1, URL1_M2, URL1_M3],[URL2_M1...]...]
    time_to = {}
    for field in acc_pending_time_median:
        if field not in metrics:
            time_to[field] = acc_pending_time_median[field]
        else:
            # The new metric field will have an array per item with the time series
            time_to[field] = []
            for i in range(0,len(all_items)):
                time_to[field].append([])
            for metrics_month in acc_pending_time_median[field]:
                for j in range(0,len(all_items)):
                    time_to[field][j].append(metrics_month[j])
    return time_to
def get_ts(self):
    """Time series of pending-review age medians, one point per month.

    For every month between the filters' start and end dates, selects the
    reviews still pending at month end (optionally restricted to those
    waiting for a reviewer, measured either from submission time or from
    the last patchset upload) and records the review count plus the median
    of their accumulated pending days.

    Only period == 'month' is supported; otherwise an error is logged and
    None is returned.  When the filter targets all items of a type, the
    work is delegated to _get_ts_all, which returns one series per item.
    """
    # Get all reviews pending time for each month and compute the median.
    # Return a list with all the medians for all months

    def get_date_from_month(monthid):
        """Last day of the month encoded as year*12+month, as 'Y-M-D'."""
        # month format: year*12+month
        year = (monthid-1) / 12
        month = monthid - year*12
        # We need the last day of the month
        import calendar
        last_day = calendar.monthrange(year, month)[1]
        current = str(year)+"-"+str(month)+"-"+str(last_day)
        return (current)

    # SQL for all, for upload or for waiting for reviewer reviews
    def get_sql(month, reviewers = False, uploaded = False):
        """Build the SQL listing reviews pending at the end of `month`.

        reviewers -- restrict to reviews still waiting for a reviewer
        uploaded  -- measure age from the last patchset upload instead
                     of from submission
        """
        current = get_date_from_month(month)
        sql_max_patchset = self.db.get_sql_max_patchset_for_reviews (current)
        sql_reviews_reviewed = self.db.get_sql_reviews_reviewed(startdate, current)
        sql_reviews_closed = self.db.get_sql_reviews_closed(startdate, current)

        # List of pending reviews before a date: time from new time and from last upload
        fields = "TIMESTAMPDIFF(SECOND, submitted_on, '"+current+"')/(24*3600) AS newtime,"
        if (uploaded):
            fields = "TIMESTAMPDIFF(SECOND, ch.changed_on, '"+current+"')/(24*3600) AS uploadtime,"
        fields += " YEAR(i.submitted_on)*12+MONTH(i.submitted_on) as month"
        all_items = self.db.get_all_items(self.filters.type_analysis)
        if all_items:
            group_field = self.db.get_group_field(all_items)
            fields = group_field + ", " + fields

        tables = Set([])
        filters = Set([])

        tables.add("issues i")
        tables.add("people")
        tables.add("issues_ext_gerrit ie")
        if (uploaded):
            tables.add("changes ch")
            tables.add("("+sql_max_patchset+") last_patch")
        tables.union_update(self.db.GetSQLReportFrom(self.filters))
        tables = self.db._get_tables_query(tables)

        filters.add("people.id=i.submitted_by")
        filters.add("ie.issue_id=i.id")
        filters.add("i.id NOT IN ("+ sql_reviews_closed +")")
        if (uploaded):
            filters.add("ch.issue_id = i.id")
            filters.add("i.id = last_patch.issue_id")
            filters.add("ch.old_value = last_patch.maxPatchset")
            filters.add("ch.field = 'Upload'")
        if reviewers:
            filters.add("i.id NOT IN (%s) " % (sql_reviews_reviewed))
        filters.union_update(self.db.GetSQLReportWhere(self.filters,"issues"))
        filters = self.db._get_filters_query(filters)
        # All reviews before the month: accumulated key point
        filters += " HAVING month<=" + str(month)
        # Not include future submissions for current month analysis
        # We should no need it with the actual SQL which is correct
        if (uploaded):
            filters += " AND uploadtime >= 0"
        else:
            filters += " AND newtime >= 0"
        filters += " ORDER BY i.submitted_on"
        q = self.db.GetSQLGlobal('i.submitted_on', fields, tables, filters,
                                 startdate,enddate)
        return q

    def get_values_median(values):
        """Median of a value list; NaN when the list is empty."""
        if not isinstance(values, list): values = [values]
        values = removeDecimals(values)
        if (len(values) == 0): values = float('nan')
        else: values = median(values)
        return values

    def _get_ts_all():
        """ Get the metrics for all items at the same time """
        all_items = self.db.get_all_items(self.filters.type_analysis)
        group_field = self.db.get_group_field(all_items)
        id_field = group_field.split('.')[1] # remove table name
        # Fix: this list was assigned twice with the identical literal;
        # the redundant second assignment has been removed.
        metrics = ["review_time_pending_reviews",
                   "review_time_pending_days_acc_median",
                   "review_time_pending_upload_reviews",
                   "review_time_pending_upload_days_acc_median",
                   "review_time_pending_ReviewsWaitingForReviewer_days_acc_median",
                   "review_time_pending_ReviewsWaitingForReviewer_reviews",
                   "review_time_pending_upload_ReviewsWaitingForReviewer_days_acc_median",
                   "review_time_pending_upload_ReviewsWaitingForReviewer_reviews"]

        acc_pending_time_median_month = {"month":[],"name":[]} # Used to store each month all items data
        acc_pending_time_median = {"month":[]} # Used to store the final format

        for metric in metrics:
            acc_pending_time_median_month[metric] = []
            acc_pending_time_median[metric] = []

        # months = 2 # to debug
        for i in range(0, months+1):
            # Complete the skeleton of the data dict
            acc_pending_time_median_month["month"].append(start_month+i)
            acc_pending_time_median["month"].append(start_month+i)
            acc_pending_time_median_month["name"].append([])
            for metric in metrics:
                acc_pending_time_median_month[metric].append([])
                acc_pending_time_median[metric].append([])

        for i in range(0, months+1):
            # First get all data from SQL
            newtime = self.db.ExecuteQuery(get_sql(start_month+i))
            uploadtime = self.db.ExecuteQuery(get_sql(start_month+i, False, True))
            newtime_rev = self.db.ExecuteQuery(get_sql(start_month+i, True)) # This is the slow query
            uploadtime_rev = self.db.ExecuteQuery(get_sql(start_month+i, True, True))

            # Build a common list for all items
            all_items_month_ids = []
            # for data_sql in [newtime, uploadtime, newtime_rev, uploadtime_rev]:
            for data_sql in [newtime, uploadtime, newtime_rev, uploadtime_rev]:
                checkListArray(data_sql)
                all_items_month_ids = list(Set(data_sql[id_field]+all_items_month_ids))
            acc_pending_time_median_month["name"][i] = all_items_month_ids

            # Now add the data in a common dict for all metrics in this month
            # review time
            for item in all_items_month_ids:
                data_item = []
                for j in range(0, len(newtime[id_field])):
                    if newtime[id_field][j] == item:
                        data_item.append(newtime['newtime'][j])
                values = get_values_median(data_item)
                # print start_month+i, "newtime", item, values
                nreviews = len(data_item)
                acc_pending_time_median_month['review_time_pending_reviews'][i].append(nreviews)
                acc_pending_time_median_month['review_time_pending_days_acc_median'][i].append(values)
            # upload time
            for item in all_items_month_ids:
                data_item = []
                for j in range(0, len(uploadtime[id_field])):
                    if uploadtime[id_field][j] == item:
                        data_item.append(uploadtime['uploadtime'][j])
                values = get_values_median(data_item)
                # print start_month+i, "upload", item, values
                nreviews = len(data_item)
                acc_pending_time_median_month['review_time_pending_upload_reviews'][i].append(nreviews)
                acc_pending_time_median_month['review_time_pending_upload_days_acc_median'][i].append(values)
            # review time reviewers
            for item in all_items_month_ids:
                # Now just for reviews waiting for Reviewer
                data_item = []
                for j in range(0, len(newtime_rev[id_field])):
                    if newtime_rev[id_field][j] == item:
                        data_item.append(newtime_rev['newtime'][j])
                values = get_values_median(data_item)
                nreviews = len(data_item)
                acc_pending_time_median_month['review_time_pending_ReviewsWaitingForReviewer_reviews'][i].append(nreviews)
                acc_pending_time_median_month['review_time_pending_ReviewsWaitingForReviewer_days_acc_median'][i].append(values)
            # upload time reviewers
            for item in all_items_month_ids:
                data_item = []
                for j in range(0, len(uploadtime_rev[id_field])):
                    if uploadtime_rev[id_field][j] == item:
                        data_item.append(uploadtime_rev['uploadtime'][j])
                values = get_values_median(data_item)
                nreviews = len(data_item)
                acc_pending_time_median_month['review_time_pending_upload_ReviewsWaitingForReviewer_reviews'][i].append(nreviews)
                acc_pending_time_median_month['review_time_pending_upload_ReviewsWaitingForReviewer_days_acc_median'][i].append(values)

        # Now we need to consolidate all names in a single list
        all_items = []
        for lnames in acc_pending_time_median_month['name']:
            all_items = list(Set(lnames+all_items))

        # And now time to create the final version that should be completePeriod
        for item in all_items:
            # Add the ts for the item to the final dict
            for i in range(0, months+1):
                mitems = acc_pending_time_median_month['name'][i]
                found_item = False
                for k in range(0, len(mitems)):
                    if mitems[k] == item:
                        # Found the item, get all metrics for this month
                        found_item = True
                        for metric in metrics:
                            item_metric_month_value = acc_pending_time_median_month[metric][i][k]
                            acc_pending_time_median[metric][i].append(item_metric_month_value)
                if not found_item:
                    for metric in metrics:
                        # 0 reviews, 0 review time
                        acc_pending_time_median[metric][i].append(0)

        # Now we need to completePeriods to add time series fields
        # All the time series are already complete because the way they are built
        # but we miss some time series fields
        ts_fields = ['unixtime','date','month','id']
        ts_aux = {}
        ts_aux['month'] = acc_pending_time_median['month']
        ts_aux = completePeriodIds(ts_aux, self.filters.period,
                                   self.filters.startdate, self.filters.enddate)
        for field in ts_fields:
            acc_pending_time_median[field] = ts_aux[field]

        # After completing the time series, add the name/url series
        if self.filters.type_analysis[0] != "repository":
            acc_pending_time_median["name"] = all_items
        else:
            acc_pending_time_median["url"] = all_items

        # And now we need to adjust the format from
        # month:[M1, M2, M3], url:[URL1, URL2. ...], metric:[[URL1_M1,URL2_M1], [URL1_M2, URL2_M2],[URL1_M3...]...]
        # to
        # month:[M1, M2, M3], url:[URL1, URL2. ...], metric:[[URL1_M1, URL1_M2, URL1_M3],[URL2_M1...]...]
        time_to = {}
        for field in acc_pending_time_median:
            if field not in metrics:
                time_to[field] = acc_pending_time_median[field]
            else:
                # The new metric field will have an array per item with the time series
                time_to[field] = []
                for i in range(0,len(all_items)):
                    time_to[field].append([])
                for metrics_month in acc_pending_time_median[field]:
                    for j in range(0,len(all_items)):
                        time_to[field][j].append(metrics_month[j])
        return time_to

    startdate = self.filters.startdate
    enddate = self.filters.enddate
    period = self.filters.period

    start = datetime.strptime(startdate, "'%Y-%m-%d'")
    end = datetime.strptime(enddate, "'%Y-%m-%d'")

    if (period != "month"):
        logging.error("Period not supported in " + self.id + " " + period)
        return None

    # Months encoded as year*12+month so consecutive months differ by 1
    start_month = start.year*12 + start.month
    end_month = end.year*12 + end.month
    months = end_month - start_month

    all_items = self.db.get_all_items(self.filters.type_analysis)
    if all_items: return _get_ts_all()

    acc_pending_time_median = {"month":[],
                               "review_time_pending_reviews":[],
                               "review_time_pending_days_acc_median":[],
                               "review_time_pending_upload_reviews":[],
                               "review_time_pending_upload_days_acc_median":[],
                               "review_time_pending_ReviewsWaitingForReviewer_days_acc_median":[],
                               "review_time_pending_ReviewsWaitingForReviewer_reviews":[],
                               "review_time_pending_upload_ReviewsWaitingForReviewer_days_acc_median":[],
                               "review_time_pending_upload_ReviewsWaitingForReviewer_reviews":[],}

    for i in range(0, months+1):
        acc_pending_time_median['month'].append(start_month+i)

        reviews = self.db.ExecuteQuery(get_sql(start_month+i))
        values = get_values_median(reviews['newtime'])
        if isinstance(reviews['newtime'], list): nreviews = len(reviews['newtime'])
        else: nreviews = 1 # sure 1?
        acc_pending_time_median['review_time_pending_reviews'].append(nreviews)
        acc_pending_time_median['review_time_pending_days_acc_median'].append(values)

        # upload time
        reviews = self.db.ExecuteQuery(get_sql(start_month+i, False, True))
        values = get_values_median(reviews['uploadtime'])
        if isinstance(reviews['uploadtime'], list): nreviews = len(reviews['uploadtime'])
        else: nreviews = 1
        acc_pending_time_median['review_time_pending_upload_reviews'].append(nreviews)
        acc_pending_time_median['review_time_pending_upload_days_acc_median'].append(values)

        # Now just for reviews waiting for Reviewer
        reviews = self.db.ExecuteQuery(get_sql(start_month+i, True))
        values = get_values_median(reviews['newtime'])
        if isinstance(reviews['newtime'], list): nreviews = len(reviews['newtime'])
        else: nreviews = 1
        acc_pending_time_median['review_time_pending_ReviewsWaitingForReviewer_reviews'].append(nreviews)
        acc_pending_time_median['review_time_pending_ReviewsWaitingForReviewer_days_acc_median'].append(values)

        reviews = self.db.ExecuteQuery(get_sql(start_month+i, True, True))
        values = get_values_median(reviews['uploadtime'])
        if isinstance(reviews['uploadtime'], list): nreviews = len(reviews['uploadtime'])
        else: nreviews = 1
        acc_pending_time_median['review_time_pending_upload_ReviewsWaitingForReviewer_reviews'].append(nreviews)
        acc_pending_time_median['review_time_pending_upload_ReviewsWaitingForReviewer_days_acc_median'].append(values)

    # Normalize values removing NA and converting to 0. Maybe not a good idea.
    # NOTE(review): float() keeps NaN as NaN, so the "converting to 0"
    # described above does not actually happen here -- confirm intent.
    for m in acc_pending_time_median.keys():
        for i in range(0,len(acc_pending_time_median[m])):
            acc_pending_time_median[m][i] = float(acc_pending_time_median[m][i])

    return completePeriodIds(acc_pending_time_median, self.filters.period,
                             self.filters.startdate, self.filters.enddate)
def result(self, data_source, destdir=None):
    """Write the SCR community-attraction JSON reports to destdir.

    Produces files for new/gone code contributors (submitters, mergers,
    abandoners), per-person submission time series for new and gone
    submitters, and the people-intake evolution (people whose review
    count falls in the 1, 1-5 and 5-10 buckets).

    Only runs for the SCR data source and a non-None destdir; otherwise
    it returns immediately without writing anything.
    """
    from vizgrimoire.SCR import SCR
    if data_source != SCR or destdir is None: return

    period = self.filters.period
    startdate = self.filters.startdate
    enddate = self.filters.enddate

    # People who recently joined the review workflow
    code_contrib = {}
    code_contrib["submitters"] = self.GetNewSubmitters()
    code_contrib["mergers"] = self.GetNewMergers()
    code_contrib["abandoners"] = self.GetNewAbandoners()
    createJSON(code_contrib, destdir + "/scr-code-contrib-new.json")

    # People who stopped participating
    code_contrib = {}
    code_contrib["submitters"] = self.GetGoneSubmitters()
    code_contrib["mergers"] = self.GetGoneMergers()
    code_contrib["abandoners"] = self.GetGoneAbandoners()
    createJSON(code_contrib, destdir + "/scr-code-contrib-gone.json")

    # Per-person submission time series for new submitters.
    # NOTE: dict(a.items() + b.items()) is the Python 2 merge idiom; the
    # last pdata merged in provides the shared time-series axis fields.
    data = self.GetNewSubmittersActivity()
    evol = {}
    evol['people'] = {}
    for uuid in data['uuid']:
        pdata = self.db.GetPeopleEvolSubmissionsSCR(uuid, period,
                                                    startdate, enddate)
        pdata = completePeriodIds(pdata, period, startdate, enddate)
        evol['people'][uuid] = {"submissions": pdata['submissions']}
        # Just to have the time series data
        evol = dict(evol.items() + pdata.items())
    if 'changes' in evol:
        del evol['changes'] # closed (metrics) is included in people
    createJSON(evol, destdir + "/new-people-activity-scr-evolutionary.json")

    # Same series for submitters who have gone
    data = self.GetGoneSubmittersActivity()
    evol = {}
    evol['people'] = {}
    for uuid in data['uuid']:
        pdata = self.db.GetPeopleEvolSubmissionsSCR(uuid, period,
                                                    startdate, enddate)
        pdata = completePeriodIds(pdata, period, startdate, enddate)
        evol['people'][uuid] = {"submissions": pdata['submissions']}
        # Just to have the time series data
        evol = dict(evol.items() + pdata.items())
    if 'changes' in evol:
        del evol['changes'] # closed (metrics) is included in people
    createJSON(evol, destdir + "/gone-people-activity-scr-evolutionary.json")

    # data = GetPeopleLeaving()
    # createJSON(data, destdir+"/leaving-people-scr.json")

    # People-intake buckets: number of people with (min, max] reviews
    evol = {}
    data = completePeriodIds(self.db.GetPeopleIntake(0, 1), period,
                             startdate, enddate)
    evol[period] = data[period]
    evol['id'] = data['id']
    evol['date'] = data['date']
    evol['num_people_1'] = data['people']
    evol['num_people_1_5'] = completePeriodIds(
        self.db.GetPeopleIntake(1, 5), period, startdate, enddate)['people']
    evol['num_people_5_10'] = completePeriodIds(
        self.db.GetPeopleIntake(5, 10), period, startdate, enddate)['people']
    createJSON(evol, destdir + "/scr-people-intake-evolutionary.json")
def get_person_evol(uuid, period, startdate, enddate, identities_db, type_analysis):
    """Pull-request activity evolution for one person, 0-filled per period."""
    activity = Pullpo.get_people_query(uuid, startdate, enddate, True, period)
    return completePeriodIds(activity, period, startdate, enddate)
def get_person_evol(uuid, period, startdate, enddate, identities_db, type_analysis):
    """Releases activity evolution for one person, 0-filled per period."""
    sql = ReleasesDS._get_people_sql(uuid, period, startdate, enddate, True)
    raw = ExecuteQuery(sql)
    return completePeriodIds(raw, period, startdate, enddate)
def get_ts_changes(self):
    """Evolution of review changes, completed with period ids."""
    raw = self.db.ExecuteQuery(self._get_sqlchanges(True))
    f = self.filters
    return completePeriodIds(raw, f.period, f.startdate, f.enddate)
def get_backlog(self, states, backend_type):
    """Time series of how many tickets sit in each state per period.

    Sweeps the chronological issue-status log, keeping a running count
    per state, and snapshots the counts at the end of every period.

    states       -- list of state names to track; transitions involving
                    other states are ignored for counting
    backend_type -- passed to __get_sql_issues_states__ to build the log
                    query for the right tracker backend

    Returns the completePeriodIds dict extended with one list per state.

    NOTE(review): assumes the issues log rows come ordered by 'udate'
    ascending -- confirm in __get_sql_issues_states__.
    """
    import datetime
    import time

    def update_backlog_count(current_count, backlog_count):
        # Snapshot the running per-state counters into the result lists
        for state, count in current_count.items():
            backlog_count[state].append(count)

    # Dict to store the results
    data = {self.filters.period : [self.filters.startdate,
                                   self.filters.enddate]}
    data = completePeriodIds(data, self.filters.period,
                             self.filters.startdate, self.filters.enddate)

    tickets_states = {}   # last known state per ticket id
    current_status = {}   # running count of tickets per tracked state

    # Initialize structures
    for state in states:
        current_status[state] = 0
        data[state] = []

    # Request issues log
    query = self.__get_sql_issues_states__(backend_type)
    issues_log = self.db.ExecuteQuery(query)

    periods = list(data['unixtime'][1:])
    # Add a one period more to avoid problems with
    # data from this period
    last_date = int(time.mktime(datetime.datetime.strptime(
        self.filters.enddate, "'%Y-%m-%d'").timetuple()))
    periods.append(last_date)
    end_period = int(periods.pop(0))

    for i in range(len(issues_log['issue_id'])):
        issue_id = issues_log['issue_id'][i]
        issue_state = issues_log['status'][i]
        issue_date = int(issues_log['udate'][i])

        # Fill periods without changes on issues states
        while issue_date >= end_period:
            end_period = int(periods.pop(0))
            update_backlog_count(current_status, data)

        if issue_id not in tickets_states:
            # Add ticket to the status dict
            tickets_states[issue_id] = issue_state
        elif tickets_states[issue_id] != issue_state:
            # Decrease the count of tickets with the old state
            # only for predefined states
            old_state = tickets_states[issue_id]
            if old_state in states:
                current_status[old_state] -= 1
            # Set new status
            tickets_states[issue_id] = issue_state
        else:
            continue # Ignore equal states

        # Increase the count of tickets with issue_state
        # only for predefined states
        if issue_state in states:
            current_status[issue_state] += 1

    # End of the loop. Add the last period values to the backlog count
    update_backlog_count(current_status, data)

    # Fill remaining periods without changes on issues states
    while periods:
        periods.pop(0)
        for state in states:
            data[state].append(data[state][-1])

    return data
def result(self, data_source, destdir=None):
    """Write the SCM community-attraction JSON reports and return them.

    Produces files for new/gone authors, per-person commit time series
    for new and gone authors, and the people-intake evolution (people
    whose commit count falls in the 1, 1-5 and 5-10 buckets).  The same
    data is collected into result_dict and returned.

    Only runs for the SCM data source and a non-None destdir; otherwise
    it returns None without writing anything.
    """
    from vizgrimoire.SCM import SCM
    if data_source != SCM or destdir is None: return

    result_dict = {}

    period = self.filters.period
    startdate = self.filters.startdate
    enddate = self.filters.enddate

    # Authors who recently joined
    code_contrib = {}
    code_contrib["authors"] = self.GetNewAuthors()
    result_dict['people_new'] = code_contrib
    createJSON(code_contrib, destdir + "/scm-code-contrib-new.json")

    # Authors who stopped committing
    code_contrib = {}
    code_contrib["authors"] = self.GetGoneAuthors()
    result_dict['people_gone'] = code_contrib
    createJSON(code_contrib, destdir + "/scm-code-contrib-gone.json")

    # Per-person commit time series for new authors.
    # NOTE: dict(a.items() + b.items()) is the Python 2 merge idiom; the
    # last pdata merged in provides the shared time-series axis fields.
    data = self.GetNewAuthorsActivity()
    evol = {}
    evol['people'] = {}
    for uuid in data['uuid']:
        pdata = self.db.GetEvolPeopleSCM(uuid, period, startdate, enddate)
        pdata = completePeriodIds(pdata, period, startdate, enddate)
        evol['people'][uuid] = {"commits": pdata['commits']}
        # Just to have the time series data
        evol = dict(evol.items() + pdata.items())
    result_dict['people_new_ts'] = evol
    createJSON(evol, destdir + "/new-people-activity-scm-evolutionary.json")

    # Same series for authors who have gone
    data = self.GetGoneAuthorsActivity()
    evol = {}
    evol['people'] = {}
    for uuid in data['uuid']:
        pdata = self.db.GetEvolPeopleSCM(uuid, period, startdate, enddate)
        pdata = completePeriodIds(pdata, period, startdate, enddate)
        evol['people'][uuid] = {"commits": pdata['commits']}
        # Just to have the time series data
        evol = dict(evol.items() + pdata.items())
    if 'changes' in evol:
        del evol['changes'] # closed (metrics) is included in people
    result_dict['people_gone_ts'] = evol
    createJSON(evol, destdir + "/gone-people-activity-scm-evolutionary.json")

    # data = GetPeopleLeaving()
    # createJSON(data, destdir+"/leaving-people-scr.json")

    # People-intake buckets: number of people with (min, max] commits
    evol = {}
    data = completePeriodIds(self.db.GetPeopleIntake(0, 1), period,
                             startdate, enddate)
    evol[period] = data[period]
    evol['id'] = data['id']
    evol['date'] = data['date']
    evol['num_people_1'] = data['people']
    evol['num_people_1_5'] = completePeriodIds(
        self.db.GetPeopleIntake(1, 5), period, startdate, enddate)['people']
    evol['num_people_5_10'] = completePeriodIds(
        self.db.GetPeopleIntake(5, 10), period, startdate, enddate)['people']
    result_dict['people_intake_ts'] = evol
    createJSON(evol, destdir + "/scm-people-intake-evolutionary.json")

    return result_dict
def get_ts(self):
    """Return the time series of pending-review age medians, month by month.

    For every month between the filters' start and end dates, the set of
    reviews still pending at the end of that month is fetched and the median
    of their ages (in days) is computed, both from submission time
    ("newtime") and from the last patchset upload ("uploadtime"), and both
    for all pending reviews and for the subset waiting for a reviewer.

    Only period == "month" is supported; returns None otherwise.
    When the filters group by items (repository/company/...), the work is
    delegated to the nested _get_ts_all and the metric fields contain one
    time series per item.

    Fix over previous revision: removed a dead duplicated `metrics = [...]`
    assignment in _get_ts_all and stale commented-out debug code.
    """

    def get_date_from_month(monthid):
        """Return 'Y-M-D' for the last day of a year*12+month encoded month.

        Py2 integer division is relied on; day/month are not zero-padded,
        which MySQL accepts in date literals.
        """
        # month format: year*12+month
        year = (monthid - 1) / 12
        month = monthid - year * 12
        # We need the last day of the month
        import calendar
        last_day = calendar.monthrange(year, month)[1]
        current = str(year) + "-" + str(month) + "-" + str(last_day)
        return (current)

    # SQL for all, for upload or for waiting for reviewer reviews
    def get_sql(month, reviewers=False, uploaded=False):
        """Build the SQL returning pending-review ages up to `month`.

        reviewers: restrict to reviews not yet reviewed (waiting for reviewer).
        uploaded: measure age from the last patchset upload instead of from
        submission time.
        """
        current = get_date_from_month(month)
        sql_max_patchset = self.db.get_sql_max_patchset_for_reviews(current)
        sql_reviews_reviewed = self.db.get_sql_reviews_reviewed(
            startdate, current)
        sql_reviews_closed = self.db.get_sql_reviews_closed(
            startdate, current)
        # List of pending reviews before a date: time from new time and from
        # last upload, expressed in days.
        fields = "TIMESTAMPDIFF(SECOND, submitted_on, '" + current + "')/(24*3600) AS newtime,"
        if (uploaded):
            fields = "TIMESTAMPDIFF(SECOND, ch.changed_on, '" + current + "')/(24*3600) AS uploadtime,"
        fields += " YEAR(i.submitted_on)*12+MONTH(i.submitted_on) as month"
        all_items = self.db.get_all_items(self.filters.type_analysis)
        if all_items:
            # Group field (e.g. repo name) is prepended so results can be
            # split per item later.
            group_field = self.db.get_group_field(all_items)
            fields = group_field + ", " + fields
        tables = Set([])
        filters = Set([])
        tables.add("issues i")
        tables.add("people")
        tables.add("issues_ext_gerrit ie")
        if (uploaded):
            tables.add("changes ch")
            tables.add("(" + sql_max_patchset + ") last_patch")
        tables.union_update(self.db.GetSQLReportFrom(self.filters))
        tables = self.db._get_tables_query(tables)
        filters.add("people.id=i.submitted_by")
        filters.add("ie.issue_id=i.id")
        # Pending == not closed before `current`.
        filters.add("i.id NOT IN (" + sql_reviews_closed + ")")
        if (uploaded):
            # Join each review with its last patchset upload event.
            filters.add("ch.issue_id = i.id")
            filters.add("i.id = last_patch.issue_id")
            filters.add("ch.old_value = last_patch.maxPatchset")
            filters.add("ch.field = 'Upload'")
        if reviewers:
            filters.add("i.id NOT IN (%s) " % (sql_reviews_reviewed))
        filters.union_update(
            self.db.GetSQLReportWhere(self.filters, "issues"))
        filters = self.db._get_filters_query(filters)
        # All reviews before the month: accumulated key point
        filters += " HAVING month<=" + str(month)
        # Not include future submissions for current month analysis
        # We should no need it with the actual SQL which is correct
        if (uploaded):
            filters += " AND uploadtime >= 0"
        else:
            filters += " AND newtime >= 0"
        filters += " ORDER BY i.submitted_on"
        q = self.db.GetSQLGlobal('i.submitted_on', fields, tables, filters,
                                 startdate, enddate)
        return q

    def get_values_median(values):
        """Median of a scalar-or-list of ages; 0.0 for an empty list."""
        if not isinstance(values, list):
            values = [values]
        values = removeDecimals(values)
        if (len(values) == 0):
            values = float('0')
        else:
            values = median(values)
        return values

    def _get_ts_all():
        """ Get the metrics for all items at the same time """
        all_items = self.db.get_all_items(self.filters.type_analysis)
        group_field = self.db.get_group_field(all_items)
        id_field = group_field.split('.')[1]  # remove table name
        metrics = [
            "review_time_pending_reviews",
            "review_time_pending_days_acc_median",
            "review_time_pending_upload_reviews",
            "review_time_pending_upload_days_acc_median",
            "review_time_pending_ReviewsWaitingForReviewer_days_acc_median",
            "review_time_pending_ReviewsWaitingForReviewer_reviews",
            "review_time_pending_upload_ReviewsWaitingForReviewer_days_acc_median",
            "review_time_pending_upload_ReviewsWaitingForReviewer_reviews"
        ]
        acc_pending_time_median_month = {
            "month": [],
            "name": []
        }  # Used to store each month all items data
        acc_pending_time_median = {
            "month": []
        }  # Used to store the final format
        for metric in metrics:
            acc_pending_time_median_month[metric] = []
            acc_pending_time_median[metric] = []
        for i in range(0, months + 1):
            # Complete the skeleton of the data dict
            acc_pending_time_median_month["month"].append(start_month + i)
            acc_pending_time_median["month"].append(start_month + i)
            acc_pending_time_median_month["name"].append([])
            for metric in metrics:
                acc_pending_time_median_month[metric].append([])
                acc_pending_time_median[metric].append([])
        for i in range(0, months + 1):
            # First get all data from SQL
            newtime = self.db.ExecuteQuery(get_sql(start_month + i))
            uploadtime = self.db.ExecuteQuery(
                get_sql(start_month + i, False, True))
            newtime_rev = self.db.ExecuteQuery(
                get_sql(start_month + i, True))  # This is the slow query
            uploadtime_rev = self.db.ExecuteQuery(
                get_sql(start_month + i, True, True))
            # Build a common list of item ids seen this month in any dataset.
            all_items_month_ids = []
            for data_sql in [
                    newtime, uploadtime, newtime_rev, uploadtime_rev
            ]:
                checkListArray(data_sql)
                all_items_month_ids = list(
                    Set(data_sql[id_field] + all_items_month_ids))
            acc_pending_time_median_month["name"][i] = all_items_month_ids
            # Now add the data in a common dict for all metrics in this month
            # review time
            for item in all_items_month_ids:
                data_item = []
                for j in range(0, len(newtime[id_field])):
                    if newtime[id_field][j] == item:
                        data_item.append(newtime['newtime'][j])
                values = get_values_median(data_item)
                nreviews = len(data_item)
                acc_pending_time_median_month[
                    'review_time_pending_reviews'][i].append(nreviews)
                acc_pending_time_median_month[
                    'review_time_pending_days_acc_median'][i].append(values)
            # upload time
            for item in all_items_month_ids:
                data_item = []
                for j in range(0, len(uploadtime[id_field])):
                    if uploadtime[id_field][j] == item:
                        data_item.append(uploadtime['uploadtime'][j])
                values = get_values_median(data_item)
                nreviews = len(data_item)
                acc_pending_time_median_month[
                    'review_time_pending_upload_reviews'][i].append(nreviews)
                acc_pending_time_median_month[
                    'review_time_pending_upload_days_acc_median'][i].append(
                        values)
            # review time reviewers
            for item in all_items_month_ids:
                # Now just for reviews waiting for Reviewer
                data_item = []
                for j in range(0, len(newtime_rev[id_field])):
                    if newtime_rev[id_field][j] == item:
                        data_item.append(newtime_rev['newtime'][j])
                values = get_values_median(data_item)
                nreviews = len(data_item)
                acc_pending_time_median_month[
                    'review_time_pending_ReviewsWaitingForReviewer_reviews'][
                        i].append(nreviews)
                acc_pending_time_median_month[
                    'review_time_pending_ReviewsWaitingForReviewer_days_acc_median'][
                        i].append(values)
            # upload time reviewers
            for item in all_items_month_ids:
                data_item = []
                for j in range(0, len(uploadtime_rev[id_field])):
                    if uploadtime_rev[id_field][j] == item:
                        data_item.append(uploadtime_rev['uploadtime'][j])
                values = get_values_median(data_item)
                nreviews = len(data_item)
                acc_pending_time_median_month[
                    'review_time_pending_upload_ReviewsWaitingForReviewer_reviews'][
                        i].append(nreviews)
                acc_pending_time_median_month[
                    'review_time_pending_upload_ReviewsWaitingForReviewer_days_acc_median'][
                        i].append(values)
        # Now we need to consolidate all names in a single list
        all_items = []
        for lnames in acc_pending_time_median_month['name']:
            all_items = list(Set(lnames + all_items))
        # And now time to create the final version that should be completePeriod
        for item in all_items:
            # Add the ts for the item to the final dict
            for i in range(0, months + 1):
                mitems = acc_pending_time_median_month['name'][i]
                found_item = False
                for k in range(0, len(mitems)):
                    if mitems[k] == item:
                        # Found the item, get all metrics for this month
                        found_item = True
                        for metric in metrics:
                            item_metric_month_value = acc_pending_time_median_month[
                                metric][i][k]
                            acc_pending_time_median[metric][i].append(
                                item_metric_month_value)
                if not found_item:
                    for metric in metrics:
                        # 0 reviews, 0 review time
                        acc_pending_time_median[metric][i].append(0)
        # Now we need to completePeriods to add time series fields
        # All the time series are already complete because the way they are built
        # but we miss some time series fields
        ts_fields = ['unixtime', 'date', 'month', 'id']
        ts_aux = {}
        ts_aux['month'] = acc_pending_time_median['month']
        ts_aux = completePeriodIds(ts_aux, self.filters.period,
                                   self.filters.startdate,
                                   self.filters.enddate)
        for field in ts_fields:
            acc_pending_time_median[field] = ts_aux[field]
        # After completing the time series, add the name/url series
        if self.filters.type_analysis[0] != "repository":
            acc_pending_time_median["name"] = all_items
        else:
            acc_pending_time_median["url"] = all_items
        # And now we need to adjust the format from
        # month:[M1, M2, M3], url:[URL1, URL2. ...], metric:[[URL1_M1,URL2_M1], [URL1_M2, URL2_M2],[URL1_M3...]...]
        # to
        # month:[M1, M2, M3], url:[URL1, URL2. ...], metric:[[URL1_M1, URL1_M2, URL1_M3],[URL2_M1...]...]
        time_to = {}
        for field in acc_pending_time_median:
            if field not in metrics:
                time_to[field] = acc_pending_time_median[field]
            else:
                # The new metric field will have an array per item with the time series
                time_to[field] = []
                for i in range(0, len(all_items)):
                    time_to[field].append([])
                for metrics_month in acc_pending_time_median[field]:
                    for j in range(0, len(all_items)):
                        time_to[field][j].append(metrics_month[j])
        return time_to

    startdate = self.filters.startdate
    enddate = self.filters.enddate
    identities_db = self.db.identities_db
    type_analysis = self.filters.type_analysis
    period = self.filters.period
    bots = []
    start = datetime.strptime(startdate, "'%Y-%m-%d'")
    end = datetime.strptime(enddate, "'%Y-%m-%d'")
    if (period != "month"):
        logging.error("Period not supported in " + self.id + " " + period)
        return None
    start_month = start.year * 12 + start.month
    end_month = end.year * 12 + end.month
    months = end_month - start_month
    all_items = self.db.get_all_items(self.filters.type_analysis)
    if all_items:
        return _get_ts_all()
    acc_pending_time_median = {
        "month": [],
        "review_time_pending_reviews": [],
        "review_time_pending_days_acc_median": [],
        "review_time_pending_upload_reviews": [],
        "review_time_pending_upload_days_acc_median": [],
        "review_time_pending_ReviewsWaitingForReviewer_days_acc_median": [],
        "review_time_pending_ReviewsWaitingForReviewer_reviews": [],
        "review_time_pending_upload_ReviewsWaitingForReviewer_days_acc_median": [],
        "review_time_pending_upload_ReviewsWaitingForReviewer_reviews": [],
    }
    for i in range(0, months + 1):
        acc_pending_time_median['month'].append(start_month + i)
        reviews = self.db.ExecuteQuery(get_sql(start_month + i))
        values = get_values_median(reviews['newtime'])
        if isinstance(reviews['newtime'], list):
            nreviews = len(reviews['newtime'])
        else:
            nreviews = 1  # single row comes back as a scalar — sure 1?
        acc_pending_time_median['review_time_pending_reviews'].append(
            nreviews)
        acc_pending_time_median[
            'review_time_pending_days_acc_median'].append(values)
        # upload time
        reviews = self.db.ExecuteQuery(
            get_sql(start_month + i, False, True))
        values = get_values_median(reviews['uploadtime'])
        if isinstance(reviews['uploadtime'], list):
            nreviews = len(reviews['uploadtime'])
        else:
            nreviews = 1
        acc_pending_time_median[
            'review_time_pending_upload_reviews'].append(nreviews)
        acc_pending_time_median[
            'review_time_pending_upload_days_acc_median'].append(values)
        # Now just for reviews waiting for Reviewer
        reviews = self.db.ExecuteQuery(get_sql(start_month + i, True))
        values = get_values_median(reviews['newtime'])
        if isinstance(reviews['newtime'], list):
            nreviews = len(reviews['newtime'])
        else:
            nreviews = 1
        acc_pending_time_median[
            'review_time_pending_ReviewsWaitingForReviewer_reviews'].append(
                nreviews)
        acc_pending_time_median[
            'review_time_pending_ReviewsWaitingForReviewer_days_acc_median'].append(
                values)
        reviews = self.db.ExecuteQuery(get_sql(start_month + i, True, True))
        values = get_values_median(reviews['uploadtime'])
        if isinstance(reviews['uploadtime'], list):
            nreviews = len(reviews['uploadtime'])
        else:
            nreviews = 1
        acc_pending_time_median[
            'review_time_pending_upload_ReviewsWaitingForReviewer_reviews'].append(
                nreviews)
        acc_pending_time_median[
            'review_time_pending_upload_ReviewsWaitingForReviewer_days_acc_median'].append(
                values)
    # Normalize values removing NA and converting to 0. Maybe not a good idea.
    for m in acc_pending_time_median.keys():
        for i in range(0, len(acc_pending_time_median[m])):
            acc_pending_time_median[m][i] = float(
                acc_pending_time_median[m][i])
    return completePeriodIds(acc_pending_time_median, self.filters.period,
                             self.filters.startdate, self.filters.enddate)
def _get_ts_all():
    """ Get the metrics for all items at the same time.

    NOTE(review): this top-level definition appears to be a stray duplicate of
    the helper nested inside get_ts — it references names (`self`, `months`,
    `start_month`, `get_sql`, `get_values_median`, `checkListArray`) that are
    not bound at this scope, so it cannot be called as-is; confirm whether it
    is dead code that should be removed.

    Fix over previous revision: removed a dead duplicated `metrics = [...]`
    assignment (the identical list was assigned twice back-to-back).
    """
    all_items = self.db.get_all_items(self.filters.type_analysis)
    group_field = self.db.get_group_field(all_items)
    id_field = group_field.split('.')[1]  # remove table name
    metrics = [
        "review_time_pending_reviews",
        "review_time_pending_days_acc_median",
        "review_time_pending_upload_reviews",
        "review_time_pending_upload_days_acc_median",
        "review_time_pending_ReviewsWaitingForReviewer_days_acc_median",
        "review_time_pending_ReviewsWaitingForReviewer_reviews",
        "review_time_pending_upload_ReviewsWaitingForReviewer_days_acc_median",
        "review_time_pending_upload_ReviewsWaitingForReviewer_reviews"
    ]
    acc_pending_time_median_month = {
        "month": [],
        "name": []
    }  # Used to store each month all items data
    acc_pending_time_median = {
        "month": []
    }  # Used to store the final format
    for metric in metrics:
        acc_pending_time_median_month[metric] = []
        acc_pending_time_median[metric] = []
    for i in range(0, months + 1):
        # Complete the skeleton of the data dict
        acc_pending_time_median_month["month"].append(start_month + i)
        acc_pending_time_median["month"].append(start_month + i)
        acc_pending_time_median_month["name"].append([])
        for metric in metrics:
            acc_pending_time_median_month[metric].append([])
            acc_pending_time_median[metric].append([])
    for i in range(0, months + 1):
        # First get all data from SQL
        newtime = self.db.ExecuteQuery(get_sql(start_month + i))
        uploadtime = self.db.ExecuteQuery(
            get_sql(start_month + i, False, True))
        newtime_rev = self.db.ExecuteQuery(
            get_sql(start_month + i, True))  # This is the slow query
        uploadtime_rev = self.db.ExecuteQuery(
            get_sql(start_month + i, True, True))
        # Build a common list of item ids seen this month in any dataset.
        all_items_month_ids = []
        for data_sql in [
                newtime, uploadtime, newtime_rev, uploadtime_rev
        ]:
            checkListArray(data_sql)
            all_items_month_ids = list(
                Set(data_sql[id_field] + all_items_month_ids))
        acc_pending_time_median_month["name"][i] = all_items_month_ids
        # Now add the data in a common dict for all metrics in this month
        # review time
        for item in all_items_month_ids:
            data_item = []
            for j in range(0, len(newtime[id_field])):
                if newtime[id_field][j] == item:
                    data_item.append(newtime['newtime'][j])
            values = get_values_median(data_item)
            nreviews = len(data_item)
            acc_pending_time_median_month[
                'review_time_pending_reviews'][i].append(nreviews)
            acc_pending_time_median_month[
                'review_time_pending_days_acc_median'][i].append(values)
        # upload time
        for item in all_items_month_ids:
            data_item = []
            for j in range(0, len(uploadtime[id_field])):
                if uploadtime[id_field][j] == item:
                    data_item.append(uploadtime['uploadtime'][j])
            values = get_values_median(data_item)
            nreviews = len(data_item)
            acc_pending_time_median_month[
                'review_time_pending_upload_reviews'][i].append(nreviews)
            acc_pending_time_median_month[
                'review_time_pending_upload_days_acc_median'][i].append(
                    values)
        # review time reviewers
        for item in all_items_month_ids:
            # Now just for reviews waiting for Reviewer
            data_item = []
            for j in range(0, len(newtime_rev[id_field])):
                if newtime_rev[id_field][j] == item:
                    data_item.append(newtime_rev['newtime'][j])
            values = get_values_median(data_item)
            nreviews = len(data_item)
            acc_pending_time_median_month[
                'review_time_pending_ReviewsWaitingForReviewer_reviews'][
                    i].append(nreviews)
            acc_pending_time_median_month[
                'review_time_pending_ReviewsWaitingForReviewer_days_acc_median'][
                    i].append(values)
        # upload time reviewers
        for item in all_items_month_ids:
            data_item = []
            for j in range(0, len(uploadtime_rev[id_field])):
                if uploadtime_rev[id_field][j] == item:
                    data_item.append(uploadtime_rev['uploadtime'][j])
            values = get_values_median(data_item)
            nreviews = len(data_item)
            acc_pending_time_median_month[
                'review_time_pending_upload_ReviewsWaitingForReviewer_reviews'][
                    i].append(nreviews)
            acc_pending_time_median_month[
                'review_time_pending_upload_ReviewsWaitingForReviewer_days_acc_median'][
                    i].append(values)
    # Now we need to consolidate all names in a single list
    all_items = []
    for lnames in acc_pending_time_median_month['name']:
        all_items = list(Set(lnames + all_items))
    # And now time to create the final version that should be completePeriod
    for item in all_items:
        # Add the ts for the item to the final dict
        for i in range(0, months + 1):
            mitems = acc_pending_time_median_month['name'][i]
            found_item = False
            for k in range(0, len(mitems)):
                if mitems[k] == item:
                    # Found the item, get all metrics for this month
                    found_item = True
                    for metric in metrics:
                        item_metric_month_value = acc_pending_time_median_month[
                            metric][i][k]
                        acc_pending_time_median[metric][i].append(
                            item_metric_month_value)
            if not found_item:
                for metric in metrics:
                    # 0 reviews, 0 review time
                    acc_pending_time_median[metric][i].append(0)
    # Now we need to completePeriods to add time series fields
    # All the time series are already complete because the way they are built
    # but we miss some time series fields
    ts_fields = ['unixtime', 'date', 'month', 'id']
    ts_aux = {}
    ts_aux['month'] = acc_pending_time_median['month']
    ts_aux = completePeriodIds(ts_aux, self.filters.period,
                               self.filters.startdate, self.filters.enddate)
    for field in ts_fields:
        acc_pending_time_median[field] = ts_aux[field]
    # After completing the time series, add the name/url series
    if self.filters.type_analysis[0] != "repository":
        acc_pending_time_median["name"] = all_items
    else:
        acc_pending_time_median["url"] = all_items
    # And now we need to adjust the format from
    # month:[M1, M2, M3], url:[URL1, URL2. ...], metric:[[URL1_M1,URL2_M1], [URL1_M2, URL2_M2],[URL1_M3...]...]
    # to
    # month:[M1, M2, M3], url:[URL1, URL2. ...], metric:[[URL1_M1, URL1_M2, URL1_M3],[URL2_M1...]...]
    time_to = {}
    for field in acc_pending_time_median:
        if field not in metrics:
            time_to[field] = acc_pending_time_median[field]
        else:
            # The new metric field will have an array per item with the time series
            time_to[field] = []
            for i in range(0, len(all_items)):
                time_to[field].append([])
            for metrics_month in acc_pending_time_median[field]:
                for j in range(0, len(all_items)):
                    time_to[field][j].append(metrics_month[j])
    return time_to
def get_person_evol(uuid, period, startdate, enddate, identities_db, type_analysis):
    """Return the per-period activity time series for one person.

    Builds the people SQL for the ReleasesDS data source, runs it, and fills
    any missing periods so the series covers the whole date range.
    """
    query = ReleasesDS._get_people_sql(uuid, period, startdate, enddate, True)
    raw_series = ExecuteQuery(query)
    return completePeriodIds(raw_series, period, startdate, enddate)
def get_person_evol(uuid, period, startdate, enddate, identities_db, type_analysis):
    """Return the per-period IRC activity time series for one person.

    Fetches the raw evolution data and completes missing period ids so the
    series spans the full [startdate, enddate] range.
    """
    raw_evol = GetEvolPeopleIRC(uuid, period, startdate, enddate)
    return completePeriodIds(raw_evol, period, startdate, enddate)
def result(self, data_source, destdir = None):
    """Compute SCM community demographics (new/gone people and intake) and dump them as JSON.

    Only runs for the SCM data source and when a destination directory is
    given; otherwise returns None implicitly.  Writes several JSON files to
    destdir as a side effect and returns a dict with the same datasets.

    NOTE: this file is Python 2 code — `dict(a.items() + b.items())` below
    relies on Py2 list-returning dict views.
    """
    from vizgrimoire.SCM import SCM
    # Guard clause: this study only applies to SCM and needs somewhere to write.
    if data_source != SCM or destdir is None: return
    result_dict = {}
    period = self.filters.period
    startdate = self.filters.startdate
    enddate = self.filters.enddate

    # Authors that recently joined the project.
    code_contrib = {}
    code_contrib["authors"] = self.GetNewAuthors()
    result_dict['people_new'] = code_contrib
    createJSON(code_contrib, destdir+"/scm-code-contrib-new.json")

    # Authors that stopped contributing.
    code_contrib = {}
    code_contrib["authors"] = self.GetGoneAuthors()
    result_dict['people_gone'] = code_contrib
    createJSON(code_contrib, destdir+"/scm-code-contrib-gone.json")

    # Per-person commit time series for the new authors.
    data = self.GetNewAuthorsActivity()
    evol = {}
    evol['people'] = {}
    for uuid in data['uuid']:
        pdata = self.db.GetEvolPeopleSCM(uuid, period, startdate, enddate)
        pdata = completePeriodIds(pdata, period, startdate, enddate)
        evol['people'][uuid] = {"commits":pdata['commits']}
        # Merge the last pdata's period fields (id/date/...) into evol just to
        # have the time series axis available at the top level (Py2 idiom).
        evol = dict(evol.items() + pdata.items())
    result_dict['people_new_ts'] = evol
    createJSON(evol, destdir+"/new-people-activity-scm-evolutionary.json")

    # Per-person commit time series for the gone authors (same pattern).
    data = self.GetGoneAuthorsActivity()
    evol = {}
    evol['people'] = {}
    for uuid in data['uuid']:
        pdata = self.db.GetEvolPeopleSCM(uuid, period, startdate, enddate)
        pdata = completePeriodIds(pdata, period, startdate, enddate)
        evol['people'][uuid] = {"commits":pdata['commits']}
        # Just to have the time series data
        evol = dict(evol.items() + pdata.items())
    if 'changes' in evol:
        del evol['changes']  # closed (metrics) is included in people
    result_dict['people_gone_ts'] = evol
    createJSON(evol, destdir+"/gone-people-activity-scm-evolutionary.json")

    # data = GetPeopleLeaving()
    # createJSON(data, destdir+"/leaving-people-scr.json")

    # People intake buckets: 1, 1-5, 5-10 (presumably years of activity —
    # TODO confirm against GetPeopleIntake's definition).
    evol = {}
    data = completePeriodIds(self.db.GetPeopleIntake(0,1), period, startdate,
                             enddate)
    evol[period] = data[period]
    evol['id'] = data['id']
    evol['date'] = data['date']
    evol['num_people_1'] = data['people']
    evol['num_people_1_5'] = completePeriodIds(self.db.GetPeopleIntake(1,5),
                                               period, startdate, enddate)['people']
    evol['num_people_5_10'] = completePeriodIds(self.db.GetPeopleIntake(5,10),
                                                period, startdate, enddate)['people']
    result_dict['people_intake_ts'] = evol
    createJSON(evol, destdir+"/scm-people-intake-evolutionary.json")
    return result_dict