period = ITSPeriodCondition(start=datetime(2014, 1, 1), end=None)
data = ITSActivityPersons(datasource=database, name="list_changers",
                          conditions=(period,))
activity = data.activity()
print activity

#---------------------------------
print_banner("List of activity for each changer (unique ids)")
data = ITSActivityPersons(datasource=database, name="list_uchangers")
activity = data.activity()
print activity

# MLS database
database = MLSDatabase(url="mysql://*****:*****@localhost/",
                       schema="oscon_openstack_mls",
                       schema_id="oscon_openstack_scm")

#---------------------------------
print_banner("List of activity for each sender")
data = MLSActivityPersons(datasource=database, name="list_senders")
activity = data.activity()
print activity

#---------------------------------
print_banner("List of activity for each sender (unique ids)")
data = MLSActivityPersons(datasource=database, name="list_usenders")
activity = data.activity()
print activity

#---------------------------------
def result(self, data_source=None):
    """Produce result data for the analysis

    Parameters
    ----------

    data_source: SCM.SCM, MLS.MLS

    Returns
    -------

    dictionary: timezone data. It includes three generic components,
        with the following keys:
        "tz": list of timezones, from -12 to 11,
        "authors": list of authors for each timezone,
        "authors365": list of authors for each timezone during
            the last 365 days.
        Depending on the type of data source, it will contain
        two more keys.
        On SCM data sources:
        "commits": list of commits for each timezone,
        "commits365": list of commits for each timezone during
            the last 365 days.
        On MLS data sources:
        "messages": list of messages for each timezone,
        "messages365": list of messages for each timezone during
            the last 365 days.

    """

    logging.info("Producing data for study: Timezone")
    if data_source is None:
        logging.error("Error: no data source for study!")
        return
    if data_source not in (SCM, MLS):
        logging.error("Error: data_source not supported!")
        return

    # Prepare the SQLAlchemy database url
    url = 'mysql://' + self.db.user + ':' + \
        self.db.password + '@' + self.db.host + '/'
    schema = self.db.database
    schema_id = self.db.identities_db

    # Get startdate, enddate as datetime objects
    startdate = datetime.strptime(self.filters.startdate, "'%Y-%m-%d'")
    enddate = datetime.strptime(self.filters.enddate, "'%Y-%m-%d'")
    # Get the last 365 days period
    start365 = enddate - timedelta(days=365)
    end365 = enddate

    if data_source == SCM:
        logging.info("Analyzing timezone for SCM")
        nomerges = SCMNomergesCondition()
        database = SCMDatabase(url=url, schema=schema,
                               schema_id=schema_id)
        period = SCMPeriodCondition(start=startdate, end=enddate)
        data = SCMActivityTZ(datasource=database, name="authors",
                             conditions=(period, nomerges))
        period365 = SCMPeriodCondition(start=start365, end=end365)
        data365 = SCMActivityTZ(datasource=database, name="authors",
                                conditions=(period365, nomerges))
    elif data_source == MLS:
        logging.info("Analyzing timezone for MLS")
        database = MLSDatabase(url=url, schema=schema,
                               schema_id=schema_id)
        period = MLSPeriodCondition(start=startdate, end=enddate,
                                    date="first")
        data = MLSActivityTZ(datasource=database, name="senders",
                             conditions=(period,))
        period365 = MLSPeriodCondition(start=start365, end=end365,
                                       date="first")
        data365 = MLSActivityTZ(datasource=database, name="senders",
                                conditions=(period365,))

    timezones = data.timezones()
    timezones365 = data365.timezones()
    if data_source == SCM:
        timezones['commits365'] = timezones365['commits']
    elif data_source == MLS:
        timezones['messages365'] = timezones365['messages']
    timezones['authors365'] = timezones365['authors']
    return timezones
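# A minimal usage sketch for the method above (hedged: how the study
# object is instantiated is an assumption, not confirmed by this code).
# It relies only on the dictionary keys documented in the docstring:
#
#   study = ...   # an instance of the class defining result()
#   tz = study.result(data_source=SCM)
#   if tz is not None:
#       for zone, authors, commits in zip(tz["tz"], tz["authors"],
#                                         tz["commits"]):
#           print "UTC%+03d: %d authors, %d commits" % (zone, authors,
#                                                       commits)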
def result(self, data_source=None):
    """Produce result data for the analysis

    Parameters
    ----------

    data_source: { SCM.SCM | ITS.ITS | MLS.MLS }

    Returns
    -------

    dictionary: birth and aging data. The dictionary has two entries,
        keyed "birth" and "aging". For each of them, information about
        the duration of all actors in the project is included.

    """

    logging.info("Producing data for study: Aging")
    if data_source is None:
        logging.error("Error: no data source for study!")
        return

    # Prepare the SQLAlchemy database url
    url = 'mysql://' + self.db.user + ':' + \
        self.db.password + '@' + self.db.host + '/'
    schema = self.db.database
    schema_id = self.db.identities_db

    # Get startdate, enddate as datetime objects
    startdate = datetime.strptime(self.filters.startdate, "'%Y-%m-%d'")
    enddate = datetime.strptime(self.filters.enddate, "'%Y-%m-%d'")

    # Get dictionary with analysis, if any
    self.analysis_dict = parse_analysis(self.filters.type_analysis)

    if data_source == SCM:
        logging.info("Analyzing aging for SCM")
        # Activity data (start time, end time of contributions) for
        # all the actors, considering only activity during the
        # startdate..enddate period (merges are not considered
        # as activity).
        period = SCMPeriodCondition(start=startdate, end=enddate)
        nomerges = SCMNomergesCondition()
        conditions = [period, nomerges]
        if self.filters.COMPANY in self.analysis_dict:
            orgs = SCMOrgsCondition(
                orgs=(self.analysis_dict[self.filters.COMPANY],),
                actors="authors")
            conditions.append(orgs)
        database = SCMDatabase(url=url, schema=schema,
                               schema_id=schema_id)
        data = SCMActivityPersons(
            datasource=database,
            name="list_uauthors",
            conditions=conditions)
    elif data_source == ITS:
        logging.info("Analyzing aging for ITS")
        # Activity data (start time, end time of contributions) for
        # all the actors, considering only activity during the
        # startdate..enddate period.
        period = ITSPeriodCondition(start=startdate, end=enddate)
        conditions = [period]
        if self.filters.COMPANY in self.analysis_dict:
            orgs = ITSOrgsCondition(
                orgs=(self.analysis_dict[self.filters.COMPANY],))
            conditions.append(orgs)
        database = ITSDatabase(url=url, schema=schema,
                               schema_id=schema_id)
        data = ITSActivityPersons(
            datasource=database,
            name="list_uchangers",
            conditions=conditions)
    elif data_source == MLS:
        logging.info("Analyzing aging for MLS")
        # Activity data (start time, end time of contributions) for
        # all the actors, considering only activity during the
        # startdate..enddate period.
        period = MLSPeriodCondition(start=startdate, end=enddate,
                                    date="check")
        conditions = [period]
        if self.filters.COMPANY in self.analysis_dict:
            orgs = MLSOrgsCondition(
                orgs=(self.analysis_dict[self.filters.COMPANY],),
                actors="senders",
                date="check")
            conditions.append(orgs)
        database = MLSDatabase(url=url, schema=schema,
                               schema_id=schema_id)
        data = MLSActivityPersons(
            datasource=database,
            name="list_usenders",
            date_kind="check",
            conditions=conditions)
    else:
        logging.error("Error: no aging analysis for this data source!")

    if data_source in (SCM, ITS, MLS):
        # "Birth" has the ages of all actors, considering enddate as
        # the current (snapshot) time.
        snapshot = SnapshotCondition(date=enddate)
        birth = DurationPersons(datasource=data,
                                name="age",
                                conditions=(snapshot,))
        # "Aging" has the ages of those actors active during the
        # last half year (that is, the period from
        # enddate - half a year to enddate).
        active_period = ActiveCondition(after=enddate - timedelta(days=182),
                                        before=enddate)
        aging = DurationPersons(datasource=data,
                                name="age",
                                conditions=(snapshot, active_period))
        demos = {"birth": birth.durations(),
                 "aging": aging.durations()}
        return demos
    else:
        return {"birth": {}, "aging": {}}
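# A minimal usage sketch for the aging analysis above (hedged: how the
# study object is instantiated is an assumption). It relies only on the
# "birth" and "aging" keys documented in the docstring:
#
#   study = ...   # an instance of the class defining result()
#   demos = study.result(data_source=SCM)
#   for key in ("birth", "aging"):
#       print key, demos[key]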
from grimoirelib_alch.family.activity_persons import (
    SCMActivityPersons, ITSActivityPersons, MLSActivityPersons
    )
from grimoirelib_alch.aux.reports import create_report, add_report

stdout_utf8()

snapshot_date = datetime(2014, 7, 1)
activity_period = timedelta(days=182)

#---------------------------------
print_banner("Demographics with MLS database, MediaWiki")
database = MLSDatabase(url="mysql://*****:*****@localhost/",
                       schema="mls_wikimedia",
                       schema_id="scm_wikimedia")
# Wikimedia mailing lists don't always keep "arrival_date", therefore
# we have to use "first_date".
activity = MLSActivityPersons(
    datasource=database,
    name="list_usenders",
    date_kind="check")
report = report_demographics(activity_persons=activity,
                             snapshot_date=snapshot_date,
                             activity_period=activity_period,
                             prefix='mls-')
create_report(report_files=report, destdir='/tmp/')
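# Quick sanity check on the activity window used above: with a snapshot
# at 2014-07-01 and a 182-day period, actors active since the very end
# of 2013 count as "active" (a sketch, nothing project-specific).
window_start = snapshot_date - activity_period
print window_start   # 2013-12-31 00:00:00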
print_banner("Author activity per time zone (now using session)") session = database.build_session(query_cls = SCMTZQuery) data = SCMActivityTZ ( datasource = session, name = "authors") tz = data.timezones() print tz #--------------------------------- print_banner("Author activity per time zone (using session, conditions)") period = SCMPeriodCondition (start = datetime(2014,1,1), end = None) nomerges = SCMNomergesCondition() data = SCMActivityTZ ( datasource = session, name = "authors", conditions = (period,nomerges)) tz = data.timezones() print tz database = MLSDatabase (url = 'mysql://*****:*****@localhost/', schema = 'oscon_openstack_mls', schema_id = 'oscon_openstack_scm') #--------------------------------- print_banner("Sender activity per time zone") data = MLSActivityTZ ( datasource = database, name = "senders") tz = data.timezones() print tz
    ) \
    .join(SCMDatabase.Actions) \
    .filter(SCMDatabase.Actions.branch_id.in_(scm_branches),
            SCMDatabase.SCMLog.repository_id.in_(scm_repos),
            SCMDatabase.SCMLog.author_date > month_start,
            SCMDatabase.SCMLog.author_date <= month_end)
for row in query.all():
    print row

#
# MLS
#
database = MLSDatabase(url=mls_database["url"],
                       schema=mls_database["schema"],
                       schema_id=mls_database["schema_id"])
session = database.build_session()

# Get MLS repository ids (urls)
query = session.query(
    label("id", MLSDatabase.MailingLists.mailing_list_url)
    ) \
    .filter(MLSDatabase.MailingLists.mailing_list_name.in_(
        mls_devels_name))
mls_devels = [row.id for row in query.all()]

query = session.query(
    label("id", MLSDatabase.MailingLists.mailing_list_url)
    ) \
    .filter(MLSDatabase.MailingLists.mailing_list_name.in_(