def topData(period, startdate, enddate, identities_db, destdir, bots, npeople):
    """Collect the top authors for three windows and dump them to JSON.

    ``SCM.top_people`` is called three times with a first argument of 0,
    365 and 31 (presumably a look-back window in days -- confirm against
    ``SCM.top_people``); the results are stored under the keys
    ``'authors.'``, ``'authors.last year'`` and ``'authors.last month'``.
    The resulting dict is written to ``<destdir>/scm-top.json`` and returned.

    ``period``, ``identities_db`` and ``bots`` are accepted but not used by
    this function.
    """
    windows = (
        ('authors.', 0),
        ('authors.last year', 365),
        ('authors.last month', 31),
    )

    top_authors_data = {}
    for key, days in windows:
        top_authors_data[key] = SCM.top_people(
            days, startdate, enddate, "author", "", npeople)

    createJSON(top_authors_data, destdir + "/scm-top.json")

    # Top files: the result is not used anywhere below, but the call is kept
    # exactly as the original issued it.
    top_files_modified_data = SCM.top_files_modified()

    return top_authors_data
def companies_countriesData(period, startdate, enddate, identities_db, destdir):
    """Write one evolutionary JSON file per (company, country) pair.

    Companies come from ``SCM.companies_name`` and countries from
    ``SCM.scm_countries_names`` (the ``'name'`` entry of each result). For
    every pair, ``SCM.scm_companies_countries_evol`` is queried, the result
    is passed through ``completePeriodIds`` and saved to
    ``<destdir>/<company>_<country>-scm-evolutionary.json``.
    """
    company_names = SCM.companies_name(startdate, enddate)['name']

    for company in company_names:
        # Quoted form of the name; not used further down.
        company_name = "'" + company + "'"

        # The country list is fetched again for every company.
        country_names = SCM.scm_countries_names(
            identities_db, startdate, enddate)['name']

        for country in country_names:
            print(country, "=>", company)
            # NOTE(review): ``nperiod`` is neither a parameter nor a local of
            # this function (the parameter is called ``period``); it has to
            # exist as a global elsewhere -- confirm, otherwise this raises
            # NameError.
            evolution = SCM.scm_companies_countries_evol(
                identities_db, company, country, nperiod, startdate, enddate)
            evolution = completePeriodIds(evolution)
            out_file = destdir + "/" + company + "_" + country + "-scm-evolutionary.json"
            createJSON(evolution, out_file, False)
def countriesData(period, startdate, enddate, identities_db, destdir):
    """Write the country list plus per-country evolutionary and static JSON.

    The ``'name'`` entry returned by ``SCM.scm_countries_names`` is saved to
    ``<destdir>/scm-countries.json``. Then, for every country, the result of
    ``SCM.GetSCMEvolutionaryData`` (passed through ``completePeriodIds``) is
    saved to ``<destdir>/<country>-scm-cou-evolutionary.json`` and the result
    of ``SCM.GetSCMStaticData`` to ``<destdir>/<country>-scm-cou-static.json``.
    """
    country_names = SCM.scm_countries_names(identities_db, startdate, enddate)['name']
    createJSON(country_names, destdir + "/scm-countries.json")

    for country in country_names:
        print(country)
        # The filter value is handed to the SCM queries wrapped in single
        # quotes.
        quoted = "'" + country + "'"

        evolution = SCM.GetSCMEvolutionaryData(
            period, startdate, enddate, identities_db, ["country", quoted])
        createJSON(completePeriodIds(evolution),
                   destdir + "/" + country + "-scm-cou-evolutionary.json")

        static = SCM.GetSCMStaticData(
            period, startdate, enddate, identities_db, ["country", quoted])
        createJSON(static, destdir + "/" + country + "-scm-cou-static.json")
def importToBazaar(self, foreign_tree, bazaar_branch):
    """Import the revisions of `foreign_tree` into `bazaar_branch`.

    :param foreign_tree: A `CVSWorkingTree`.
    :param bazaar_branch: A `bzrlib.branch.Branch`, which must have a
        colocated working tree.
    """
    source_path = foreign_tree.local_path
    working_tree = bazaar_branch.bzrdir.open_workingtree()
    target_path = str(working_tree.basedir)
    branch = SCM.branch(target_path)

    # A branch without any identifiable CSCVS revision is "initialized" by
    # CSCVS first.
    if cscvs.findLastCscvsCommit(branch) is None:
        self._runToBaz(source_path, "-SI", "MAIN.1", target_path)

    # Synchronise: import every new revision of the foreign branch into the
    # Bazaar branch. Right after an initialization this means *all*
    # revisions. The last commit is looked up again because the
    # initialization above may have changed it.
    newest_commit = cscvs.findLastCscvsCommit(branch)
    self._runToBaz(source_path, "-SC", "%s::" % newest_commit, target_path)
def importToBazaar(self, foreign_tree, bazaar_branch):
    """Import the revisions of `foreign_tree` into `bazaar_branch`.

    :param foreign_tree: A `SubversionWorkingTree` or a `CVSWorkingTree`.
    :param bazaar_branch: A `bzrlib.branch.Branch`, which must have a
        colocated working tree.
    """
    source_path = foreign_tree.local_path
    working_tree = bazaar_branch.bzrdir.open_workingtree()
    target_path = str(working_tree.basedir)
    branch = SCM.branch(target_path)

    # A branch without any identifiable CSCVS revision is "initialized" by
    # CSCVS first.
    needs_init = cscvs.findLastCscvsCommit(branch) is None
    if needs_init:
        self._runToBaz(source_path, "-SI", "MAIN.1", target_path)

    # Synchronise: import every new revision of the foreign branch into the
    # Bazaar branch. Right after an initialization this means *all*
    # revisions. The last commit is looked up again because the
    # initialization above may have changed it.
    newest_commit = cscvs.findLastCscvsCommit(branch)
    revision_range = "%s::" % newest_commit
    self._runToBaz(source_path, "-SC", revision_range, target_path)
def peopleData(period, startdate, enddate, identities_db, destdir, top_authors_data):
    """Write per-person evolutionary and static JSON for all top authors.

    The ``"id"`` lists stored under ``'authors.'``, ``'authors.last year'``
    and ``'authors.last month'`` in ``top_authors_data`` are combined and
    de-duplicated. The resulting list is saved to
    ``<destdir>/scm-people.json``; then, for every id, the result of
    ``SCM.GetEvolPeopleSCM`` (passed through ``completePeriodIds``) is saved
    to ``<destdir>/people-<id>-scm-evolutionary.json`` and the result of
    ``SCM.GetStaticPeopleSCM`` to ``<destdir>/people-<id>-scm-static.json``.

    ``top_authors_data`` is not modified. ``identities_db`` is accepted but
    not used here.
    """
    # Work on a copy: applying ``+=`` directly to the caller's list would
    # extend top_authors_data['authors.']["id"] in place.
    top = list(top_authors_data['authors.']["id"])
    top += top_authors_data['authors.last year']["id"]
    top += top_authors_data['authors.last month']["id"]

    # Remove duplicates. Going through a set does not preserve the input
    # order, so the order is not the same as in the R json.
    people = list(set(top))
    createJSON(people, destdir + "/scm-people.json", False)

    for upeople_id in people:
        file_prefix = destdir + "/people-" + str(upeople_id)

        evol_data = SCM.GetEvolPeopleSCM(upeople_id, period, startdate, enddate)
        evol_data = completePeriodIds(evol_data)
        createJSON(evol_data, file_prefix + "-scm-evolutionary.json")

        agg = SCM.GetStaticPeopleSCM(upeople_id, startdate, enddate)
        createJSON(agg, file_prefix + "-scm-static.json")
def tsData(period, startdate, enddate, identities_db, destdir, granularity, conf):
    """Build the global evolutionary time series and write it to JSON.

    Starts from ``SCM.GetSCMEvolutionaryData`` (passed through
    ``completePeriodIds``) and, for each of ``'companies'``, ``'countries'``
    and ``'domains'`` present in the module-level ``reports`` (defined
    outside this function), merges in the matching ``SCM.Evol*`` series.
    On a key clash the later series wins. The result is saved to
    ``<destdir>/scm-evolutionary.json``.

    ``granularity`` and ``conf`` are accepted but not used here.
    """
    def merged(base, extra):
        # Equivalent of the former ``dict(base.items() + extra.items())``,
        # which only works on Python 2 (on Python 3 ``items()`` returns
        # views that do not support ``+``). Always returns a new dict.
        result = dict(base.items())
        result.update(extra.items())
        return result

    data = SCM.GetSCMEvolutionaryData(period, startdate, enddate, identities_db, None)
    evol_data = completePeriodIds(data)

    if 'companies' in reports:
        data = SCM.EvolCompanies(period, startdate, enddate)
        evol_data = merged(evol_data, completePeriodIds(data))

    if 'countries' in reports:
        data = SCM.EvolCountries(period, startdate, enddate)
        evol_data = merged(evol_data, completePeriodIds(data))

    if 'domains' in reports:
        data = SCM.EvolDomains(period, startdate, enddate)
        evol_data = merged(evol_data, completePeriodIds(data))

    createJSON(evol_data, destdir + "/scm-evolutionary.json")
def domainsData(period, startdate, enddate, identities_db, destdir):
    """Write the domain list plus per-domain evolutionary and static JSON.

    The ``'name'`` entry returned by ``SCM.scm_domains_names`` is saved to
    ``<destdir>/scm-domains.json``. Then, for every domain, the result of
    ``SCM.GetSCMEvolutionaryData`` (passed through ``completePeriodIds``) is
    saved to ``<destdir>/<domain>-scm-dom-evolutionary.json`` and the result
    of ``SCM.GetSCMStaticData`` to ``<destdir>/<domain>-scm-dom-static.json``.
    """
    domain_names = SCM.scm_domains_names(identities_db, startdate, enddate)['name']
    createJSON(domain_names, destdir + "/scm-domains.json")

    # Some R ts are wrong: for these domains the evolutionary file is written
    # with an explicit ``False`` third argument to createJSON.
    bad_R_json_domains = ('gerrit', 'gmx', 'emsenhuber', 'bitergia')

    for domain in domain_names:
        quoted = "'" + domain + "'"
        print(quoted)

        evolution = SCM.GetSCMEvolutionaryData(
            period, startdate, enddate, identities_db, ["domain", quoted])
        evolution = completePeriodIds(evolution)

        # Either pass the extra ``False`` or leave createJSON's own default
        # for the third argument untouched.
        extra_args = (False,) if domain in bad_R_json_domains else ()
        createJSON(evolution,
                   destdir + "/" + domain + "-scm-dom-evolutionary.json",
                   *extra_args)

        static = SCM.GetSCMStaticData(
            period, startdate, enddate, identities_db, ["domain", quoted])
        createJSON(static, destdir + "/" + domain + "-scm-dom-static.json")
def reposData(period, startdate, enddate, identities_db, destdir, conf):
    """Write the repository list plus per-repository evolutionary/static JSON.

    The ``'name'`` entry returned by ``SCM.repos_name`` is saved to
    ``<destdir>/scm-repos.json``; when it is not a list it is first wrapped
    in a one-element list and written with an explicit ``False`` third
    argument to ``createJSON``. Then, for every repository, the result of
    ``SCM.GetSCMEvolutionaryData`` (passed through ``completePeriodIds``) is
    saved to ``<destdir>/<repo>-scm-rep-evolutionary.json`` and the result of
    ``SCM.GetSCMStaticData`` to ``<destdir>/<repo>-scm-rep-static.json``.

    ``conf`` is accepted but not used here.
    """
    repo_names = SCM.repos_name(startdate, enddate)['name']
    list_file = destdir + "/scm-repos.json"

    if isinstance(repo_names, list):
        createJSON(repo_names, list_file)
    else:
        # A non-list value is treated as a single repository.
        repo_names = [repo_names]
        createJSON(repo_names, list_file, False)

    for repo in repo_names:
        quoted = "'" + repo + "'"
        print(quoted)

        evolution = SCM.GetSCMEvolutionaryData(
            period, startdate, enddate, identities_db, ["repository", quoted])
        evolution = completePeriodIds(evolution)
        createJSON(evolution, destdir + "/" + repo + "-scm-rep-evolutionary.json")

        static = SCM.GetSCMStaticData(
            period, startdate, enddate, identities_db, ["repository", quoted])
        createJSON(static, destdir + "/" + repo + "-scm-rep-static.json")
def companiesData(period, startdate, enddate, identities_db, destdir, bots, npeople):
    """Write the company list and all per-company JSON reports.

    The ``'name'`` entry returned by ``SCM.companies_name_wo_affs`` is saved
    to ``<destdir>/scm-companies.json``. For every company the following
    files are written under ``destdir``:

    * ``<company>-scm-com-evolutionary.json`` --
      ``SCM.GetSCMEvolutionaryData`` passed through ``completePeriodIds``
    * ``<company>-scm-com-static.json`` -- ``SCM.GetSCMStaticData``
    * ``<company>-scm-com-top-authors.json`` -- ``SCM.company_top_authors``
    * ``<company>-scm-top-authors_<year>.json`` --
      ``SCM.company_top_authors_year`` for 2006, 2009 and 2012

    Finally ``SCM.GetCommitsSummaryCompanies`` is saved to
    ``<destdir>/scm-companies-commits-summary.json``.
    """
    companies = SCM.companies_name_wo_affs(bots, startdate, enddate)
    companies = companies['name']
    createJSON(companies, destdir + "/scm-companies.json")

    for company in companies:
        # The SCM queries receive the company name wrapped in single quotes.
        company_name = "'" + company + "'"
        print(company_name)

        evol_data = SCM.GetSCMEvolutionaryData(
            period, startdate, enddate, identities_db, ["company", company_name])
        evol_data = completePeriodIds(evol_data)
        createJSON(evol_data, destdir + "/" + company + "-scm-com-evolutionary.json")

        agg = SCM.GetSCMStaticData(
            period, startdate, enddate, identities_db, ["company", company_name])
        createJSON(agg, destdir + "/" + company + "-scm-com-static.json")

        top_authors = SCM.company_top_authors(company_name, startdate, enddate, npeople)
        createJSON(top_authors, destdir + "/" + company + "-scm-com-top-authors.json", False)

        # Hard-coded sample years for the yearly top-authors reports.
        for year in [2006, 2009, 2012]:
            data = SCM.company_top_authors_year(company_name, year, npeople)
            createJSON(data, destdir + "/" + company + "-scm-top-authors_" + str(year) + ".json", False)

    # Use the ``identities_db`` argument, as every other query in this
    # function does, rather than reaching for the global ``opts``.
    commits = SCM.GetCommitsSummaryCompanies(period, startdate, enddate, identities_db, 10)
    createJSON(commits, destdir + "/scm-companies-commits-summary.json")
def _runToBaz(self, source_dir, flags, revisions, bazpath):
    """Run the CSCVS utility that performs the revision import.

    :param source_dir: The directory containing the foreign working tree
        that we are importing from.
    :param flags: Flags to pass to `totla.totla`.
    :param revisions: The revisions to import.
    :param bazpath: The directory containing the Bazaar working tree that
        we are importing into.
    """
    # XXX: JonathanLange 2008-02-08: We need better documentation for
    # `flags` and `revisions`.
    totla_args = ["--strict", "-b", bazpath, flags, revisions, bazpath]

    config = CVS.Config(source_dir)
    config.args = totla_args
    # The argument list is read back from the config object, exactly as the
    # original call did.
    totla.totla(config, self._logger, config.args, SCM.tree(source_dir))
def aggData(period, startdate, enddate, identities_db, destdir):
    """Aggregate the static SCM metrics into one dict and write it to JSON.

    Starting from ``SCM.GetSCMStaticData``, the results of the following
    calls are merged in, in this order (on a key clash the later value wins):

    * ``SCM.StaticURL``
    * ``SCM.evol_info_data_companies`` / ``_countries`` / ``_domains``, each
      only when the matching name is in the module-level ``reports``
      (defined outside this function)
    * ``SCM.GetCodeCommunityStructure``
    * ``SCM.GetDiffCommitsDays``, ``GetDiffAuthorsDays``, ``GetDiffFilesDays``
      and ``GetDiffLinesDays`` for 7, 30 and 365
    * ``SCM.last_activity`` for 7, 14, 30, 60, 90, 180, 365 and 730

    The merged dict is saved to ``<destdir>/scm-static.json`` with a list of
    fields to skip.
    """
    # The former ``dict(agg.items() + data.items())`` merge only works on
    # Python 2 (on Python 3 ``items()`` returns views that do not support
    # ``+``). Copy once, then update in place.
    data = SCM.GetSCMStaticData(period, startdate, enddate, identities_db, None)
    static_url = SCM.StaticURL()
    agg = dict(data.items())
    agg.update(static_url.items())

    if 'companies' in reports:
        data = SCM.evol_info_data_companies(startdate, enddate)
        agg.update(data.items())

    if 'countries' in reports:
        data = SCM.evol_info_data_countries(startdate, enddate)
        agg.update(data.items())

    if 'domains' in reports:
        data = SCM.evol_info_data_domains(startdate, enddate)
        agg.update(data.items())

    data = SCM.GetCodeCommunityStructure(period, startdate, enddate, identities_db)
    agg.update(data.items())

    # TODO: repeated data -- a separate GetDiffCommitsDays(period, enddate,
    # 365) merge used to be here; the 365 case is covered by the loop below.

    # Tendencies
    for days in [7, 30, 365]:
        data = SCM.GetDiffCommitsDays(period, enddate, identities_db, days)
        agg.update(data.items())
        data = SCM.GetDiffAuthorsDays(period, enddate, identities_db, days)
        agg.update(data.items())
        data = SCM.GetDiffFilesDays(period, enddate, identities_db, days)
        agg.update(data.items())
        data = SCM.GetDiffLinesDays(period, enddate, identities_db, days)
        agg.update(data.items())

    # Last Activity: to be removed
    for days in [7, 14, 30, 60, 90, 180, 365, 730]:
        data = SCM.last_activity(days)
        agg.update(data.items())

    # Fields with wrong data in R
    skip_fields = [
        'percentage_removed_lines_30',
        'percentage_added_lines_30',
        'diff_netadded_lines_30',
        'diff_netremoved_lines_30',
    ]
    createJSON(agg, destdir + "/scm-static.json", True, skip_fields)
for i in range(len(lis)): for k in range(len(lis[i])): lis[i][k]=dic[lis[i][k]] return lis if __name__ == "__main__": data=np.array([[1772.14,568.25,298.66,352.20,307.21,490.83,364.28,202.50], # 辽宁 # 原始样本矩阵 [2752.25,569.95,662.31,541.06,623.05,917.23,599.98,354.39], # 浙江 [1386.76,460.99,312.97,280.78,246.24,407.26,547.19,188.52], # 河南 [1552.77,517.16,402.03,272.44,265.29,563.10,302.27,251.41], # 甘肃 [1711.03,458.57,334.91,307.24,297.72,495.34,274.48,306.45]]) # 青海 # 总计5个样本 dic={0:'辽宁',1:'浙江',2:'河南',3:'甘肃',4:'青海'} scm=SCM.SCM(classdist='nearest', sampledist='euc') res=scm.fit(data,kind=5) # 获得分类后的样本下标 res=replace(res,dic) # 替换样本下标为样本标签名 print("分为5类结果 res = :",res) res=scm.fit(data,kind=4) # 获得分类后的样本下标 res=replace(res,dic) # 替换样本下标为样本标签名 print("分为4类结果 res = :",res) res=scm.fit(data,kind=3) # 获得分类后的样本下标 res=replace(res,dic) # 替换样本下标为样本标签名 print("分为3类结果 res = :",res) res=scm.fit(data,kind=2) # 获得分类后的样本下标 res=replace(res,dic) # 替换样本下标为样本标签名
if __name__ == "__main__":
    # Nine samples that clearly fall into three clusters.
    data = np.array([
        [1, 3, 5, 7, 9],
        [2, 4, 6, 8, 10],
        [1, 4, 5, 8, 9],
        [1100, 1300, 1500, 1700, 1900],
        [1200, 1400, 1600, 1800, 2000],
        [1100, 1400, 1500, 1800, 1900],
        [11000, 13000, 15000, 17000, 19000],
        [12000, 14000, 16000, 18000, 20000],
        [11000, 14000, 15000, 18000, 19000],
    ])

    # 1. Euclidean distance between samples, combined in turn with each
    #    inter-class distance option.
    print("欧氏距离测试: ")
    euclidean_cases = (
        ('nearest', "样本间欧氏距离+类间最近距离: res = "),
        ('farthest', "样本间欧氏距离+类间最远距离: res = "),
        ('average', "样本间欧氏距离+类间平均距离: res = "),
        ('centroid', "样本间欧氏距离+类间重心距离: res = "),
    )
    for classdist, message in euclidean_cases:
        scm = SCM.SCM(classdist=classdist, sampledist='euc')
        res = scm.fit(data, kind=3)
        print(message, res)