def _printToLog(self, source, revision_number, log):
    """Append every entry of *log* to the project's error log file.

    Each entry is expected to be a 3-item sequence; entries are written
    under /tmp/ohm/<project>-errors.log, preceded by a session separator.
    Does nothing when *log* is empty.
    """
    # Nothing to record -- leave the log file untouched.
    if not log:
        return
    current_rev = self.project_repo.revCurr
    _make_dir('/tmp/ohm/')
    log_path = '/tmp/ohm/' + self.project_repo.project.name + '-errors.log'
    with open(log_path, 'a') as log_file:
        # Separator marks the start of a new logging session.
        log_file.write("\n\n***********************************\n\n")
        for entry in log:
            # One header line (timestamp, current rev, source, revision),
            # then two indented detail lines from the entry itself.
            header = ' '.join((str(datetime.now()), str(current_rev),
                               source, str(revision_number)))
            detail = entry[0] + ' ' + entry[1]
            log_file.write(header + '\n\t' + detail + '\n\t'
                           + str(entry[2]) + '\n')
def get_file(self, file_name, revision_number, tries=5):
    """Export *file_name* at *revision_number* from the project's SVN
    repository into /tmp/ohm/ and return its contents as a string.

    Retries the export up to *tries* times when pysvn reports error
    175002; any other client error aborts the retry loop. A missing
    revision_number (None) is treated as revision 0.
    """
    if revision_number is None:
        revision_number = 0
    rev = pysvn.Revision(pysvn.opt_revision_kind.number, revision_number)

    # Normalize the pieces so url + file_name is a valid repository path:
    # strip a single trailing slash from the URL, ensure exactly one
    # leading slash on the file name.
    url = self.project.url
    url = url[:-1] if url.endswith('/') else url
    if not file_name.startswith('/'):
        file_name = '/' + file_name
    # TODO: handle spaces? Used to work, but does not seem to be now

    # Destination path for the exported file; make sure its directory exists.
    output = '/tmp/ohm/' + self.project.name + '-svn' + file_name
    _make_dir(output[:output.rfind('/')])

    remaining = tries
    while remaining > 0:
        try:
            self.client.export(url + file_name, output,
                               revision=rev, recurse=False)
            break  # export succeeded
        except pysvn.ClientError as e:
            message, code = e.args[1][0]
            print('Code:', code, 'Message:', message, '\n',
                  file_name, revision_number)
            if code != 175002:
                # Some other error, just quit trying
                break
            # 175002: transient failure -- retry the export
            remaining -= 1

    with open(output, 'r') as f:
        return f.read()
def main(argv):
    """Command-line entry point.

    Parses options and dispatches to the requested action(s): drop tables
    (-f), build the database (-b), generate vectors (-g), run without
    database interactions (-s), or run the tester function (-t). Supplying
    a second project name (-m) triggers a dual speed run and exits.

    :param argv: argument list handed to optparse (typically sys.argv).
    """
    # Configure option parser
    optparser = OptionParser(usage='%prog [options]', version='0.1')
    optparser.set_defaults(force_drop=False)
    optparser.set_defaults(verbose=False)
    optparser.set_defaults(generate=False)
    optparser.set_defaults(build_db=False)
    optparser.set_defaults(tester=False)
    optparser.set_defaults(speed_run=False)
    optparser.set_defaults(output_dir='/tmp/ohm')
    optparser.set_defaults(project_revision='-1')
    optparser.set_defaults(project_revision_end='-1')
    optparser.set_defaults(database_host='localhost')
    optparser.set_defaults(database_port='5432')
    optparser.set_defaults(database_user='******')
    optparser.set_defaults(database_password='******')
    optparser.set_defaults(database_db='ohmdb')
    optparser.add_option('-o', '--output-dir', dest='output_dir',
                         help='Output directory')
    optparser.add_option('-n', '--project_name', dest='project_name',
                         help='Project name')
    optparser.add_option('-m', '--project_name2', dest='project_name2',
                         help='Project name')
    optparser.add_option('-r', '--revision', dest='project_revision',
                         help='Project revision to begin upon')
    optparser.add_option('-e', '--revision_end', dest='project_revision_end',
                         help='Project revision to stop after')
    optparser.add_option('-f', '--force_drop', dest='force_drop',
                         help='Drop all tables before beginning',
                         action='store_true')
    optparser.add_option('-v', '--verbose', dest='verbose',
                         help='Be verbose in output', action='store_true')
    optparser.add_option('-g', '--generate', dest='generate',
                         help='Generate vectors', action='store_true')
    optparser.add_option('-t', '--tester', dest='tester',
                         help='Run tester function', action='store_true')
    optparser.add_option('-s', '--speed_run', dest='speed_run',
                         help='Run without database interactions',
                         action='store_true')
    optparser.add_option('-b', '--build', dest='build_db',
                         help='Run analysis and build database',
                         action='store_true')
    optparser.add_option('-a', '--host', dest='database_host',
                         help='Use a custom database host address')
    optparser.add_option('-p', '--port', dest='database_port',
                         help='Use a custom database host port')
    optparser.add_option('-u', '--username', dest='database_user',
                         help='Use a custom database username')
    optparser.add_option('-P', '--password', dest='database_password',
                         help='Use a custom database host port')
    optparser.add_option('-d', '--database', dest='database_db',
                         help='Use a custom database')

    # Invoke option parser
    (options, args) = optparser.parse_args(argv)
    starting_revision = options.project_revision
    ending_revision = options.project_revision_end

    # A project name is mandatory; resolve it against config.projects.
    if options.project_name is None:
        optparser.error('You must supply a project name!')
    else:
        project_name = options.project_name
    #if project_name.lower() in config.projects:
    #    project_url = base_svn + config.projects[project_name.lower()][0]
    if project_name not in config.projects:
        print('Project information not in config.py')
        sys.exit()
    project = config.projects[project_name]

    # A second project name triggers a dual speed run and exits immediately.
    if options.project_name2 is not None:
        project_name = options.project_name2
        #if project_name.lower() in config.projects:
        #    project_url = base_svn + config.projects[project_name.lower()][0]
        if project_name not in config.projects:
            print('Project information not in config.py')
            sys.exit()
        project2 = config.projects[project_name]
        dual_speed_run(project, project2)
        sys.exit(0)

    # create output directory
    tmp_dir = '/'.join([options.output_dir.rstrip('/')])
    if False == os.path.exists(tmp_dir):
        _make_dir(tmp_dir)

    # open database connection
    db = Database(
        host=options.database_host,
        port=options.database_port,
        user=options.database_user,
        password=options.database_password,
        database=options.database_db,
        verbose=options.verbose
    )

    # Tester mode runs a comparison and exits without other actions.
    if options.tester:
        # tester(db, project, starting_revision, ending_revision)
        compare_git_svn(db, config.projects["jhotdraw-git"],
                        config.projects["jhotdraw"])
        sys.exit(0)

    if options.force_drop:
        db.force_drop()

    if options.speed_run:
        speed_run(project, starting_revision, ending_revision)

    if options.build_db:
        build_db(db, project,
                 starting_revision, ending_revision)

    if options.generate:
        # Generate class-level vectors (classes, enums, interfaces,
        # annotation types).
        generate(db, project, starting_revision, ending_revision, False,
                 ('class', 'enum', 'interface', '@interface'),
                 profile_name='class_')
        # just leave name as 'profiles.txt'
        # generate the methods
        generate(db, project, starting_revision, ending_revision, False,
                 ('method', ), profile_name='method_')
        # the following will seem weird, but in the database the full_name()
        # function will ignore file types when building the block's full name so
        # if we use it on the file type, we just get the package name. generate
        # will merge all the duplicate information into one package for us
        # afterward.
        # warning: if a file did not have an associated package, it will show up
        # in this list rather than the package name. this is a nifty workaround
        # to tracking package changes via the file changes.
        generate(db, project, starting_revision, ending_revision, False,
                 ('file', ), profile_name='package_')
        # here, we will disable generates use of the full_name in its queries,
        # giving us only the file name (and more importantly, excluding the
        # package)
        generate(db, project, starting_revision, ending_revision, False,
                 ('file', ), profile_name='file_', no_full_name_func=True)

    # No action flags supplied at all: report usage error.
    if not (options.force_drop or options.build_db or options.generate
            or options.speed_run):
        optparser.error('Did not have any action to perform. Must either drop\
 tables (-f), build tables (-b), or generate vectors from tables\
 (-g)')

    sys.exit(0)
def generate(db, project, starting_revision, ending_revision, use_sums,
             type_list, profile_name='', no_full_name_func=False):
    """Generate ownership-profile vector files for blocks of the given types.

    Writes `<profile_name>profiles.txt` (one line per block: full name
    followed by comma-separated per-owner values) and `key.txt` (owner
    names) into /tmp/ohm/<project>-r<latest revision>/. Blocks whose full
    name appears more than once are merged into a single summed profile.

    :param db: database wrapper exposing execute() and a cursor attribute.
    :param project: project object with .name and .url attributes.
    :param starting_revision: unused here; kept for a uniform call signature.
    :param ending_revision: unused here; kept for a uniform call signature.
    :param use_sums: if True read from change_data_sums, else change_data_count.
    :param type_list: block type strings to include (e.g. ('class', 'enum')).
    :param profile_name: prefix for the output profiles file name.
    :param no_full_name_func: if True use the stored block.full_name column
        instead of the SQL full_name() function when naming blocks.
    """
    # from type list, build query info
    # Builds "block.type=%s or block.type=%s ..." with one placeholder per
    # entry of type_list; the trailing ' or ' is stripped afterward.
    typestr = 'block.type=%s or ' * len(type_list)
    typestr = typestr.rstrip(' or ')

    # set the name string used in the queries.
    if no_full_name_func:
        # just use the block's saved full_name as-is
        namestr = 'block.full_name'
    else:
        # use the sql function instead to build the full_name
        namestr = 'full_name(block.id)'

    # this dictionary is used throughout as a unique properties dictionary
    # used to get the UID of the entries in the table its used for. It should
    # always be reassigned when used.
    propDict = {
        'name': project.name,
        'url': project.url
    }
    # get the project uid
    pid = getUID(db, 'project', ('url',), propDict)

    # Most recent revision number first (ordered by id desc); used only to
    # name the output directory.
    revisions = db.execute('SELECT number from revision where project=%s \
        order by id desc', (pid, ))
    if revisions is None or len(revisions) == 0:
        print('Error: project has not been built yet, use -b')
        return

    output_dir = '/tmp/ohm/{name}-r{revision}/'.format(name=project.name,
                                                       revision=revisions[0][0])
    if False == os.path.exists(output_dir):
        _make_dir(output_dir)

    owner_remap(db, project.name, pid)
    owners = db.execute('SELECT * from owner where project=%s', (pid, ))

    # key.txt lists the owner names, one per line, in the same order as the
    # columns of each profile vector.
    with open(output_dir + 'key.txt', 'w') as f:
        for each in owners:
            f.write('%s\n' % each[1])

    # before we start generating class vectors, lets build a list of duplicates
    # to save off for merging later
    dup_results = db.execute('select {name} from block where \
        project=%s and ({types}) group by {name} \
        having (count({name}) > 1)'.format(name=namestr, types=typestr),
        (pid, ) + tuple(type_list))

    duplicated = []
    # copy just the strings
    for d in dup_results:
        duplicated.append(d[0])

    # Pick the data table variant according to use_sums.
    data_table = 'change_data'
    if use_sums:
        data_table = data_table + '_sums'
    else:
        data_table = data_table + '_count'

    c = db.cursor
    # Rows come back as (block id, block name, value, owner id); rows for the
    # same block are consumed as a group below.
    c.execute('SELECT block.id, {name}, {table}.sum, owner_id \
        from {table} join block on {table}.block_id = block.id \
        where block.project=%s and \
        ({types})'.format(table=data_table, name=namestr, types=typestr),
        (pid, ) + tuple(type_list))

    # Running state while streaming cursor rows: the current block id/name
    # and its per-owner profile (owner uid -> value, zero-initialized).
    curr_id = -1
    curr_full_name = ''
    ownership_profile = {}
    for o in owners:
        ownership_profile[o[0]] = 0

    # duplicated names collect their profiles here for merging afterward.
    duplicated_profiles = {}
    for d in duplicated:
        duplicated_profiles[d] = []

    with open(output_dir + profile_name + 'profiles.txt', 'w') as f:
        for each in c:
            # A new block id means the previous block's profile is complete:
            # either stash it for duplicate merging or write it out now.
            if curr_id != each[0]:
                if curr_id != -1:
                    if curr_full_name in duplicated_profiles:
                        # using a dict of strings to hold lists of dicts
                        duplicated_profiles[curr_full_name].append(ownership_profile)
                    else:
                        valstr = '%s,' * len(ownership_profile)
                        valstr = valstr.rstrip(',') + '\n'
                        o_tuple = tuple(ownership_profile.values())
                        f.write(curr_full_name + ' ')
                        f.write(valstr % o_tuple)
                # Reset state for the block that just started.
                curr_id = each[0]
                curr_full_name = each[1]
                ownership_profile = {}
                for o in owners:
                    ownership_profile[o[0]] = 0
            # Record this owner's value for the current block.
            ownership_profile[each[3]] = each[2]
            # Last row of the result set: flush the final block's profile,
            # since no following row will trigger the flush above.
            # NOTE(review): relies on cursor rownumber/rowcount semantics --
            # presumably a DB-API cursor where rownumber is the 1-based count
            # of fetched rows after iteration; verify against the db wrapper.
            if c.rownumber == c.rowcount:
                if curr_full_name in duplicated_profiles:
                    # using a dict of strings to hold lists of dicts
                    duplicated_profiles[curr_full_name].append(ownership_profile)
                else:
                    valstr = '%s,' * len(ownership_profile)
                    valstr = valstr.rstrip(',') + '\n'
                    o_tuple = tuple(ownership_profile.values())
                    f.write(curr_full_name + ' ')
                    f.write(valstr % o_tuple)

        # Merge each duplicated name's stashed profiles by summing the
        # per-owner values, then write the single merged line.
        for d in duplicated_profiles:
            tmp_p = None
            for p in duplicated_profiles[d]:
                if tmp_p is None:
                    tmp_p = dict(p)
                else:
                    for elem in p:
                        tmp_p[elem] = tmp_p.get(elem, 0) + p[elem]
            if tmp_p is not None:
                valstr = '%s,' * len(tmp_p)
                valstr = valstr.rstrip(',') + '\n'
                o_tuple = tuple(tmp_p.values())
                f.write(d + ' ')
                f.write(valstr % o_tuple)