def stackoverflow(framework, projects):
    """Collect each sample's questions plus their accepted answers from
    Stack Overflow and append one CSV row per question.

    framework -- framework name used to label rows and locate output.
    projects  -- source passed to get_samples() to obtain sample names.
    """
    global api
    api = StackAPI("stackoverflow")
    samples = get_samples(projects)
    # True -> write the CSV header row first.
    # NOTE(review): `directory` is not defined in this block -- presumably a
    # module-level constant; confirm it exists at module scope.
    output_write(framework, directory, "questions_and_answers", get_header(), True)
    for index, sample in enumerate(samples):
        print_status_samples(index + 1, len(samples))
        questions = get_questions_when_body_has(sample)
        items = questions["items"]
        for indx, question in enumerate(items):
            # Fix: progress must divide by the number of questions
            # (len(items)); the original used len(questions), which counts
            # the keys of the response dict, not the questions.
            print("{0}% questions analysed of {1}".format(
                (indx + 1) / len(items) * 100, sample))
            try:
                answer = api.fetch(
                    "answers/{ids}",
                    ids=[question["accepted_answer_id"]])["items"][0]
                answer_owner = get_owner_by_user_id(api, answer["owner"]["user_id"])
            except KeyError:
                # No accepted answer (or missing owner data): emit empty
                # placeholder fields so the CSV columns stay aligned.
                answer = {
                    "answer_id": "",
                    "score": "",
                    "creation_date": ""
                }
                answer_owner = {
                    "user_id": "",
                    "reputation": "",
                    "creation_date": "",
                    "tags": []
                }
            question_owner = get_owner_by_user_id(api, question["owner"]["user_id"])
            output = create_output(framework, sample, question, answer,
                                   question_owner, answer_owner)
            output_write(framework, directory, "questions_and_answers", output, False)
示例#2
0
def delay(framework, projects, githubtoken):
    """Compute, per sample, the delay in days between each framework
    release and the commit in which the sample updated to that release,
    appending one CSV row per detected version change.

    framework   -- framework name (labels rows, selects config file).
    projects    -- source passed to get_samples().
    githubtoken -- token used to fetch framework release dates.
    """
    print("Computing delay to update")
    repositories_path = 'repositories'
    measure = "delay"
    output_write(framework, measure, measure, "framework,path,current_version,next_version,framework_release_date (YYYY-DD-MM),sample_update_date (YYYY-DD-MM) ,delay_in_days", True)
    framework_release_data = buscar_dados_de_lancamento_de_versoes(framework, githubtoken)
    configuration_file = define_arquivo_de_configuracao(framework)
    samples = get_samples(projects)
    for index, sample in enumerate(samples):
        print_status_samples(index + 1, len(samples))
        sample_path = repositories_path + "/" + sample
        paths_configuration_file = find_paths(configuration_file, sample_path)
        repository = Repo(sample_path)
        reversed_commits = get_commits(repository)
        for path in paths_configuration_file:
            current_version, reversed_commits = get_first_version(
                framework, path, repository, reversed_commits)
            if current_version == {}:
                # No initial version found for this config file path.
                continue
            for commit in reversed_commits:
                # Force-checkout each commit to read the config file as it
                # was at that point in history.
                repository.git.checkout(commit, '-f')
                next_version = buscar_versao_do_framework(framework, path)
                # Record a row only when both versions are known and the
                # version actually changed (idiomatic replacement for the
                # original chain of `!= ''` / `!= None` comparisons).
                if (current_version not in ('', None)
                        and next_version not in ('', None)
                        and current_version != next_version):
                    sample_update_date = get_commit_date(commit)
                    framework_release_date = framework_release_data[next_version]
                    delay_in_days = calculate_delay(framework_release_date,
                                                    sample_update_date)
                    output_write(framework, measure, measure,
                                 create_output(current_version, delay_in_days,
                                               framework,
                                               framework_release_date,
                                               next_version, path,
                                               sample_update_date), False)
                    current_version = next_version
        # Restore the working tree after walking this sample's history.
        repository.git.checkout('master', '-f')
示例#3
0
def allanswers(framework, projects):
    """Fetch ALL answers (not just accepted ones) for every question in
    the previously generated questions_and_answers CSV and append one
    row per answer.
    """
    global api
    api = StackAPI("stackoverflow")
    samples = get_samples(projects)
    # NOTE(review): `directory` is not defined in this block -- presumably a
    # module-level constant; confirm it exists at module scope.
    output_write(framework, directory, "all_answers", get_header(), True)
    with open("stackoverflow/" + framework +
              "_questions_and_answers_output.csv") as questions:
        for index, question in enumerate(questions):
            if index == 0:
                continue  # skip the CSV header row
            print("Questions from sample " + question.split(",")[1])
            question = question.replace("\n", "")
            question_id = question.split(",")[2]
            answers = api.fetch("questions/" + question_id +
                                "/answers")["items"]
            print(len(answers))
            # Fix: a question with zero answers caused ZeroDivisionError
            # in the progress message below; skip it instead.
            if not answers:
                continue
            for indx, answer in enumerate(answers):
                print("{0}% answers analysed of question {1}".format(
                    (indx + 1) / len(answers) * 100, question_id))
                try:
                    answer_owner = get_owner_by_user_id(
                        api, answer["owner"]["user_id"])
                except KeyError:
                    # Deleted/anonymous owner: emit empty placeholders so
                    # the CSV columns stay aligned.
                    answer_owner = {
                        "user_id": "",
                        "reputation": "",
                        "creation_date": "",
                        "tags": []
                    }

                output = create_output(framework,
                                       question.split(",")[1], question_id,
                                       answer, answer_owner)
                output_write(framework, directory, "all_answers", output,
                             False)
 def output_callback(path, filter_result):
     # Emit the path relative to the brick root, with the configured
     # encoding, tag, and field separator.
     rel_path = path.strip()[brick_path_len + 1:]
     output_write(fout,
                  rel_path,
                  args.output_prefix,
                  encode=(not args.no_encode),
                  tag=args.tag,
                  field_separator=args.field_separator)
def githubmetadata(framework, projects, githubtoken):
    """Fetch GitHub metadata for every sample repository and append one
    CSV row per repository."""
    print("Computing github metadata")
    measure = "githubmetadata"
    header = "framework,repository,forks,stargazers,watchers,openedIssues,closedIssues,commits,openedPullRequests,closedPullRequests,updatedAt,projects,lifetime,lifetime per commit"
    output_write(framework, measure, measure, header, True)
    client = Github(githubtoken)
    samples = get_samples(projects)
    total = len(samples)
    for position, sample in enumerate(samples, start=1):
        print_status_samples(position, total)
        repo = client.get_repo(sample)
        row = create_output(framework, repo, sample)
        output_write(framework, measure, measure, row, False)
def write_header(action_in_files, configuration_files, extension_files,
                 framework, measure):
    """Write the CSV header row for the file-extension-changes measure:
    fixed columns, one column per action, a total, then one column per
    extension file and per configuration file."""
    columns = ["framework", "path"]
    columns.extend(action_in_files)
    columns.append("total_actions")
    columns.extend(extension_files)
    columns.extend(configuration_files)
    output_write(framework, "file_extension_changes", measure,
                 ",".join(columns), True)
示例#7
0
def count_forks_ahead(framework, forks, repository):
    """Count how many forks are ahead of the repository's default branch.

    Appends one CSV row per ahead fork via output_write and returns the
    total number of forks found ahead.
    """
    forks_ahead = 0
    for fork in forks:
        manage_limit_rate(forks.totalCount)
        try:
            comparation = repository.compare(
                repository.default_branch,
                fork.owner.login + ":" + fork.default_branch)
            if comparation.ahead_by > 0:
                output_write(framework, "forksahead", "forks_ahead",
                             framework + "," + fork.full_name + ","
                             + str(comparation.ahead_by), False)
                forks_ahead += 1
        except Exception:
            # Fix: the original bare `except:` also swallowed SystemExit
            # and KeyboardInterrupt.  Keep the deliberate best-effort
            # behaviour (skip forks that fail to compare), but only for
            # ordinary errors.
            continue
    return forks_ahead
def write_content(action_in_files, configuration_files, extension_files,
                  framework, sample, measure):
    """Append one CSV data row for a sample: per-action counts, their
    total, then per-extension and per-configuration-file counts."""
    total_actions = sum(action_in_files.values())
    columns = [framework, sample]
    columns.extend(str(count) for count in action_in_files.values())
    columns.append(str(total_actions))
    columns.extend(str(extension_files[name]) for name in extension_files)
    columns.extend(str(configuration_files[name]) for name in configuration_files)
    output_write(framework, "file_extension_changes", measure,
                 ",".join(columns), False)
def generalprojects(projects):
    """Clone each sample repository, classify its framework, record the
    result as a CSV row, and delete the clone to reclaim disk space."""
    samples = get_samples(projects)
    # Fix: the header row must be written with the header flag True, as
    # every other measure in this module does; passing False appended the
    # header as if it were a data row.
    output_write("", "generalprojects", "projects",
                 "path,stars,language,framework", True)
    for repository in samples:
        clone(repository)
        print("{0} baixado".format(repository))
        framework = get_framework(repository)
        print("{0} classificado como {1}".format(repository, framework))
        # Remove the whole owner directory of the clone before moving on.
        shutil.rmtree("generalprojects/repositories/" +
                      repository.split("/")[0])
        print("{0} apagado".format(repository))
        output_write("", "generalprojects", "projects",
                     "{0},{1}".format(repository, framework), False)
示例#10
0
        def output_callback(path, filter_result, is_dir):
            # Strip the brick prefix, then emit the path only if it
            # matches the requested entry type: "both", "d" (dirs only)
            # or "f" (files only).
            rel_path = path.strip()[brick_path_len + 1:]
            entry_type = "d" if is_dir else "f"
            if args.type in ("both", entry_type):
                output_write(fout, rel_path, args.output_prefix,
                             encode=(not args.no_encode), tag=args.tag,
                             field_separator=args.field_separator)
示例#11
0
def importcount(framework, projects):
    """Count framework imports across each sample's Java files and append
    one CSV row per sample (absolute and per-file relative counts)."""
    print("Computing imports")
    measure = "importcount"
    output_write(framework, measure, measure,
                 "framework,path,imports,javaFiles,imports/java_files", True)
    samples = get_samples(projects)
    total = len(samples)
    for position, sample in enumerate(samples, start=1):
        print_status_samples(position, total)
        deal_with_empty_repo(sample)
        java_files_path = find_paths("*.java", "repositories/" + sample)
        imports = get_imports(framework, java_files_path)
        relative = calculate_relative(imports, java_files_path)
        row = create_output(framework, imports, java_files_path, relative,
                            sample)
        output_write(framework, measure, measure, row, False)
示例#12
0
def numberofextensionfile(framework, projects):
    """Count files per extension in every sample and append one CSV row
    per sample, including an "others" bucket for unknown extensions."""
    print("Computing extension files")
    extensions = create_extension_files()
    measure = "numberofextensionfile"
    output_write(
        framework, measure, measure,
        'framework,project,java,properties,jar,build.gradle,pom.xml,manifest.xml,xml,bat,md,adoc,README,yaml,txt,sh,travis.yml,yml,cmd,kt,json,numberOfFiles,others',
        True)
    samples = get_samples(projects)
    total = len(samples)
    for position, sample in enumerate(samples, start=1):
        print_status_samples(position, total)
        deal_with_empty_repo(sample)
        count_extension_files(extensions, sample)
        others = count_others(extensions)
        row = "{0},{1},{2}{3}".format(framework, sample,
                                      concat_output(extensions), others)
        output_write(framework, measure, measure, row, False)
def currentframeworkversion(framework, projects):
    """Extract the framework version declared in each sample's
    configuration files and append one CSV row per configuration file.
    """
    print("Computing current framework version")
    configuration_file = find_config_file(framework)
    configuration_file_key_words = get_key_words(framework)
    write_output_header(configuration_file_key_words, framework)
    samples = get_samples(projects)
    for index, sample in enumerate(samples):
        print_status_samples(index + 1, len(samples))
        checkout_default_branch_repository(sample)
        deal_with_empty_repo(sample)
        configuration_files_paths = find_paths(configuration_file,
                                               "repositories/" + sample)
        for path in configuration_files_paths:
            output = framework + "," + path
            # Fix: iterate the keys directly; the original looped over
            # .items() but never used the value.
            for key in configuration_file_key_words:
                version = get_framework_version(framework, path, key)
                output = output + "," + version
            # Skip rows with three consecutive empty fields; for Spring,
            # additionally require a RELEASE build in the row.
            if ",,," not in output and (framework != "spring" or "RELEASE" in output):
                output_write(framework, "currentframeworkversion",
                             "currentframeworkversion", output, False)
示例#14
0
def understandmetrics(framework, projects,
                      repositories_path="/home/gabriel/Documentos/gabrielsmenezes/pesquisamestrado/repositories/"):
    """Run Understand metrics over each sample and append one CSV row per
    sample.

    repositories_path -- base directory holding the cloned samples.
        Defaults to the original hard-coded location for backward
        compatibility, but can now be overridden instead of being buried
        inside the loop.
    """
    samples = get_samples(projects)
    # Robustness: avoid IndexError on an empty sample list.
    if not samples:
        return
    owner = samples[0].split("/")[0]
    create_output_directory("understandmetrics", owner)
    output_write(
        framework, "understandmetrics", "understandmetrics",
        "framework,projeto,AvgCyclomatic,AvgCyclomaticModified,AvgCyclomaticStrict,AvgEssential,AvgLine,AvgLineBlank,AvgLineCode,AvgLineComment,CountClassBase,CountClassCoupled,CountClassCoupledModified,CountClassDerived,CountDeclClass,CountDeclClassMethod,CountDeclClassVariable,CountDeclExecutableUnit,CountDeclFile,CountDeclFunction,CountDeclInstanceMethod,CountDeclInstanceVariable,CountDeclMethod,CountDeclMethodAll,CountDeclMethodDefault,CountDeclMethodPrivate,CountDeclMethodProtected,CountDeclMethodPublic,CountInput,CountLine,CountLineBlank,CountLineCode,CountLineCodeDecl,CountLineCodeExe,CountLineComment,CountOutput,CountPath,CountPathLog,CountSemicolon,CountStmt,CountStmtDecl,CountStmtExe,Cyclomatic,CyclomaticModified,CyclomaticStrict,Essential,Knots,MaxCyclomatic,MaxCyclomaticModified,MaxCyclomaticStrict,MaxEssential,MaxEssentialKnots,MaxInheritanceTree,MaxNesting,MinEssentialKnots,PercentLackOfCohesion,PercentLackOfCohesionModified,RatioCommentToCode,SumCyclomatic,SumCyclomaticModified,SumCyclomaticStrict,SumEssential,?,numberOfJavaFiles",
        True)

    for sample in samples:
        sample_path = repositories_path + sample
        udb_path = "understandmetrics/" + sample
        deal_with_empty_repo(sample)
        metrics = get_understand_metrics(framework, sample, udb_path,
                                         sample_path)
        output = create_output(metrics)
        output_write(framework, "understandmetrics", "understandmetrics",
                     output, False)
def maintainers(framework, projects, githubtoken):
    """Compare each sample's contributors with the framework's own
    contributors and append one CSV row per sample."""
    print("Computing maintainers data")
    output_write(
        framework, "maintainers", "maintainers",
        "framework,path,framework_contributors,sample_contributors,commom_contributors,commom/framework,commom/sample",
        True)
    framework_repository = get_repository_name(framework)
    framework_contributors = get_contributors(framework_repository,
                                              githubtoken)
    # Fix: removed a bare `framework_contributors.totalCount` expression
    # statement here -- its value was discarded, so it had no effect on
    # the output.
    samples = get_samples(projects)
    for index, sample in enumerate(samples):
        print_status_samples(index + 1, len(samples))
        sample_contributors = get_contributors(sample, githubtoken)
        commom_contributors = get_commom_contributors(framework_contributors,
                                                      sample_contributors)
        output_write(
            framework, "maintainers", "maintainers",
            create_output(framework, sample, framework_contributors,
                          sample_contributors, commom_contributors), False)
示例#16
0
def forksahead(framework, projects, githubtoken):
    """Measure how many forks of each sample repository are ahead of its
    default branch and append one summary CSV row per project."""
    print("Computing forks ahead data")
    g = Github(githubtoken)
    output_write(framework, "forksahead", "forks_ahead_by_projects", "framework,path,number_of_forks,forks_ahead,ratio", True)
    output_write(framework, "forksahead", "forks_ahead", "framework,path,number_of_forks,forks_ahead,ratio", True)
    samples = get_samples(projects)
    for index, sample in enumerate(samples):
        manage_limit_rate(len(samples))
        print_status_samples(index+1, len(samples))
        repository = g.get_repo(sample)
        forks = repository.get_forks()
        forks_ahead = count_forks_ahead(framework, forks, repository)
        number_of_forks = repository.forks_count
        # Fix: a repository with zero forks raised ZeroDivisionError;
        # report a ratio of 0.0 instead.
        ratio_forks_ahead = (forks_ahead / number_of_forks
                             if number_of_forks else 0.0)
        output = create_output(sample, framework, number_of_forks, forks_ahead, ratio_forks_ahead)
        output_write(framework, "forksahead", "forks_ahead_by_projects", output, False)
def write_output_header(configuration_file_key_words, framework):
    """Write the CSV header for the currentframeworkversion measure:
    fixed columns followed by one column per configuration keyword."""
    columns = ["framework", "path"]
    columns.extend(configuration_file_key_words)
    output_write(framework, "currentframeworkversion",
                 "currentframeworkversion", ",".join(columns), True)
示例#18
0
 def output_callback(path, filter_result):
     # Emit the brick-relative path; encoding is always enabled here.
     rel_path = path.strip()[brick_path_len + 1:]
     output_write(fout, rel_path, args.output_prefix, encode=True)
示例#19
0
 def output_callback(path, filter_result):
     # Strip the brick prefix and forward the remaining relative path
     # with the user-configured encode/tag/separator options.
     trimmed = path.strip()
     output_write(fout, trimmed[brick_path_len + 1:], args.output_prefix,
                  encode=(not args.no_encode), tag=args.tag,
                  field_separator=args.field_separator)
示例#20
0
 def output_callback(path, filter_result):
     # Write the path relative to the brick root, always encoded.
     relative = path.strip()[brick_path_len + 1:]
     output_write(fout, relative, args.output_prefix, encode=True)
示例#21
0
def gfid_to_path_using_pgfid(brick, gfids_file, output_file, outfile_failures):
    """
    Parent GFID is saved as xattr, collect Parent GFIDs from all
    the files from gfids_file. Convert parent GFID to path and Crawl
    each directories to get the list of files/dirs having same inode number.
    Do find with maxdepth as 1 and print the output in <INODE_NUM> <PATH>
    format, use this output to look into in memory dictionary of inode
    numbers got from the list of GFIDs
    """
    with open(output_file, "a+") as fout:
        pgfids = set()
        inode_dict = {}
        with open(gfids_file) as f:
            for gfid in f:
                gfid = gfid.strip()
                # Backend path of the GFID entry under .glusterfs/.
                p = os.path.join(brick,
                                 ".glusterfs",
                                 gfid[0:2],
                                 gfid[2:4],
                                 gfid)
                if os.path.islink(p):
                    # Symlink entry: resolve directly to a path and record.
                    path = symlink_gfid_to_path(brick, gfid)
                    output_write(fout, path, args.output_prefix)
                else:
                    try:
                        # Remember the inode; later used to match crawled
                        # entries back to this GFID list.
                        inode_dict[str(os.stat(p).st_ino)] = 1
                        file_xattrs = xattr.list(p)
                        num_parent_gfid = 0
                        for x in file_xattrs:
                            if x.startswith("trusted.pgfid."):
                                num_parent_gfid += 1
                                pgfids.add(x.split(".")[-1])

                        if num_parent_gfid == 0:
                            # Fix: use a distinct name for the failures
                            # file handle; the original `as f` shadowed
                            # the open gfids_file handle above.
                            with open(outfile_failures, "a") as fail_f:
                                fail_f.write("%s\n" % gfid)
                                fail_f.flush()
                                os.fsync(fail_f.fileno())

                    except (IOError, OSError) as e:
                        if e.errno == ENOENT:
                            # Entry vanished; skip it.
                            continue
                        else:
                            fail("%s Failed to convert to path from "
                                 "GFID %s: %s" % (brick, gfid, e),
                                 logger=logger)

        if not inode_dict:
            return

        def inode_filter(path):
            # True only for entries whose inode was collected above.
            try:
                st = os.lstat(path)
            except (OSError, IOError) as e:
                if e.errno == ENOENT:
                    st = None
                else:
                    raise

            if st and inode_dict.get(str(st.st_ino), None):
                return True

            return False

        # Length of brick path, to remove from output path
        brick_path_len = len(brick)

        def output_callback(path):
            path = path.strip()
            path = path[brick_path_len+1:]
            output_write(fout, path, args.output_prefix)

        ignore_dirs = [os.path.join(brick, dirname)
                       for dirname in
                       conf.get_opt("brick_ignore_dirs").split(",")]

        # Crawl each parent directory (depth 1) and emit matching entries.
        for pgfid in pgfids:
            path = symlink_gfid_to_path(brick, pgfid)
            find(os.path.join(brick, path),
                 callback_func=output_callback,
                 filter_func=inode_filter,
                 ignore_dirs=ignore_dirs,
                 subdirs_crawl=False)

        fout.flush()
        os.fsync(fout.fileno())
示例#22
0
 def output_callback(path):
     # Emit the path with the brick prefix removed.
     rel_path = path.strip()[brick_path_len + 1:]
     output_write(fout, rel_path, args.output_prefix)
示例#23
0
 def output_callback(path):
     # Record the path relative to the brick root.
     relative = path.strip()[brick_path_len + 1:]
     output_write(fout, relative, args.output_prefix)
示例#24
0
def gfid_to_path_using_pgfid(brick, gfids_file, output_file, outfile_failures):
    """
    Parent GFID is saved as xattr, collect Parent GFIDs from all
    the files from gfids_file. Convert parent GFID to path and Crawl
    each directories to get the list of files/dirs having same inode number.
    Do find with maxdepth as 1 and print the output in <INODE_NUM> <PATH>
    format, use this output to look into in memory dictionary of inode
    numbers got from the list of GFIDs
    """
    with open(output_file, "a+") as fout:
        pgfids = set()
        inode_dict = {}
        with open(gfids_file) as f:
            for gfid in f:
                gfid = gfid.strip()
                # Backend location of this GFID under <brick>/.glusterfs/.
                p = os.path.join(brick, ".glusterfs", gfid[0:2], gfid[2:4],
                                 gfid)
                if os.path.islink(p):
                    # Symlink entry: resolve it straight to a path and
                    # write it out.
                    path = symlink_gfid_to_path(brick, gfid)
                    output_write(fout, path, args.output_prefix)
                else:
                    try:
                        # Remember this file's inode; used by inode_filter
                        # below to match crawled entries to the GFID list.
                        inode_dict[str(os.stat(p).st_ino)] = 1
                        file_xattrs = xattr.list(p)
                        num_parent_gfid = 0
                        for x in file_xattrs:
                            if x.startswith("trusted.pgfid."):
                                num_parent_gfid += 1
                                pgfids.add(x.split(".")[-1])

                        if num_parent_gfid == 0:
                            # No parent-GFID xattr: record as a failure.
                            # NOTE(review): `as f` shadows the still-open
                            # gfids_file handle; iteration is unaffected
                            # (the loop holds its own iterator) but the
                            # inner handle should be renamed.
                            with open(outfile_failures, "a") as f:
                                f.write("%s\n" % gfid)
                                f.flush()
                                os.fsync(f.fileno())

                    except (IOError, OSError) as e:
                        if e.errno == ENOENT:
                            # Entry disappeared between listing and stat.
                            continue
                        else:
                            fail("%s Failed to convert to path from "
                                 "GFID %s: %s" % (brick, gfid, e),
                                 logger=logger)

        if not inode_dict:
            return

        def inode_filter(path):
            # True only for entries whose inode was collected above.
            try:
                st = os.lstat(path)
            except (OSError, IOError) as e:
                if e.errno == ENOENT:
                    st = None
                else:
                    raise

            if st and inode_dict.get(str(st.st_ino), None):
                return True

            return False

        # Length of brick path, to remove from output path
        brick_path_len = len(brick)

        def output_callback(path):
            # Emit the crawled path relative to the brick root.
            path = path.strip()
            path = path[brick_path_len + 1:]
            output_write(fout, path, args.output_prefix)

        ignore_dirs = [
            os.path.join(brick, dirname)
            for dirname in conf.get_opt("brick_ignore_dirs").split(",")
        ]

        # Crawl each parent directory (depth 1) for matching inodes.
        for pgfid in pgfids:
            path = symlink_gfid_to_path(brick, pgfid)
            find(os.path.join(brick, path),
                 callback_func=output_callback,
                 filter_func=inode_filter,
                 ignore_dirs=ignore_dirs,
                 subdirs_crawl=False)

        fout.flush()
        os.fsync(fout.fileno())