Example #1
def printStats(flow):
    try:
        stats_path = getArg(flow, 's', 'statsfile')
    except ArgException:
        # "-s/--statsfile" was not given - fall back to the default path.
        stats_path = DEFAULT_STATS_PATH

    if os.path.isfile(stats_path):
        print "Statistics file: '%s'" % (stats_path)
        print SharedData(stats_path, multiprocessing.Lock())
    else:
        print "Given path is not a valid file: '%s'" % (stats_path)
Example #2
def combineStatsFiles(flow):
    _files = getArg(flow, "s", "stats")
    _out   = getArg(flow, "out")
    
    _files = _files.split(",")
    
    try:
        # Remove empty elements.
        _files.remove("")
        
    except ValueError:
        # No element was removed.
        pass
    
    # Remove unnecessary whitespace characters.
    _files = [_file.strip() for _file in _files]
    
    
    if len(_files) > 1:
        for _file in _files:
            if not os.path.isfile(_file):
                raise Exception("'%s' is not a file." % (_file))
            
        lock = multiprocessing.Lock()
        first_data = SharedData(_files[0], lock)
        
        for _file in _files[1:]:
            data = SharedData(_file, lock)
            first_data.combineWith(data)
            
        first_data.saveToFile(_out)
        
        print "Stats files combined into '%s'" % (_out)
        
    else:
        print "Specify more than one statistics file. Separate them with commas."
        print "I.e. -s \"\""
Example #3
def printResults(flow):
    try:
        stats_path = getArg(flow, 's', 'statsfile')
    except ArgException:
        # "-s/--statsfile" was not given - fall back to the default path.
        stats_path = DEFAULT_STATS_PATH

    if os.path.isfile(stats_path):
        clones = SharedData(stats_path, multiprocessing.Lock()).getClones()
        print "Found %d code clones saved in file '%s':" % (len(clones),
                                                            stats_path)
        for i, clone in enumerate(clones):
            print str(i) + ".", clone

    else:
        print "Given path is not a valid file: '%s'" % (stats_path)
Example #4
def startSearchMode(flow, continuous=False):
    flow["in"] = os.path.abspath(flow["in"])
    if not os.path.exists(flow["in"]):
        print "Given path (-in) does not exist."
        sys.exit()

    level = 0
    multithreads = 0
    neo4j_helper = Neo4jHelper()
    heap_size = Configurator.getHeapVal()
    neo4j_helper.setHeapsize(heap_size[0], heap_size[1])

    if continuous:
        # Continuous mode was specified, so read the config file
        try:
            stats_path = getArg(flow, "s", "statsfile")
        except ArgException:
            # "-s/--statsfile" was not given - fall back to the default path.
            stats_path = DEFAULT_STATS_PATH

        lock = multiprocessing.Lock()
        shared_data = SharedData(stats_path, lock, in_path=flow["in"])
        in_path = shared_data.getInPath()

        if in_path != flow["in"]:
            print(
                "The path given with \"-in\" is not the path "
                "that was used before.")
            _ = raw_input("Ctrl-C or Ctrl-D to abort. "
                          "Press any key to continue")

        shared_data.setInPath(flow["in"])

        Neo4jHelper.setStatisticsObj(shared_data)

    try:
        multithreads = int(getArg(flow, "m", "multithreading"))
    except (ArgException, ValueError):
        # "-m/--multithreading" missing or not a number - keep the default.
        pass

    code_path = getArg(flow, "q", "queries")
    code = []

    # Read given query.
    if os.path.isfile(code_path):
        with open(code_path, "r") as fh:
            code.append(QueryFile(code_path, fh.read()))

    elif os.path.isdir(code_path):
        # Given path is a directory - get all files recursively inside the
        # directory.
        for path, _, files in os.walk(code_path):
            for name in files:
                file_path = os.path.join(path, name)
                with open(file_path, "r") as fh:
                    code.append(QueryFile(file_path, fh.read()))

        if not code:
            # Did not find any file recursively inside 'code_path'.
            print "Query-path (-q/--queries) does not contain any files."
            sys.exit()

    else:
        # Path does not exist
        print "Query-Path (-q/--queries) does not exist."
        sys.exit()

    try:
        level = int(getArg(flow, "l", "level"))

    except ArgException:
        # Parameter "-l/--level" was not specified.
        pass

    if level == 0:
        # Analyse specified file/files in specified directory with given
        # gremlin query/queries.
        Neo4jHelper.analyseData((code, flow["in"], 1))

    else:
        # Analyse folders 'level' levels under specified path.
        try:
            # Get the root directory of every project in a generator.
            path_generator = getRootDirectories(flow["in"], level)

            if continuous:
                # Check if given in-path is the same as the one in the
                # given stats file.

                if in_path == flow["in"]:
                    # Skip generator elements if they were already
                    # analysed before.
                    projects_analysed_count = shared_data.getProjectsCount()
                    skipGeneratorElements(path_generator,
                                          projects_analysed_count)

        except Exception as err:
            print "An exception occured: %s" % err
            sys.exit()

        projects_analysed = 0

        if multithreads > 1:
            # Multithreading was specified.

            process_number_generator = ProcessIdGenerator()

            # Start a lazy pool of processes.
            pool = LazyMP().poolImapUnordered(
                analyseDataHelper,
                itertools.izip(
                    itertools.repeat(code),
                    path_generator,
                    process_number_generator.getGenerator([1, 2, 3, 4]),
                ), multithreads, process_number_generator)

            # And let them work.
            try:
                while True:
                    # Let multiprocessing pool process all arguments.
                    pool.next()
                    projects_analysed += 1

            except Exception as err:
                # Done
                print err
                pass

        else:
            # No multithreading.
            for path in path_generator:
                neo4j_helper.analyseData((code, path, 1))
                projects_analysed += 1

        if projects_analysed == 0:
            print "No project analysed for path: '%s'" % (flow["in"])
Example #5
def startSearchMode(flow, continuous=False):
    flow["in"] = os.path.abspath(flow["in"])
    if not os.path.exists(flow["in"]):
        print "Given path (-in) does not exist."
        sys.exit()
    
    level = 0
    multithreads = 0
    neo4j_helper = Neo4jHelper()
    heap_size = Configurator.getHeapVal()
    neo4j_helper.setHeapsize(heap_size[0], heap_size[1])
    
    if continuous:
        # Continuous mode was specified, so read the config file
        try:
            stats_path = getArg(flow, "s", "statsfile")
        except ArgException:
            # "-s/--statsfile" was not given - fall back to the default path.
            stats_path = DEFAULT_STATS_PATH

        lock = multiprocessing.Lock()
        shared_data = SharedData(stats_path, lock, in_path=flow["in"])
        in_path = shared_data.getInPath()
        
        if in_path != flow["in"]:
            print(
                "The path given with \"-in\" is not the path "
                "that was used before.")
            _ = raw_input("Ctrl-C or Ctrl-D to abort. "
                          "Press any key to continue")
        
        shared_data.setInPath(flow["in"])
        
        Neo4jHelper.setStatisticsObj(shared_data)

    try:
        multithreads = int(getArg(flow, "m", "multithreading"))
    except (ArgException, ValueError):
        # "-m/--multithreading" missing or not a number - keep the default.
        pass

    code_path = getArg(flow, "q", "queries")
    code = []

    # Read given query.
    if os.path.isfile(code_path):
        with open(code_path, "r") as fh:
            code.append(
                    QueryFile(code_path, fh.read())
                    )
    
    elif os.path.isdir(code_path):
        # Given path is a directory - get all files recursively inside the
        # directory.
        for path, _, files in os.walk(code_path):
            for name in files:
                file_path = os.path.join(path, name)
                with open(file_path, "r") as fh:
                    code.append(
                            QueryFile(file_path, fh.read())
                            )
        
        if not code:
            # Did not find any file recursively inside 'code_path'.
            print "Query-path (-q/--queries) does not contain any files."
            sys.exit()
                    
    else:
        # Path does not exist
        print "Query-Path (-q/--queries) does not exist."
        sys.exit()
    
    
    try:
        level = int(getArg(flow, "l", "level"))
        
    except ArgException:
        # Parameter "-l/--level" was not specified.
        pass
    
    if level == 0:
        # Analyse specified file/files in specified directory with given
        # gremlin query/queries.
        Neo4jHelper.analyseData((
                code, flow["in"], 1
                ))
    
    else:
        # Analyse folders 'level' levels under specified path.
        try:
            # Get the root directory of every project in a generator.
            path_generator = getRootDirectories(flow["in"], level)
            
            if continuous:
                # Check if given in-path is the same as the one in the
                # given stats file.
                
                if in_path == flow["in"]:
                    # Skip generator elements if they were already
                    # analysed before.
                    projects_analysed_count = shared_data.getProjectsCount()
                    skipGeneratorElements(path_generator, projects_analysed_count)

        except Exception as err:
            print "An exception occured: %s" % err
            sys.exit()

        projects_analysed = 0
        
        if multithreads > 1:
            # Multithreading was specified.

            process_number_generator = ProcessIdGenerator()
            
            # Start a lazy pool of processes.
            pool = LazyMP().poolImapUnordered(
                    analyseDataHelper, itertools.izip(
                            itertools.repeat(code), path_generator, 
                            process_number_generator.getGenerator([1,2,3,4]),
                            ),
                    multithreads,
                    process_number_generator
                    )

            # And let them work.
            try:
                while True:
                    # Let multiprocessing pool process all arguments.
                    pool.next()
                    projects_analysed += 1
                    
            except Exception as err:
                # Done
                print err
                pass

        else:    
            # No multithreading.
            for path in path_generator:
                neo4j_helper.analyseData((
                        code, path, 1
                        ))
                projects_analysed += 1
        
        if projects_analysed == 0:
            print "No project analysed for path: '%s'" %(
                                flow["in"]
                                )
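For the multithreaded branch, the work items are built by zipping a repeated query list with the project-path generator and a stream of process ids. A small, self-contained illustration of that packing with Python 2's itertools.izip (the query name, paths, and id stream are simplified stand-ins for the code list of QueryFile objects, getRootDirectories, and ProcessIdGenerator):

import itertools

queries = ["clones.groovy"]    # stands in for the 'code' list of QueryFile objects
paths = iter(["/tmp/project_a", "/tmp/project_b"])    # hypothetical project roots
process_ids = itertools.cycle([1, 2, 3, 4])    # simplified stand-in for ProcessIdGenerator

for args in itertools.izip(itertools.repeat(queries), paths, process_ids):
    print args
# (['clones.groovy'], '/tmp/project_a', 1)
# (['clones.groovy'], '/tmp/project_b', 2)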