Пример #1
0
def walk(repoDir):
    """Recursively scan repoDir for TODO comments.

    Opens each file as UTF-8, runs it through parse(), and collects the
    resulting TODO entries.  Each entry is tagged with a 'filename' field
    holding the path relative to repoDir.  Directories listed in
    IGNORE_LIST are not descended into.

    Returns a list of TODO dicts (possibly empty).
    """
    todos = []
    repoDirLen = len(repoDir)
    for dirname, dirnames, filenames in os.walk(repoDir):
        for filename in filenames:
            try:
                fin = codecs.open(os.path.join(dirname, filename), encoding = 'utf-8')
            except (IOError, OSError):
                # Unreadable/special files are expected in arbitrary repos; skip them
                log(WarningLevels.Warn, "File %s cannot be opened. Skipping."%(filename))
                continue

            try:
                parsed = parse(filename, fin)
            finally:
                # parse() does not close the stream; close it here to avoid
                # leaking a file handle per scanned file
                fin.close()

            # No TODOs were found (parse returns [] or an empty JSON string)
            if len(parsed) == 0:
                continue

            parsed = json.loads(parsed)
            # Tag each TODO with its path relative to the repo root
            for p in parsed:
                p['filename'] = os.path.join(dirname, filename)[repoDirLen:]
            todos = todos + parsed

        # Advanced usage:
        # editing the 'dirnames' list will stop os.walk() from recursing into there.
        dirnames[:] = [dn for dn in dirnames if dn not in IGNORE_LIST]

    return todos
Пример #2
0
def runWorker(status):
    """Cloning worker loop: pop repo keys from the cloning queue, clone each
    repo locally, and push its key onto the parsing queue.

    status is a shared multiprocessing.Value('i'); the parent process flips
    it to WorkerStatus.Dead to request shutdown.
    """
    # Ignore interrupt signals in this process so they are only handled by the parent
    signal.signal(signal.SIGINT, signal.SIG_IGN)

    # Loop is terminated externally by setting status to Dead
    while status.value != WorkerStatus.Dead:
        try:
            cloneCount = redis.llen(RepoQueues.Cloning)
            parseCount = redis.llen(RepoQueues.Parsing)
        except Exception:
            # Redis being unreachable is fatal for this worker
            log(WarningLevels.Fatal, "Cloning Worker unable to reach Redis")
            break

        # Only clone while there is work AND the parse queue has room
        if cloneCount > 0 and parseCount < int(settings.maxParseQueueCount):
            repoKey = redis.lpop(RepoQueues.Cloning)

            repo = Repo()
            repo.loadFromKey(repoKey)

            # sanity check our loaded key
            assert repo.key() == repoKey, "Bad repo saved in cloning Queue! Key %s not found!"%(repoKey)

            # clone the repo and add it to the parse queue
            src.todoMelvin.checkoutRepo(repo)
            redis.rpush(RepoQueues.Parsing, repoKey)
        else:
            sleepTime = float(settings.clonerSleepTime)
            log(WarningLevels.Debug, "Cloning Worker going to sleep...")

            # Mark as sleeping so the parent can terminate us faster on shutdown
            status.value = WorkerStatus.Sleeping
            time.sleep(sleepTime)
            status.value = WorkerStatus.Working
Пример #3
0
def checkoutRepo(repo):
    """Clone the given repo into the local 'repos/' directory and mark it Cloned."""
    log(WarningLevels.Info, "Cloning %s..."%(repo.key()))

    # Repo keys contain '/', which is not usable in a directory name
    cloneTarget = 'repos/%s' % (repo.key().replace('/', '-'))
    callWithLogging(['git', 'clone', '--quiet', repo.gitUrl, cloneTarget])

    setCommitSHAFromClone(repo)
    repo.status = "Cloned"
    repo.save()
Пример #4
0
def parseRepo(repo):
    """Parse a cloned repo for TODOs; queue it for posting, or drop it from
    Redis when nothing was found."""
    src.todoMelvin.parseRepoForTodos(repo)

    if len(repo.Todos) == 0:
        log(WarningLevels.Debug, "0 TODOs found, deleting from Redis.")
        redis.delete(repo.key())
    else:
        redis.rpush(RepoQueues.Posting, repo.key())
Пример #5
0
def addRepoToRedis(repo):
    """Add a Github repo to Redis if it is not already tracked.

    Returns the newly created repo object, or None when it already existed.
    """
    # Guard clause: already-known repos are not re-added
    if repoExists(repo.owner.login, repo.name):
        return None

    redisRepo = addNewRepo(repo)
    log(WarningLevels.Info, "New Repo %s/%s added to Redis"%(repo.owner.login, repo.name))
    return redisRepo
Пример #6
0
 def getGithubSHA(self, gh):
     """Return the commit SHA of this repo's tracked branch on Github.

     gh is a Github API client.  Returns None when the branch is not
     found or the API call fails.
     """
     try:
         branches = gh.repos.list_branches(self.userName, self.repoName)
         for branch in branches.all():
             if branch.name == self.branch:
                 return branch.commit.sha
     except Exception:
         # API/network failures are non-fatal; the caller handles None
         log(WarningLevels.Warn, "Failed to get SHA for %s/%s"%(self.userName, self.repoName))

     return None
Пример #7
0
def parseRepoForTodos(repo):
    """Walk the repo's local clone, attach every TODO found, and mark the
    repo as Parsed."""
    # Local clones live under ./repos with '/' in the key replaced by '-'
    repoDir = os.path.join(os.getcwd(), 'repos', repo.key().replace('/', '-'))

    log(WarningLevels.Info, "Parsing repo %s for TODOs..."%(repo.key()))
    foundTodos = walk(repoDir)
    log(WarningLevels.Info, "%i TODOs found in %s"%(len(foundTodos), repo.key()))

    for entry in foundTodos:
        buildTodo(repo, entry)

    repo.status = "Parsed"
    repo.save()
Пример #8
0
def runWorker(status):
    """Parsing worker loop: pop repos from the parsing queue and parse each
    one in a child process so a per-repo timeout can be enforced.

    status is a shared multiprocessing.Value('i'); the parent process flips
    it to WorkerStatus.Dead to request shutdown.
    """
    # Ignore interrupt signals in this process so they are only handled by the parent
    signal.signal(signal.SIGINT, signal.SIG_IGN)

    # Loop is terminated externally by setting status to Dead
    while status.value != WorkerStatus.Dead:
        try:
            parseCount = redis.llen(RepoQueues.Parsing)
        except Exception:
            # Redis being unreachable is fatal for this worker
            log(WarningLevels.Fatal, "Parsing Worker unable to reach Redis")
            break

        if parseCount > 0:
            repoKey = redis.lpop(RepoQueues.Parsing)

            repo = Repo()
            repo.loadFromKey(repoKey)

            # sanity check our loaded key
            assert repo.key() == repoKey, "Bad repo saved in parsing Queue! Key %s not found!"%(repoKey)

            # Parse repo for todos in a child process (so it can be killed on
            # timeout), then delete the local content
            parser = multiprocessing.Process(target = parseRepo, args = (repo,))

            startTime = time.time()
            parser.start()

            # Poll the child; enforce the timeout and react to shutdown requests
            while parser.is_alive():
                time.sleep(0.5)

                if time.time() - startTime > float(settings.parserRepoTimeout):
                    parser.terminate()
                    parser.join()
                    log(WarningLevels.Warn, "Parse timed out, skipping the rest of the parse.")

                    redis.delete(repoKey)
                    break

                if status.value == WorkerStatus.Dead:
                    # Worker was killed during parsing: return the repo to the
                    # queue and exit immediately
                    parser.terminate()
                    parser.join()
                    log(WarningLevels.Debug, "Parsing Interrupted, returning to parsing queue.")
                    redis.rpush(RepoQueues.Parsing, repoKey)
                    return #Skip the rest and kill the process

            # Always remove the local clone, even after a timeout
            src.todoMelvin.deleteLocalRepo(repo)

        else:
            sleepTime = float(settings.parserSleepTime)
            log(WarningLevels.Debug, "Parsing Worker going to sleep...")

            # Mark as sleeping so the parent can terminate us faster on shutdown
            status.value = WorkerStatus.Sleeping
            time.sleep(sleepTime)
            status.value = WorkerStatus.Working
Пример #9
0
def findRepos(gh, count):
    """Poll the Github public event stream until `count` distinct valid repos
    have been collected; returns the list of repos.

    Returns an empty list immediately when count <= 0.
    """
    repoList = []

    if count <= 0:
        return repoList

    done = False
    while not done:
        # Re-fetch the event stream each pass until enough repos are gathered
        for event in gh.events.list().iterator():
            candidate = checkForValidEvent(gh, event)

            if candidate and candidate not in repoList:
                repoList.append(candidate)
                if len(repoList) == count:
                    done = True
                    break

    log(WarningLevels.Info, "%i valid repos found from Github"%(len(repoList)))
    return repoList
Пример #10
0
    def format(self, tokensource, outfile):
        """Scan a Pygments token stream for TODO-style comments.

        Writes a JSON list of {'value', 'linenumber'} dicts to outfile:
        one entry per comment token that contains a string from
        todoQualifiers and none from todoDisqualifiers.  Oversized tokens
        are skipped, and the whole scan is abandoned (committing what was
        found so far) after settings.fileParsingTimeout seconds.
        """
        linenumber = 1
        # look for todos.
        comments = []
        t = clock()  # scan start time, used to enforce fileParsingTimeout

        for ttype, value in tokensource:            
            # Skip giant comment tokens outright (guards against minified blobs)
            if len(value) > int(settings.arbitraryTokenMaxLength):
                log(WarningLevels.Debug, "Large Comment Skipped.  Size: %s Max: %s"%(len(value), settings.arbitraryTokenMaxLength))
                continue
            
            # Don't allow parsing a file for longer than the timeout;
            # commit whatever was collected so far and bail out
            if clock() - t >= float(settings.fileParsingTimeout):
                log(WarningLevels.Debug, "File timeout. %i TODOs committed"%(len(comments)))
                outfile.write(json.dumps(comments))
                return            

            # Match the token type (or its direct parent) against the Comment
            # token family by string comparison
            if ttype.__str__() == Comment.__str__() or ttype.parent.__str__() == Comment.__str__():
                
                todoQualified = False
                
                # Must contain at least one qualifier substring
                for string in todoQualifiers:
                    if string in value.lower():
                        todoQualified= True
                        break
                
                # ...and no disqualifier; disqualifiers override qualifiers
                for string in todoDisqualifiers:
                    if string in value.lower():
                        todoQualified= False       
                        break 
                
                if todoQualified:
                    comments.append({
                        'value': value,
                        'linenumber': linenumber,
                        })
            # Track line numbers by counting newlines in every token.
            # NOTE(review): self.instances presumably counts occurrences of
            # '\n' in value (str.count semantics) -- confirm in the class body.
            linenumber += self.instances(value, '\n')

        # This feels so f*****g derpy but the Formatter doesn't let me pass 
        #   out python objects. JSON ftw.
        outfile.write(json.dumps(comments))
Пример #11
0
def runWorker(status):
    """Posting worker loop: pop repos from the posting queue and post at most
    one not-yet-posted TODO per repo as a Github issue.

    status is a shared multiprocessing.Value('i'); the parent process flips
    it to WorkerStatus.Dead to request shutdown.
    """
    # Ignore interrupt signals in this process so they are only handled by the parent
    signal.signal(signal.SIGINT, signal.SIG_IGN)

    # Loop is terminated externally by setting status to Dead
    while status.value != WorkerStatus.Dead:
        try:
            postCount = redis.llen(RepoQueues.Posting)
        except Exception:
            # Redis being unreachable is fatal for this worker
            log(WarningLevels.Fatal, "Posting Worker unable to reach Redis")
            break

        if postCount > 0:
            repoKey = redis.lpop(RepoQueues.Posting)

            repo = Repo()
            repo.loadFromKey(repoKey)

            # sanity check our loaded key
            assert repo.key() == repoKey, "Bad repo saved in posting Queue! Key %s not found!" % (repoKey)

            # Post the first TODO that has no issue yet, then stop
            for todo in repo.Todos:
                if len(todo.issueURL) == 0:
                    repo.lastTodoPosted = todo.key(repo)
                    repo.lastTodoPostDate = datetime.now().strftime("%m/%d/%Y %H:%M:%S")

                    # Generate the issue payload
                    data = src.todoIssueGenerator.buildIssue(todo, repo)

                    # Post the issue and save the url.  In debug mode the issue
                    # goes to our own todo-helper repo instead of the target.
                    if settings.debug.lower() == "true":
                        issue = gh.issues.create(data, "p4r4digm", "todo-helper")  # post to our todo-helper
                    else:
                        issue = gh.issues.create(data, repo.userName, repo.repoName)

                    todo.issueURL = issue.url

                    # put todo in todo graveyard
                    redis.rpush(RepoQueues.TodoGY, todo.key(repo))

                    repo.save()

                    log(WarningLevels.Info, "Issue posted to Github!")
                    break

            # throw repo into graveyard
            redis.rpush(RepoQueues.RepoGY, repo.key())

        else:
            sleepTime = float(settings.posterSleepTime)
            log(WarningLevels.Debug, "Posting Worker going to sleep...")

            # Mark as sleeping so the parent can terminate us faster on shutdown
            status.value = WorkerStatus.Sleeping
            time.sleep(sleepTime)
            status.value = WorkerStatus.Working
Пример #12
0
def runWorker(status):
    """Tagging worker loop: keep the cloning queue topped up with fresh repos
    discovered on Github.

    status is a shared multiprocessing.Value('i'); the parent process flips
    it to WorkerStatus.Dead to request shutdown.
    """
    # Ignore interrupt signals in this process so they are only handled by the parent
    signal.signal(signal.SIGINT, signal.SIG_IGN)

    minCount = int(settings.minCloneQueueCount)
    maxCount = int(settings.maxCloneQueueCount)

    # Loop is terminated externally by setting status to Dead
    while status.value != WorkerStatus.Dead:
        try:
            cloneCount = redis.llen(RepoQueues.Cloning)
        except Exception:
            # Redis being unreachable is fatal for this worker
            log(WarningLevels.Fatal, "Tagging Worker unable to reach Redis")
            break

        # Refill whenever the queue drops below the low-water mark
        if cloneCount < minCount:
            log(WarningLevels.Info, "Tagger detected low clone queue, Searching for %i new repos..."%(maxCount - cloneCount))
            repoList = src.todoMelvin.findRepos(gh, maxCount - cloneCount)

            addedCount = 0
            for r in repoList:
                # Attempt to add repo to redis (it is validated first)
                repo = src.todoMelvin.addRepoToRedis(r)

                # Repo was added; tag it in the cloning queue
                if repo:
                    redis.rpush(RepoQueues.Cloning, repo.key())
                    addedCount += 1

            log(WarningLevels.Info, "Tagger added %i new repos to cloning Queue."%(addedCount))
        else:
            sleepTime = float(settings.taggerSleepTime)
            log(WarningLevels.Debug, "Tagger queue is full.  Going to sleep...")

            # Mark as sleeping so the parent can terminate us faster on shutdown
            status.value = WorkerStatus.Sleeping
            time.sleep(sleepTime)
            status.value = WorkerStatus.Working
Пример #13
0
def parse(filename, filestream):
    """Run a file through a Pygments lexer and extract its TODO comments.

    Returns the formatter's output (a JSON string of TODO entries) on
    success, or an empty list when the file cannot be read or no lexer
    matches.  Callers treat any zero-length result as "no TODOs", so the
    mixed return type is part of the contract.
    """
    try:
        # Replace unreadable unicode chars with '?' so the lexer sees ASCII
        codeInput = filestream.read().encode('ascii', 'replace')
    except Exception:
        log(WarningLevels.Debug, "Failed to read file %s as unicode."%(filename))
        return []

    try:
        lexer = guess_lexer_for_filename(filename, codeInput)
        log(WarningLevels.Debug, "Parsing %s with Lexer %s"%(filename, lexer.name))
        return highlight(
            codeInput, 
            lexer, 
            NullFormatter())
    except ClassNotFound:
        # No lexer registered for this filename/content combination
        log(WarningLevels.Debug, "Lexer not found for file %s"%(filename))
        return []
Пример #14
0
def main(argv):
    """Entry point for the parsing worker.

    argv is a worker-id string used to tag log messages.  Runs the worker in
    a child process and translates shutdown signals into a graceful stop.
    """
    src.todoLogging.logSender = "PAR%s"%(argv)

    log(WarningLevels.Info, "Starting Parsing Worker.")

    # Async global status value that is shared with the child process
    status = multiprocessing.Value('i', WorkerStatus.Working)

    try:
        # Start the worker and wait for it to end
        process = multiprocessing.Process(target = runWorker, args = (status, ))
        process.start()
        process.join()

    # BUG FIX: the original 'except KeyboardInterrupt, SystemExit:' caught
    # only KeyboardInterrupt, binding it to the name SystemExit; the tuple
    # form catches both exception types.
    except (KeyboardInterrupt, SystemExit):
        if status.value == WorkerStatus.Sleeping:
            # Worker is idle; safe to kill it outright
            log(WarningLevels.Info, "Shutdown signal received while asleep.  Parsing worker shutting down.")
            process.terminate()
            process.join()
        else:
            # Worker is mid-parse; ask it to stop and wait for it
            log(WarningLevels.Info, "Shutdown signal received.  Allow Parser to finish current operation.")
            status.value = WorkerStatus.Dead
            process.join()
Пример #15
0
def deleteLocalRepo(repo):
    """Remove the local clone of repo from the 'repos/' directory."""
    log(WarningLevels.Info, "Deleting local repo %s/%s"%(repo.userName, repo.repoName))
    localPath = 'repos/repos::%s-%s'%(repo.userName, repo.repoName)
    callWithLogging(['rm', '-rf', localPath])
Пример #16
0
    log(WarningLevels.Info, "Starting Tagging Worker.")

    # Async global status value that is shared with the child process
    status = multiprocessing.Value('i', WorkerStatus.Working)

    try:
        # Start the worker and wait for it to end
        process = multiprocessing.Process(target = runWorker, args = (status, ))
        process.start()
        process.join()

    # NOTE(review): Python 2 comma syntax -- this catches ONLY
    # KeyboardInterrupt and binds it to the name SystemExit; it was likely
    # meant to be 'except (KeyboardInterrupt, SystemExit):'.
    except KeyboardInterrupt, SystemExit:
        if status.value == WorkerStatus.Sleeping:
            # Worker is idle; safe to kill it outright
            log(WarningLevels.Info, "Shutdown signal received while asleep.  Tagging worker shutting down.")
            process.terminate()
            process.join()
        else:
            # Worker is mid-operation; ask it to stop and wait for it
            log(WarningLevels.Info, "Shutdown signal received.  Allow Tagger to finish current operation.")
            status.value = WorkerStatus.Dead
            process.join()   

    log(WarningLevels.Info, "Tagging Worker has shut down.")    


if __name__ == "__main__":
    # Worker id comes from the first CLI argument, defaulting to "0"
    workerId = sys.argv[1] if len(sys.argv) > 1 else "0"
    main(workerId)

Пример #17
0
def renderTemplate(tempString, data):
    """Render the template string tempString with the mapping data.

    Returns the rendered string, or None when rendering fails (bad template
    syntax, missing fields, etc.).
    """
    try:
        # Equivalent to the original unbound-method call
        # Template.render(Template(tempString), data), but idiomatic
        return Template(tempString).render(data)
    except Exception:
        log(WarningLevels.Debug, "TODO Failed to render to a template \'%s\': %s"%(tempString, data))
        return None
Пример #18
0
        # Start the worker and wait for it to end
        process = multiprocessing.Process(target = runWorker, args = (status, ))
        process.start()
        process.join()

    # NOTE(review): Python 2 comma syntax -- this catches ONLY
    # KeyboardInterrupt and binds it to the name SystemExit; it was likely
    # meant to be 'except (KeyboardInterrupt, SystemExit):'.
    except KeyboardInterrupt, SystemExit:
        if status.value == WorkerStatus.Sleeping:
            # Worker is idle; safe to kill it outright
            log(WarningLevels.Info, "Shutdown signal received while asleep.  Parsing worker shutting down.")
            process.terminate()
            process.join()
        else:
            # Worker is mid-parse; ask it to stop and wait for it
            log(WarningLevels.Info, "Shutdown signal received.  Allow Parser to finish current operation.")
            status.value = WorkerStatus.Dead
            process.join()   

    log(WarningLevels.Info, "Parsing Worker has shut down.")    


if __name__ == "__main__":
    # Worker id comes from the first CLI argument, defaulting to "0"
    workerId = sys.argv[1] if len(sys.argv) > 1 else "0"
    main(workerId)