def walk(repoDir):
    """Walk repoDir recursively and collect TODOs from every parseable file.

    Returns a list of dicts (one per TODO, as produced by parse()) with an
    added 'filename' key holding the path relative to repoDir.
    Directories named in IGNORE_LIST are not descended into.
    """
    todos = []
    repoDirLen = len(repoDir)
    for dirname, dirnames, filenames in os.walk(repoDir):
        for filename in filenames:
            fullPath = os.path.join(dirname, filename)
            try:
                # Narrowed from a bare except: only open() failures are expected here.
                fin = codecs.open(fullPath, encoding = 'utf-8')
            except (IOError, OSError):
                log(WarningLevels.Warn, "File %s cannot be opened. Skipping."%(filename))
                continue

            try:
                parsed = parse(filename, fin)
            finally:
                # BUG FIX: the handle was previously never closed (leaked per file).
                fin.close()

            #No TODO's were found
            if len(parsed) == 0:
                continue

            parsed = json.loads(parsed)
            for p in parsed:
                # Store the path relative to the repo root.
                p['filename'] = fullPath[repoDirLen:]

            # (Concatenating an empty list is a no-op, so no extra guard needed.)
            todos = todos + parsed

        # Advanced usage:
        # editing the 'dirnames' list will stop os.walk() from recursing into there.
        dirnames[:] = [dn for dn in dirnames if dn not in IGNORE_LIST]

    return todos
def runWorker(status):
    """Cloning worker loop.

    Pops repo keys off the cloning queue, clones each repo locally, and pushes
    the key onto the parsing queue. Sleeps when there is nothing to clone or
    the parse queue is full. Runs until status.value is WorkerStatus.Dead.
    """
    #This causes this thread to ignore interrupt signals so they are only handled by parent
    signal.signal(signal.SIGINT, signal.SIG_IGN)

    #Loop will be closed externally
    while status.value != WorkerStatus.Dead:
        try:
            cloneCount = redis.llen(RepoQueues.Cloning)
            parseCount = redis.llen(RepoQueues.Parsing)
        except Exception:
            # BUG FIX: was a bare except, which would also swallow
            # SystemExit/KeyboardInterrupt; narrowed to Exception.
            log(WarningLevels.Fatal, "Cloning Worker unable to reach Redis")
            break

        if cloneCount > 0 and parseCount < int(settings.maxParseQueueCount):
            repoKey = redis.lpop(RepoQueues.Cloning)

            repo = Repo()
            repo.loadFromKey(repoKey)

            #sanity check our loaded key
            assert repo.key() == repoKey, "Bad repo saved in cloning Queue! Key %s not found!"%(repoKey)

            #clone the repo and add it to the parse queue
            src.todoMelvin.checkoutRepo(repo)
            redis.rpush(RepoQueues.Parsing, repoKey)
        else:
            sleepTime = float(settings.clonerSleepTime)
            log(WarningLevels.Debug, "Cloning Worker going to sleep...")

            #Set to sleeping for faster shutdown
            status.value = WorkerStatus.Sleeping
            time.sleep(sleepTime)
            status.value = WorkerStatus.Working
def checkoutRepo(repo):
    """Clone the repo's git URL into the local 'repos' directory and mark it Cloned."""
    log(WarningLevels.Info, "Cloning %s..."%(repo.key()))

    # Target directory name is the repo key with '/' flattened to '-'.
    targetDir = 'repos/%s' % (repo.key().replace('/', '-'))
    callWithLogging(['git', 'clone', '--quiet', repo.gitUrl, targetDir])
    setCommitSHAFromClone(repo)

    repo.status = "Cloned"
    repo.save()
def parseRepo(repo):
    """Parse a cloned repo for TODOs; queue it for posting, or drop it from Redis
    when nothing was found."""
    src.todoMelvin.parseRepoForTodos(repo)

    if not repo.Todos:
        log(WarningLevels.Debug, "0 TODOs found, deleting from Redis.")
        redis.delete(repo.key())
    else:
        redis.rpush(RepoQueues.Posting, repo.key())
def addRepoToRedis(repo):
    """Add a Github repo to Redis if it is not already tracked.

    Returns the newly created redis repo object, or None when the repo
    already existed.
    """
    # Guard clause: nothing to do for a repo we already track.
    if repoExists(repo.owner.login, repo.name):
        return None

    redisRepo = addNewRepo(repo)
    log(WarningLevels.Info, "New Repo %s/%s added to Redis"%(repo.owner.login, repo.name))
    return redisRepo
def getGithubSHA(self, gh):
    """Return the commit SHA at the tip of this repo's tracked branch.

    Queries the Github API for the repo's branches and matches self.branch.
    Returns None when the branch is not found or the API call fails.
    """
    try:
        branches = gh.repos.list_branches(self.userName, self.repoName)
        for branch in branches.all():
            if branch.name == self.branch:
                return branch.commit.sha
    except Exception:
        # BUG FIX: was a bare except, which would also swallow
        # SystemExit/KeyboardInterrupt; narrowed to Exception.
        log(WarningLevels.Warn, "Failed to get SHA for %s/%s"%(self.userName, self.repoName))

    return None
def parseRepoForTodos(repo):
    """Run the TODO walker over the repo's local checkout, record each TODO,
    and mark the repo as Parsed."""
    # Local checkout lives under ./repos with '/' in the key flattened to '-'.
    path = os.path.join(os.getcwd(), 'repos', repo.key().replace('/', '-'))

    log(WarningLevels.Info, "Parsing repo %s for TODOs..."%(repo.key()))
    todoList = walk(path)
    log(WarningLevels.Info, "%i TODOs found in %s"%(len(todoList), repo.key()))

    for todo in todoList:
        buildTodo(repo, todo)

    repo.status = "Parsed"
    repo.save()
def runWorker(status):
    # Parsing worker main loop: pops repo keys from the parsing queue, parses
    # each repo for TODOs in a child process (bounded by a timeout), then
    # removes the local checkout. Runs until status.value is WorkerStatus.Dead.
    #This causes this thread to ignore interrupt signals so they are only handled by parent
    signal.signal(signal.SIGINT, signal.SIG_IGN)

    #Loop will be closed externally
    while status.value != WorkerStatus.Dead:
        try:
            parseCount = redis.llen(RepoQueues.Parsing)
        except:
            log(WarningLevels.Fatal, "Parsing Worker unable to reach Redis")
            break

        if parseCount > 0:
            repoKey = redis.lpop(RepoQueues.Parsing)
            repo = Repo()
            repo.loadFromKey(repoKey)

            #sanity check our loaded key
            assert repo.key() == repoKey, "Bad repo saved in parsing Queue! Key %s not found!"%(repoKey)

            #Parse repo for todos and then delete local content
            # Parsing runs in a separate process so a hung/slow parse can be
            # terminated without killing this worker.
            parser = multiprocessing.Process(target = parseRepo, args = (repo,))
            startTime = time.time()
            parser.start()

            # Poll the child every half second, watching for timeout and shutdown.
            while parser.is_alive():
                time.sleep(0.5)
                if time.time() - startTime > float(settings.parserRepoTimeout):
                    # Parse ran too long: kill the child and drop the repo from Redis.
                    parser.terminate()
                    parser.join()
                    log(WarningLevels.Warn, "Parse timed out, skipping the rest of the parse.")
                    redis.delete(repoKey)
                    break

                if status.value == WorkerStatus.Dead:
                    #Worker was killed during parsing, cleanup
                    # Re-queue the key so another worker can pick it up later.
                    parser.terminate()
                    parser.join()
                    log(WarningLevels.Debug, "Parsing Interrupted, returning to parsing queue.")
                    redis.rpush(RepoQueues.Parsing, repoKey)
                    return #Skip the rest and kill the process

            # Local checkout is removed whether the parse finished or timed out.
            src.todoMelvin.deleteLocalRepo(repo)
        else:
            sleepTime = float(settings.parserSleepTime)
            log(WarningLevels.Debug, "Parsing Worker going to sleep...")

            #Set to sleeping for faster shutdown
            status.value = WorkerStatus.Sleeping
            time.sleep(sleepTime)
            status.value = WorkerStatus.Working
def findRepos(gh, count):
    """Poll the public Github event stream until `count` distinct valid repos
    have been collected.

    Returns the list of repos (empty when count <= 0). Note: keeps polling
    indefinitely until enough valid repos are seen.
    """
    found = []
    if count <= 0:
        return found

    while len(found) < count:
        for event in gh.events.list().iterator():
            candidate = checkForValidEvent(gh, event)
            if candidate and candidate not in found:
                found.append(candidate)
            if len(found) == count:
                break

    log(WarningLevels.Info, "%i valid repos found from Github"%(len(found)))
    return found
def format(self, tokensource, outfile): linenumber = 1 # look for todos. comments = [] t = clock() for ttype, value in tokensource: #skip giant comments if len(value) > int(settings.arbitraryTokenMaxLength): log(WarningLevels.Debug, "Large Comment Skipped. Size: %s Max: %s"%(len(value), settings.arbitraryTokenMaxLength)) continue #Dont allow parsing a file for longer than the timeout if clock() - t >= float(settings.fileParsingTimeout): log(WarningLevels.Debug, "File timeout. %i TODOs committed"%(len(comments))) outfile.write(json.dumps(comments)) return if ttype.__str__() == Comment.__str__() or ttype.parent.__str__() == Comment.__str__(): todoQualified = False for string in todoQualifiers: if string in value.lower(): todoQualified= True break for string in todoDisqualifiers: if string in value.lower(): todoQualified= False break if todoQualified: comments.append({ 'value': value, 'linenumber': linenumber, }) linenumber += self.instances(value, '\n') # This feels so f*****g derpy but the Formatter doesn't let me pass # out python objects. JSON ftw. outfile.write(json.dumps(comments))
def runWorker(status):
    # Posting worker loop: pops a repo off the posting queue, posts at most
    # ONE un-posted TODO from it as a Github issue, then moves the repo to the
    # repo graveyard. Runs until status.value is WorkerStatus.Dead.
    # This causes this thread to ignore interrupt signals so they are only handled by parent
    signal.signal(signal.SIGINT, signal.SIG_IGN)

    # Loop will be closed externally
    while status.value != WorkerStatus.Dead:
        try:
            postCount = redis.llen(RepoQueues.Posting)
        except:
            log(WarningLevels.Fatal, "Posting Worker unable to reach Redis")
            break

        if postCount > 0:
            repoKey = redis.lpop(RepoQueues.Posting)
            repo = Repo()
            repo.loadFromKey(repoKey)

            # sanity check our loaded key
            assert repo.key() == repoKey, "Bad repo saved in posting Queue! Key %s not found!" % (repoKey)

            # Find the first TODO without an issue URL and post it.
            for todo in repo.Todos:
                if len(todo.issueURL) == 0:
                    repo.lastTodoPosted = todo.key(repo)
                    repo.lastTodoPostDate = datetime.now().strftime("%m/%d/%Y %H:%M:%S")

                    # Generate the issue
                    data = src.todoIssueGenerator.buildIssue(todo, repo)

                    # post the damn issue and save the url
                    # In debug mode issues are posted to our own repo instead
                    # of the target repo.
                    issue = None
                    if settings.debug.lower() == "true":
                        issue = gh.issues.create(data, "p4r4digm", "todo-helper")  # post to our todo-helper
                    else:
                        issue = gh.issues.create(data, repo.userName, repo.repoName)
                        pass

                    todo.issueURL = issue.url

                    # put todo in todo graveyard
                    redis.rpush(RepoQueues.TodoGY, todo.key(repo))
                    repo.save()
                    log(WarningLevels.Info, "Issue posted to Github!")
                    break

            # throw repo into graveyard
            # NOTE(review): this runs even when the repo still has un-posted
            # TODOs (only one is posted per pass) — confirm the repo is meant
            # to be graveyarded after a single issue.
            redis.rpush(RepoQueues.RepoGY, repo.key())
        else:
            sleepTime = float(settings.posterSleepTime)
            log(WarningLevels.Debug, "Posting Worker going to sleep...")

            # Set to sleeping for faster shutdown
            status.value = WorkerStatus.Sleeping
            time.sleep(sleepTime)
            status.value = WorkerStatus.Working
def runWorker(status):
    """Tagging worker loop.

    When the cloning queue drops below its minimum size, searches Github for
    new valid repos, adds them to Redis, and tags them in the cloning queue.
    Sleeps while the queue is full. Runs until status.value is WorkerStatus.Dead.
    """
    #This causes this thread to ignore interrupt signals so they are only handled by parent
    signal.signal(signal.SIGINT, signal.SIG_IGN)

    minCount = int(settings.minCloneQueueCount)
    maxCount = int(settings.maxCloneQueueCount)

    #Loop will be closed externally
    while status.value != WorkerStatus.Dead:
        try:
            cloneCount = redis.llen(RepoQueues.Cloning)
        except Exception:
            # BUG FIX: was a bare except, which would also swallow
            # SystemExit/KeyboardInterrupt; narrowed to Exception.
            log(WarningLevels.Fatal, "Tagging Worker unable to reach Redis")
            break

        if cloneCount < minCount:
            log(WarningLevels.Info, "Tagger detected low clone queue, Searching for %i new repos..."%(maxCount - cloneCount))
            repoList = src.todoMelvin.findRepos(gh, maxCount - cloneCount)

            addedCount = 0
            for r in repoList:
                #attempts to add repo to redis (is validaed first)
                repo = src.todoMelvin.addRepoToRedis(r)

                #Repo was added, tag it in the cloning queue
                if repo:
                    redis.rpush(RepoQueues.Cloning, repo.key())
                    addedCount += 1

            log(WarningLevels.Info, "Tagger added %i new repos to cloning Queue."%(addedCount))
        else:
            sleepTime = float(settings.taggerSleepTime)
            log(WarningLevels.Debug, "Tagger queue is full. Going to sleep...")

            #Set to sleeping for faster shutdown
            status.value = WorkerStatus.Sleeping
            time.sleep(sleepTime)
            status.value = WorkerStatus.Working
def parse(filename, filestream):
    """Lex the file's contents and return the TODO formatter's JSON output.

    Returns [] when the file cannot be read/decoded or when no Pygments lexer
    matches the filename (callers treat a zero-length result as "no TODOs").
    """
    try:
        codeInput = filestream.read().encode('ascii', 'replace') #replace unreadable unicode chars to '?'
    except (IOError, OSError, UnicodeError):
        # Narrowed from a bare except: read/decoding failures are expected here.
        log(WarningLevels.Debug, "Failed to read file %s as unicode."%(filename))
        return []

    try:
        lexer = guess_lexer_for_filename(filename, codeInput)
        log(WarningLevels.Debug, "Parsing %s with Lexer %s"%(filename, lexer.name))
        return highlight(codeInput, lexer, NullFormatter())
    except ClassNotFound:
        log(WarningLevels.Debug, "Lexer not found for file %s"%(filename))
        return []
def main(argv):
    """Entry point for a parsing worker; argv tags this worker in the logs.

    Spawns runWorker in a child process and translates shutdown signals into
    either an immediate terminate (while asleep) or a graceful drain.
    """
    src.todoLogging.logSender = "PAR%s"%(argv)
    log(WarningLevels.Info, "Starting Parsing Worker.")

    #async global status value that is shared with processes
    status = multiprocessing.Value('i', WorkerStatus.Working)

    try:
        #Start the function and wait for it to end
        process = multiprocessing.Process(target = runWorker, args = (status, ))
        process.start()
        process.join()
    except (KeyboardInterrupt, SystemExit):
        # BUG FIX: 'except KeyboardInterrupt, SystemExit:' is the Python 2
        # binding form — it caught ONLY KeyboardInterrupt and bound it to the
        # name SystemExit. A tuple catches both exception types.
        if status.value == WorkerStatus.Sleeping:
            log(WarningLevels.Info, "Shutdown signal received while asleep. Parsing worker shutting down.")
            process.terminate()
            process.join()
        else:
            log(WarningLevels.Info, "Shutdown signal received. Allow Parser to finish current operation.")
            status.value = WorkerStatus.Dead
            process.join()
def deleteLocalRepo(repo):
    """Remove the repo's local checkout from the 'repos' directory."""
    log(WarningLevels.Info, "Deleting local repo %s/%s"%(repo.userName, repo.repoName))
    localPath = 'repos/repos::%s-%s'%(repo.userName, repo.repoName)
    callWithLogging(['rm', '-rf', localPath])
log(WarningLevels.Info, "Starting Tagging Worker.") #async global status value that is shared with processes status = multiprocessing.Value('i', WorkerStatus.Working) try: #Start the function and wait for it to end process = multiprocessing.Process(target = runWorker, args = (status, )) process.start() process.join() except KeyboardInterrupt, SystemExit: if status.value == WorkerStatus.Sleeping: log(WarningLevels.Info, "Shutdown signal received while asleep. Tagging worker shutting down.") process.terminate() process.join() else: log(WarningLevels.Info, "Shutdown signal received. Allow Tagger to finish current operation.") status.value = WorkerStatus.Dead process.join() log(WarningLevels.Info, "Tagging Worker has shut down.") if __name__ == "__main__": if len(sys.argv) > 1: main(sys.argv[1]) else: main("0")
def renderTemplate(tempString, data):
    """Render the template string with `data`; returns None on any render failure."""
    try:
        # Rewritten from the confusing unbound-method form
        # Template.render(Template(tempString), data) — identical behavior.
        return Template(tempString).render(data)
    except Exception:
        # Narrowed from a bare except, which would also swallow
        # SystemExit/KeyboardInterrupt.
        log(WarningLevels.Debug, "TODO Failed to render to a template \'%s\': %s"%(tempString, data))
        return None
#Start the function and wait for it to end process = multiprocessing.Process(target = runWorker, args = (status, )) process.start() process.join() except KeyboardInterrupt, SystemExit: if status.value == WorkerStatus.Sleeping: log(WarningLevels.Info, "Shutdown signal received while asleep. Parsing worker shutting down.") process.terminate() process.join() else: log(WarningLevels.Info, "Shutdown signal received. Allow Parser to finish current operation.") status.value = WorkerStatus.Dead process.join() log(WarningLevels.Info, "Parsing Worker has shut down.") if __name__ == "__main__": if len(sys.argv) > 1: main(sys.argv[1]) else: main("0")