def authenticate(self):
		"""
		authenticate()
		Authenticates this application to github using
		the cas-user git user credentials. This is hopefully temporary!
		"""

		s = requests.Session()
		username = config["github"]["user"]
		password = config["github"]["pass"]
		s.auth = (username, password)
		payload = {"scopes": ["repo"]}
		r = s.get(self.request_auth, params=payload)

		if r.headers.get('x-ratelimit-remaining') == '0':
			logging.info("Github quota limit hit -- waiting")

			# Wait up to an hour until we can continue..
			while r.headers.get('x-ratelimit-remaining') == '0':
				time.sleep(600) # Wait 10 minutes and try again
				r = s.get(self.request_auth, params=payload)

		if r.status_code >= 400:
			msg = r.json().get('message')
			logging.error("Failed to authenticate issue tracker: \n" + str(msg))
			return # Exit
		else:
			data = r.json()[0]
			self.auth_token = data.get("token")
			requests_left = r.headers.get('x-ratelimit-remaining')
			logging.info("Analyzer has " + requests_left + " issue tracker calls left this hour")
Example #2
    def authenticate(self):
        """
        authenticate()
        Authenticates this application to github using
        the cas-user git user credentials. This is hopefully temporary!
        """

        s = requests.Session()
        username = config["github"]["user"]
        password = config["github"]["pass"]
        s.auth = (username, password)
        payload = {"scopes": ["repo"]}
        r = s.get(self.request_auth, params=payload)

        if r.headers.get('x-ratelimit-remaining') == '0':
            logging.info("Github quota limit hit -- waiting")

            # Wait up to an hour until we can continue..
            while r.headers.get('x-ratelimit-remaining') == '0':
                time.sleep(600)  # Wait 10 minutes and try again
                r = s.get(self.request_auth, params=payload)

        if r.status_code >= 400:
            msg = r.json().get('message')
            logging.error("Failed to authenticate issue tracker: \n" + str(msg))
            return  # Exit
        else:
            data = r.json()[0]
            self.auth_token = data.get("token")
            requests_left = r.headers.get('x-ratelimit-remaining')
            logging.info("Analyzer has " + requests_left + " issue tracker calls left this hour")
	def findIssueOpened(self, correctiveCommit):
		"""
		findIssueIds()
		If the corrective change/commit links to a issue in the issue tracker, returns
		the date of oldest open issue found otherwise returns none
		"""
		issue_opened = None

		if(self.issueTracker is None or hasattr(self.issueTracker, "getDateOpened") == False):
			return None

		idMatch = re.compile('#[\d]+')
		issue_ids = idMatch.findall(correctiveCommit.commit_message)
		issue_ids = [issue_id.strip('#') for issue_id in issue_ids] # Remove the '#' from ids

		if len(issue_ids) > 0:
			issue_opened = self.issueTracker.getDateOpened(issue_ids[0])
			# Use the oldest open bug
			for issue_id in issue_ids:
				logging.info('Searching for issue id: ' + issue_id)
				curr_issue_opened = self.issueTracker.getDateOpened(issue_id)

				# Verify that an issue was found.
				if curr_issue_opened is not None:
					if int(curr_issue_opened) < int(issue_opened):
						issue_opened = curr_issue_opened

		return issue_opened
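A standalone sketch of the issue-id extraction above, with a hypothetical commit message (expected output shown in the comment):

import re

commit_message = "Fix crash on empty input, closes #42 and #7"  # hypothetical

id_match = re.compile(r'#\d+')
issue_ids = [issue_id.strip('#') for issue_id in id_match.findall(commit_message)]
print(issue_ids)  # ['42', '7']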
Example #4
    def checkIngestion(self):
        """Check if any repo needs to be ingested"""

        session = Session()
        repo_update_freq = int(config['repoUpdates']['freqInDays'])
        refresh_date = datetime.utcnow() - timedelta(days=repo_update_freq)

        repos_to_get = (session.query(Repository)
                            .filter(
                                (Repository.status == "Waiting to be Ingested") |
                                ((Repository.ingestion_date < refresh_date) &
                                 (Repository.status != "Error") &
                                 (Repository.status != "Analyzing")))
                            .all())

        for repo in repos_to_get:
            logging.info("Adding repo " + repo.id + " to work queue for ingesting")
            repo.status = "In Queue to be Ingested"
            session.commit() # update the status of repo
            self.workQueue.add_task(ingest, repo.id)

        session.close()
    def getMedian(self, metric):
        """
    Helper function for the method calculateMedians.
    Takes in a metric and returns a string property of the results
    @private
    """
        median_props = ""

        try:
            # R functions to be used
            medianFn = robjects.r["median"]
            wilcoxFn = robjects.r["wilcox.test"]

            metric_buggy = getattr(self.metrics, metric + "_buggy")
            metric_nonbuggy = getattr(self.metrics, metric + "_nonbuggy")

            # First check p-values, if signficant then calculate median
            pvalue = self.wilcoxFn(robjects.FloatVector(metric_buggy), robjects.FloatVector(metric_nonbuggy))[2][0]
            buggy_median = self.medianFn(robjects.FloatVector(metric_buggy))
            nonbuggy_median = self.medianFn(robjects.FloatVector(metric_nonbuggy))
            median_props += '"' + metric + 'buggy":"' + str(buggy_median[0]) + '", '
            median_props += '"' + metric + 'nonbuggy":"' + str(nonbuggy_median[0]) + '", '

            if pvalue <= self.psig:
                median_props += '"' + metric + '_sig":"1", '
            else:
                median_props += '"' + metric + '_sig":"0", '

        except:
            # catch the case where we haven't made any observations to do this metric
            logging.info("Metric " + metric + " could not be used in the median model for repo " + self.repo_id)

        return median_props
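For reference, a minimal rpy2 sketch of the calls getMedian relies on; the [2][0] indexing mirrors the snippet's assumption that p.value is the third component of the wilcox.test result (requires R and rpy2; the observation values are hypothetical):

import rpy2.robjects as robjects

median_fn = robjects.r["median"]
wilcox_fn = robjects.r["wilcox.test"]

buggy = robjects.FloatVector([3.0, 5.0, 7.0, 9.0])     # hypothetical observations
nonbuggy = robjects.FloatVector([1.0, 2.0, 2.5, 3.0])  # hypothetical observations

pvalue = wilcox_fn(buggy, nonbuggy)[2][0]  # p.value component, as in getMedian
print(median_fn(buggy)[0], pvalue)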
Example #6
    def notify(self):
        """
		Notify all subscribers that repo has been analyzed and is ready
		to be viewed
		"""

        FROM = "*****@*****.**"
        TO = self.subscribers
        SUBJECT = "Your repository has been analyzed"
        TEXT = "Your analyzed repository is now ready to be viewed at http://kiwi.se.rit.edu/repo/" + self.repo

        # prepare actual message
        message = """\From: %s\nTo: %s\nSubject: %s\n\n%s""" % (
            FROM, ", ".join(TO), SUBJECT, TEXT)

        try:
            server = smtplib.SMTP("smtp.gmail.com", 587)
            server.ehlo()
            server.starttls()
            server.login(self.gmail_user, self.gmail_pwd)
            server.sendmail(FROM, TO, message)
            server.quit()

            logging.info("Notification sent successfully")

        except:
            logging.error("Failed to send notification")
Example #7
    def findIssueOpened(self, correctiveCommit):
        """
		findIssueIds()
		If the corrective change/commit links to a issue in the issue tracker, returns
		the date of oldest open issue found otherwise returns none
		"""
        issue_opened = None

        if (self.issueTracker is None
                or hasattr(self.issueTracker, "getDateOpened") == False):
            return None

        idMatch = re.compile('#[\d]+')
        issue_ids = idMatch.findall(correctiveCommit.commit_message)
        issue_ids = [issue_id.strip('#')
                     for issue_id in issue_ids]  # Remove the '#' from ids

        if len(issue_ids) > 0:
            issue_opened = self.issueTracker.getDateOpened(issue_ids[0])
            # Use the oldest open bug
            for issue_id in issue_ids:
                logging.info('Searching for issue id: ' + issue_id)
                curr_issue_opened = self.issueTracker.getDateOpened(issue_id)

                # Verify that an issue was found.
                if curr_issue_opened is not None:
                    if int(curr_issue_opened) < int(issue_opened):
                        issue_opened = curr_issue_opened

        return issue_opened
	def notify(self):
		"""
		Notify all subscribers that repo has been analyzed and is ready
		to be viewed
		"""

		FROM = "*****@*****.**"
		TO = self.subscribers
		SUBJECT = "Your repository has been analyzed"
		TEXT = "Your analyzed repository is now ready to be viewed at http://kiwi.se.rit.edu/repo/" + self.repo

		# prepare actual message
		message = """\From: %s\nTo: %s\nSubject: %s\n\n%s""" % (FROM, ", ".join(TO), SUBJECT, TEXT)
		
		try:
			server = smtplib.SMTP("smtp.gmail.com", 587)
			server.ehlo()
			server.starttls()
			server.login(self.gmail_user, self.gmail_pwd) 
			server.sendmail(FROM, TO, message)
			server.quit()

			logging.info("Notification sent successfully")

		except:
			logging.error("Failed to send notification")
Example #9
def analyzeRepo(repository_to_analyze, session):
    """
    Analyzes the given repository
    @param repository_to_analyze	The repository to analyze.
    @param session                  SQLAlchemy session
    @private
    """
    repo_name = repository_to_analyze.name
    repo_id = repository_to_analyze.id
    last_analysis_date = repository_to_analyze.analysis_date

    # Update status of repo to show it is analyzing
    repository_to_analyze.status = "Analyzing"
    session.commit()

    logging.info('Worker analyzing repository id ' + repo_id)

    # all commits in descending order
    all_commits = (session.query(Commit)
                   .filter(Commit.repository_id == repo_id)
                   .order_by(Commit.author_date_unix_timestamp.desc())
                   .all()
                   )

    # corrective commits in ascending order
    # if updating, only get the corrective commits that have not been linked yet.
    # No need to re-link corrective commits that have already been linked with the bug-inducing commit.
    corrective_commits = (session.query(Commit)
                          .filter(
                            (Commit.fix == "True") &
                            (Commit.repository_id == repo_id) &
                            (Commit.linked == False)
                          )
                          .order_by(Commit.author_date_unix_timestamp.asc())
                          .all()
                          )

    logging.info("Linking " + str(len(corrective_commits)) + " new corrective commits for repo " + repo_id)

    try:
        git_commit_linker = GitCommitLinker(repo_id)
        git_commit_linker.linkCorrectiveCommits(corrective_commits, all_commits)
    except Exception as e:
        logging.exception("Got an exception linking bug fixing changes to bug inducing changes for repo " + repo_id)
        repository_to_analyze.status = "Error"
        session.commit()  # update repo status
        raise

    # Signify to CAS Manager that this repo is ready to have its model built
    if repository_to_analyze.status != "Error":
        repository_to_analyze.status = "In Queue to Build Model"
        session.commit()  # update repo status
Example #10
def analyzeRepo(repository_to_analyze, session):
    """
	Analyzes the given repository
	@param repository_to_analyze	The repository to analyze.
	@param session                  SQLAlchemy session
	@private
	"""
    repo_name = repository_to_analyze.name
    repo_id = repository_to_analyze.id
    last_analysis_date = repository_to_analyze.analysis_date

    # Update status of repo to show it is analyzing
    repository_to_analyze.status = "Analyzing"
    session.commit()

    logging.info('Worker analyzing repository id ' + repo_id)

    # all commits in descending order
    all_commits = (session.query(Commit)
                   .filter(Commit.repository_id == repo_id)
                   .order_by(Commit.author_date_unix_timestamp.desc())
                   .all())

    # corrective commits in ascending order
    # if updating, only get the corrective commits that have not been linked yet.
    # No need to re-link corrective commits that have already been linked with the bug-inducing commit.
    corrective_commits = (session.query(Commit)
                          .filter(
                              (Commit.fix == "True") &
                              (Commit.repository_id == repo_id) &
                              (Commit.linked == False))
                          .order_by(Commit.author_date_unix_timestamp.asc())
                          .all())

    logging.info("Linking " + str(len(corrective_commits)) +
                 " new corrective commits for repo " + repo_id)

    try:
        git_commit_linker = GitCommitLinker(repo_id)
        git_commit_linker.linkCorrectiveCommits(corrective_commits,
                                                all_commits)
    except Exception as e:
        logging.exception(
            "Got an exception linking bug fixing changes to bug inducing changes for repo "
            + repo_id)
        repository_to_analyze.status = "Error"
        session.commit()  # update repo status
        raise

    # Signify to CAS Manager that this repo is ready to have its model built
    if repository_to_analyze.status != "Error":
        repository_to_analyze.status = "In Queue to Build Model"
        session.commit()  # update repo status
        # after update commit.contains_bug & commit.fix label, parsing diff information
        git = Git()
        git.diff(repo_id)
Example #11
    def checkModel(self):
        """Check if any repo needs metrics to be generated"""

        session = Session()
        repos_to_get = (session.query(Repository).filter(
            (Repository.status == "In Queue to Build Model")).all())

        for repo in repos_to_get:
            logging.info("Adding repo " + repo.id +
                         " to model queue to finish analyzing")
            repo.status = "Building Model"
            session.commit()  # update status of repo
            self.modelQueue.put(repo.id)

        session.close()
Example #12
	def checkModel(self):
		"""Check if any repo needs metrics to be generated"""

		session = Session()
		repos_to_get = (session.query(Repository) 
							.filter( 
								(Repository.status == "In Queue to Build Model") )
							.all())

		for repo in repos_to_get:
			logging.info("Adding repo " + repo.id + " to model queue to finish analyzing")
			repo.status = "Building Model"
			session.commit() # update status of repo
			self.modelQueue.put(repo.id)

		session.close()
Example #13
def ingest(repo_id):
    """
  Ingest a repository with the given id. Gets the repository information
  from the repository table and starts ingesting using ingestRepo method
  @param repo_id   The repository id to ingest.
  """
    session = Session()
    repo_to_analyze = (session.query(Repository).filter(
        Repository.id == repo_id).all())

    # Verify that repo exists
    if len(repo_to_analyze) == 1:
        ingestRepo(repo_to_analyze[0], session)
    else:
        logging.info('Repo with id ' + repo_id + ' not found!')

    session.close()
Example #14
def analyze(repo_id):
    """
	Analyze the repository with the given id. Gets the repository from the repository table
	and starts analyzing it using the analyzeRepo method.
	@param repo_id		The repository id to analyze
	"""
    session = Session()

    repo_to_analyze = (session.query(Repository).filter(
        Repository.id == repo_id).all())

    # Verify that repo exists
    if len(repo_to_analyze) > 0:
        analyzeRepo(repo_to_analyze[0], session)
    else:
        logging.info('Repo with id ' + repo_id + ' not found!')

    session.close()
Example #15
    def getDateOpened(self, issueNumber):
        """
		getDateOpened()
		Gets the date the issue number was opened in unix time
		If issue cannot be found for whichever reason, returns null.
		"""
        header = {'Authorization': 'token ' + self.auth_token}
        r = requests.get(self.request_repos + "/" + self.owner + "/" +
                         self.repo + "/issues/" + issueNumber,
                         headers=header)

        data = r.json()

        # If forbidden, check the api quota and wait until we can continue
        if r.status_code == 403:
            if r.headers.get('x-ratelimit-remaining') == '0':
                logging.info("Github quota limit hit -- waiting")

                # Wait up to an hour until we can continue..
                while r.headers.get('x-ratelimit-remaining') == '0':
                    time.sleep(600)  # Wait 10 minutes and try again
                    r = requests.get(self.request_repos + "/" + self.owner +
                                     "/" + self.repo + "/issues/" +
                                     issueNumber,
                                     headers=header)
                    data = r.json()

        # Check for error codes (including a 403 that never recovered)
        if r.status_code >= 400:
            msg = data.get('message')
            logging.error("ISSUE TRACKER FAILURE: \n" + str(msg))
            return None
        else:
            try:
                date = (dateutil.parser.parse(
                    data.get('created_at'))).timestamp()
                return date
            except Exception:
                logging.error(
                    "ISSUE TRACKER FAILURE: Could not get created_at from github issues API"
                )
                return None
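For reference, a minimal sketch of the created_at conversion above, using a hypothetical ISO 8601 timestamp of the kind the GitHub issues API returns:

import dateutil.parser

created_at = "2013-10-07T17:30:00Z"  # hypothetical API value
opened_unix = dateutil.parser.parse(created_at).timestamp()
print(opened_unix)  # unix time, as getDateOpened returns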
def ingest(repo_id):
  """
  Ingest a repository with the given id. Gets the repository information
  from the repository table and starts ingesting using ingestRepo method
  @param repo_id   The repository id to ingest.
  """
  session = Session()
  repo_to_analyze = (session.query(Repository)
        .filter(Repository.id == repo_id)
        .all()
        )

  # Verify that repo exists
  if len(repo_to_analyze) == 1:
    ingestRepo(repo_to_analyze[0], session)
  else:
    logging.info('Repo with id ' + repo_id + ' not found!')

  session.close()
Example #17
	def checkAnalyzation(self):
		"""Checks if any repo needs to be analyzed"""

		session = Session()
		repo_update_freq = int(config['repoUpdates']['freqInDays'])
		refresh_date = str(datetime.utcnow() - timedelta(days=repo_update_freq))

		repos_to_get = (session.query(Repository)
						  .filter( (Repository.status == "Waiting to be Analyzed") )
						  .all()
						)
		
		for repo in repos_to_get:
			logging.info("Adding repo " + repo.id + " to work queue for analyzing.")
			repo.status = "In Queue to be Analyzed"
			session.commit() # update the status of repo
			self.workQueue.add_task(analyze, repo.id)

		session.close()
Example #19
    def checkAnalyzation(self):
        """Checks if any repo needs to be analyzed"""

        session = Session()
        repo_update_freq = int(config['repoUpdates']['freqInDays'])
        refresh_date = str(datetime.utcnow() -
                           timedelta(days=repo_update_freq))

        repos_to_get = (session.query(Repository).filter(
            (Repository.status == "Waiting to be Analyzed")).all())

        for repo in repos_to_get:
            logging.info("Adding repo " + repo.id +
                         " to work queue for analyzing.")
            repo.status = "In Queue to be Analyzed"
            session.commit()  # update the status of repo
            #self.workQueue.add_task(analyze, repo.id)  # Replace the line below by this line if you wish to allow multiple threads. For SQLite, this wont work so well.
            analyze(repo.id)

        session.close()
Example #20
	def notify(self, repo):
		""" Send e-mail notifications if applicable to a repo 
			used by checkBuildModel """

		notify = False
		notifier = None
		logging.info("Notifying subscribed users for repository " + repo.id)

		# Create the Notifier
		gmail_user = config['gmail']['user']
		gmail_pass = config['gmail']['pass']
		notifier = Notifier(gmail_user, gmail_pass, repo.name)

		# Add subscribers if applicable
		if repo.email is not None:
			notifier.addSubscribers([repo.email, gmail_user])
		else:
			notifier.addSubscribers([gmail_user])

		notifier.notify()
Example #22
def analyze(repo_id):
    """
	Analyze the repository with the given id. Gets the repository from the repository table
	and starts analyzing it using the analyzeRepo method.
	@param repo_id		The repository id to analyze
	"""
    session = Session()

    repo_to_analyze = (session.query(Repository)
                       .filter(Repository.id == repo_id)
                       .all()
                       )

    # Verify that repo exists
    if len(repo_to_analyze) > 0:
        analyzeRepo(repo_to_analyze[0], session)
    else:
        logging.info('Repo with id ' + repo_id + ' not found!')

    session.close()
Example #23
    def getMedian(self, metric):
        """
    Helper function for the method calculateMedians.
    Takes in a metric and returns a string property of the results
    @private
    """
        median_props = ""

        try:
            # R functions to be used
            medianFn = robjects.r['median']
            wilcoxFn = robjects.r['wilcox.test']

            metric_buggy = getattr(self.metrics, metric + "_buggy")
            metric_nonbuggy = getattr(self.metrics, metric + "_nonbuggy")

            # First check p-values, if signficant then calculate median
            pvalue = self.wilcoxFn(robjects.FloatVector(metric_buggy),
                                   robjects.FloatVector(metric_nonbuggy))[2][0]
            buggy_median = self.medianFn(robjects.FloatVector(metric_buggy))
            nonbuggy_median = self.medianFn(
                robjects.FloatVector(metric_nonbuggy))
            median_props += '"' + metric + 'buggy":"' + str(
                buggy_median[0]) + '", '
            median_props += '"' + metric + 'nonbuggy":"' + str(
                nonbuggy_median[0]) + '", '

            if pvalue <= self.psig:
                median_props += '"' + metric + '_sig":"1", '
            else:
                median_props += '"' + metric + '_sig":"0", '

        except:
            # catch the case where we haven't made any observations to do this metric
            logging.info("Metric " + metric +
                         " could not be used in the median model for repo " +
                         self.repo_id)

        return median_props
	def getDateOpened(self, issueNumber):
		"""
		getDateOpened()
		Gets the date the issue number was opened in unix time
		If issue cannot be found for whichever reason, returns null.
		"""
		header = {'Authorization': 'token ' + self.auth_token}
		r = requests.get(self.request_repos + "/" + self.owner + "/" + 
				self.repo + "/issues/" + issueNumber, headers=header)

		data = r.json()

		# If forbidden, check the api quota and wait until we can continue
		if r.status_code == 403:
			if r.headers.get('x-ratelimit-remaining') == '0':
				logging.info("Github quota limit hit -- waiting")

				# Wait up to an hour until we can continue..
				while r.headers.get('x-ratelimit-remaining') == '0':
					time.sleep(600) # Wait 10 minutes and try again
					r = requests.get(self.request_repos + "/" + self.owner + "/" +
						self.repo + "/issues/" + issueNumber, headers=header)
					data = r.json()

		# Check for other error codes (including a 403 that never recovered)
		if r.status_code >= 400:
			msg = data.get('message')
			logging.error("ISSUE TRACKER FAILURE: \n" + str(msg))
			return None
		else:
			try:
				date = (dateutil.parser.parse(data.get('created_at'))).timestamp()
				return date
			except Exception:
				logging.error("ISSUE TRACKER FAILURE: Could not get created_at from github issues API")
				return None
Example #25
def ingestRepo(repository_to_ingest, session):
    """
  Ingests a given repository
  @param repository_to_ingest   The repository to inspect
  @param session 				The SQLAlchemy session
  @private
  """
    logging.info('A worker is starting to scan repository: ' +
                 repository_to_ingest.id)

    # Update status of repo to show it is ingesting
    repository_to_ingest.status = "Ingesting"
    session.commit()

    local_repo = LocalRepository(repository_to_ingest)
    local_repo.sync()
    session.merge(repository_to_ingest)
    repository_to_ingest.status = "Waiting to be Analyzed"  # update status
    session.commit()

    logging.info('A worker finished ingesting repo ' + repository_to_ingest.id)

    session.close()
def ingestRepo(repository_to_ingest, session):
  """
  Ingests a given repository
  @param repository_to_ingest   The repository to inspect
  @param session 				The SQLAlchemy session
  @private
  """
  logging.info( 'A worker is starting to scan repository: ' +
                      repository_to_ingest.id )

  # Update status of repo to show it is ingesting
  repository_to_ingest.status = "Ingesting"
  session.commit()

  local_repo = LocalRepository(repository_to_ingest)
  local_repo.sync()
  session.merge(repository_to_ingest) 
  repository_to_ingest.status = "Waiting to be Analyzed" # update status
  session.commit() 

  logging.info( 'A worker finished ingesting repo ' + 
                  repository_to_ingest.id )

  session.close()
    def _linkCorrectiveCommit(self, commit):
        """
    links the corrective change/commit to the change/commit which was the
    cause. this is the purpose of this object

    @commit - the corrective change to link w/ the changes that introduces the
    problems/issues it fixes.
    """
        region_chunks = self.getModifiedRegions(commit)

        logging.info("Linkage for commit " + commit.commit_hash)
        for k, v in region_chunks.items():
            logging.info("-- file: " + k)
            logging.info("---- loc modified: " + str(v))

        bug_introducing_changes = self.gitAnnotate(region_chunks, commit)
        return bug_introducing_changes
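gitAnnotate is not shown in this listing; a minimal sketch of the SZZ-style step it performs -- blaming each modified line in the parent of the fixing commit to find the suspect change (illustrative only, not the project's exact implementation):

import subprocess

def blame_region(repo_path, fixing_commit, file_path, line):
    # Ask git who last touched this line *before* the fix was applied
    out = subprocess.check_output(
        ["git", "-C", repo_path, "blame", "--porcelain",
         "-L", str(line) + "," + str(line),
         fixing_commit + "^", "--", file_path])
    # The first token of porcelain output is the hash of the blamed commit
    return out.decode().split()[0]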
Example #29
from classifier.classifier import *
from caslogging import logging

logging.info('Test categorization... ')
classifier = Classifier()

# Test classification of corrective commits
# fix,bug,wrong,fail,problem

corrective_msg_1 = "fixed something"
corrective_msg_2 = "bam, there goes a bug!"
corrective_msg_3 = "x was wrong, but no more!"
corrective_msg_4 = "Houston, we *had* a problem"
corrective_msg_5 = "My watch is fun"
corrective_msg_6 = "This is definitively NOT a you-know what!"

assert(classifier.categorize(corrective_msg_1) == "Corrective")
assert(classifier.categorize(corrective_msg_2) == "Corrective")
assert(classifier.categorize(corrective_msg_3) == "Corrective")
assert(classifier.categorize(corrective_msg_4) == "Corrective")
assert(classifier.categorize(corrective_msg_5) != "Corrective")
assert(classifier.categorize(corrective_msg_6) != "Corrective")

# Test classification of feature additions
# new,add,requirement,initial,create

feature_msg_1 = "new awesome thing added to that brilliant code"
feature_msg_2 = "adding some color to this mundane gui!"
feature_msg_3 = "Adding requirement.."
feature_msg_4 = "This is an initial commit"
feature_msg_5 = "Creating a new class for x,y, AND z!"
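The Classifier itself is not included in this listing; a minimal keyword-matching sketch that is consistent with the corrective tests above (hypothetical, not the project's actual implementation):

import re

CORRECTIVE_KEYWORDS = ("fix", "bug", "wrong", "fail", "problem")

def categorize(message):
    # Prefix match so 'fixed' and 'fixes' count as 'fix'
    words = re.findall(r"[a-z]+", message.lower())
    if any(w.startswith(k) for k in CORRECTIVE_KEYWORDS for w in words):
        return "Corrective"
    return "Other"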
Example #30
"""
file: readRepo.py
author: Ben Grawi <*****@*****.**>
date: October 2013
description: The base script to call
"""
from caslogging import logging
import sys
from datetime import datetime, timedelta
from commit import Commit
from repository import *
from metrics import *
from localrepository import *

logging.info('Starting CASReader')

# Read the first argument and pass it in as a string
if len(sys.argv) > 1:
    arg = sys.argv[1]
else:
    arg = ''

if arg == "initDb":
    # Init the database
    logging.info('Initializing the Database...')
    Base.metadata.create_all(engine)
    logging.info('Done')
elif arg == "testRepos":

    logging.info('Making Test Repos')
Example #31
"""
file: readRepo.py
author: Ben Grawi <*****@*****.**>
date: October 2013
description: The base script to call
"""
from caslogging import logging
import sys
from datetime import datetime, timedelta
from commit import Commit
from repository import *
from metrics import *
from localrepository import *


logging.info("Starting CASReader")

# Read the first argument and pass it in as a string
if len(sys.argv) > 1:
    arg = sys.argv[1]
else:
    arg = ""

if arg == "initDb":
    # Init the database
    logging.info("Initializing the Database...")
    Base.metadata.create_all(engine)
    logging.info("Done")
elif arg == "testRepos":

    logging.info("Making Test Repos")
Example #32
	def checkBuildModel(self):
		""" Checks if any repo is awaiting to build model. 
			We are using a queue because we can't concurrently access R """

		session = Session()

		if not self.modelQueue.empty():
			repo_id = self.modelQueue.get()
			repo = (session.query(Repository).filter(Repository.id == repo_id).first())

			# use data only up to X months prior; we won't have sufficient data to build
			# models beyond that, as there may be bugs introduced in those months that
			# haven't been fixed yet, skewing our model.
			glm_model_time =  int(config['glm_modeling']['months']) 
			data_months_datetime = datetime.utcnow() - monthdelta(glm_model_time)
			data_months_unixtime = calendar.timegm(data_months_datetime.utctimetuple())
		
			# all commits for repo prior to current time - glm model time
			training_commits = (session.query(Commit)
						.filter( 
							( Commit.repository_id == repo_id ) &
							( Commit.author_date_unix_timestamp < str(data_months_unixtime))
						)
						.order_by( Commit.author_date_unix_timestamp.desc() )
						.all())

			# all commits for repo after or on current time - glm model time
			testing_commits = (session.query(Commit)
						.filter(
							( Commit.repository_id == repo_id ) &
							( Commit.author_date_unix_timestamp >= str(data_months_unixtime)))
						.all())
	
			try: 
				metrics_generator = MetricsGenerator(repo_id, training_commits, testing_commits)
				metrics_generator.buildAllModels()

				# monthly data dump - or rather, every 30 days.
				dump_refresh_date = str(datetime.utcnow() - timedelta(days=30))
				if repo.last_data_dump is None or repo.last_data_dump < dump_refresh_date:
					logging.info("Generating a monthly data dump for repository: " + repo_id)

					# Get all commits for the repository
					all_commits = (session.query(Commit)
						.filter( 
							( Commit.repository_id == repo_id )
						)
						.order_by( Commit.author_date_unix_timestamp.desc() )
						.all())

					metrics_generator.dumpData(all_commits)
					repo.last_data_dump = str(datetime.now().replace(microsecond=0))
					
				# Notify user if repo has never been analyzed previously
				if repo.analysis_date is None:
					self.notify(repo)
	
				logging.info("Repo " + repo_id + " finished analyzing.")
				repo.analysis_date = str(datetime.now().replace(microsecond=0))
				repo.status = "Analyzed"
				session.commit() # update status of repo
				session.close()

			# uh-oh
			except Exception as e:
				logging.exception("Got an exception building model for repository " + repo_id)

				repo.status = "Error"
				session.commit() # update repo status
				session.close()
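A minimal sketch of the training/testing cutoff computed above (assumes the MonthDelta package, whose class is spelled monthdelta or MonthDelta depending on the version -- both spellings appear in this listing):

import calendar
from datetime import datetime
from monthdelta import monthdelta

glm_model_time = 6  # hypothetical config['glm_modeling']['months'] value
cutoff = datetime.utcnow() - monthdelta(glm_model_time)
cutoff_unix = calendar.timegm(cutoff.utctimetuple())
# commits older than cutoff_unix train the model; newer ones test it
print(cutoff_unix)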
Example #33
repositories that need to be analyzed, analyzes them, and places
results in the metrics table.
"""

import sys
from datetime import datetime, timedelta
from repository import *
from commit import *
from bugfinder import *
from metricsgenerator import *
from githubissuetracker import *
from caslogging import logging
from notifier import *
from config import config

logging.info('Starting CASAnalyzer')

# Latest time to analyze repo (1 Day)
refresh_date = str(datetime.utcnow() - timedelta(days=1))
session = Session()

reposToAnalyze = (session.query(
    Repository).filter((Repository.analysis_date == None)
                       | (Repository.analysis_date < refresh_date)).all())

# Create the Notifier
gmail_user = config['gmail']['user']
gmail_pass = config['gmail']['pass']
notifier = Notifier(gmail_user, gmail_pass)

if len(reposToAnalyze) > 0:
Example #34
repositories that need to be analyzed, analyzes them, and places
results in the metrics table.
"""

import sys
from datetime import datetime, timedelta
from repository import *
from commit import *
from bugfinder import *
from metricsgenerator import *
from githubissuetracker import *
from caslogging import logging
from notifier import *
from config import config

logging.info('Starting CASAnalyzer')

# Latest time to analyze repo (1 Day)
refresh_date = str(datetime.utcnow() - timedelta(days=1))
session = Session()

reposToAnalyze = (session.query(Repository)
                  .filter( (Repository.analysis_date==None) |
                          (Repository.analysis_date < refresh_date)
                          )
                  .all()
                  )

# Create the Notifier
gmail_user = config['gmail']['user']
gmail_pass = config['gmail']['pass']
Example #35
	def checkBuildModel(self):
		""" Checks if any repo is awaiting to build model. 
			We are using a queue because we can't concurrently access R """

		session = Session()

		if not self.modelQueue.empty():
			repo_id = self.modelQueue.get()
			repo = (session.query(Repository).filter(Repository.id == repo_id).first())

			# use data only up to X months prior; we won't have sufficient data to build
			# models beyond that, as there may be bugs introduced in those months that
			# haven't been fixed yet, skewing our model.
			glm_model_time =  int(config['glm_modeling']['months']) 
			data_months_datetime = datetime.utcnow() - MonthDelta(glm_model_time)
			data_months_unixtime = calendar.timegm(data_months_datetime.utctimetuple())
		
			# all commits for repo prior to current time - glm model time
			training_commits = (session.query(Commit)
						.filter( 
							( Commit.repository_id == repo_id ) &
							( Commit.author_date_unix_timestamp < int(data_months_unixtime))
						)
						.order_by( Commit.author_date_unix_timestamp.desc() )
						.all())

			# all commits for repo after or on current time - glm model time
			testing_commits = (session.query(Commit)
						.filter(
							( Commit.repository_id == repo_id ) &
							( Commit.author_date_unix_timestamp >= int(data_months_unixtime)))
						.all())
	
			try: 
				metrics_generator = MetricsGenerator(repo_id, training_commits, testing_commits)
				metrics_generator.buildAllModels()

				# monthly data dump - or rather, every 30 days.
				dump_refresh_date = str(datetime.utcnow() - timedelta(days=30))
				if repo.last_data_dump is None or repo.last_data_dump < dump_refresh_date:
					logging.info("Generating a monthly data dump for repository: " + repo_id)

					# Get all commits for the repository
					all_commits = (session.query(Commit)
						.filter( 
							( Commit.repository_id == repo_id )
						)
						.order_by( Commit.author_date_unix_timestamp.desc() )
						.all())

					metrics_generator.dumpData(all_commits)
					repo.last_data_dump = str(datetime.now().replace(microsecond=0))
					
				# Notify user if repo has never been analyzed previously
				if repo.analysis_date is None:
					self.notify(repo)
	
				logging.info("Repo " + repo_id + " finished analyzing.")
				repo.analysis_date = str(datetime.now().replace(microsecond=0))
				repo.status = "Analyzed"
				session.commit() # update status of repo
				session.close()

			# uh-oh
			except Exception as e:
				logging.exception("Got an exception building model for repository " + repo_id)

				repo.status = "Error"
				session.commit() # update repo status
				session.close()