Example #1
    def _update_existing_references(self, repo_id, import_type):
        # updates existing references with newly added commit-file data
        pairs = self._get_new_commit_file_pairs(repo_id)
        intervals = [
            i for i in multiprocessing_util.get_tasks_intervals(
                pairs, self._num_processes) if len(i) > 0
        ]

        queue_intervals = multiprocessing.JoinableQueue()
        results = multiprocessing.Queue()

        # Start consumers
        multiprocessing_util.start_consumers(self._num_processes,
                                             queue_intervals, results)

        for interval in intervals:
            commit_file_extractor = Code2DbCommitFile(self._db_name,
                                                      self._git_repo_path,
                                                      interval, import_type,
                                                      self._config,
                                                      self._log_path)
            queue_intervals.put(commit_file_extractor)

        # Add end-of-queue markers
        multiprocessing_util.add_poison_pills(self._num_processes,
                                              queue_intervals)

        # Wait for all of the tasks to finish
        queue_intervals.join()
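
All of the examples in this section rely on the same producer-consumer helpers from multiprocessing_util, whose code is not shown here. The sketch below reconstructs what those helpers plausibly look like from the call sites above; the Consumer class, the __call__-based task protocol, and the exact chunking rule are assumptions, not confirmed project code.

import multiprocessing


class Consumer(multiprocessing.Process):
    # assumed worker: pulls task objects off the queue and invokes them
    # until it receives a poison pill (None)
    def __init__(self, task_queue, result_queue):
        super(Consumer, self).__init__()
        self._task_queue = task_queue
        self._result_queue = result_queue

    def run(self):
        while True:
            task = self._task_queue.get()
            if task is None:
                # poison pill: acknowledge it and shut down
                self._task_queue.task_done()
                break
            self._result_queue.put(task())
            self._task_queue.task_done()


def get_tasks_intervals(elements, num_processes):
    # assumed chunking: splits elements into num_processes contiguous
    # chunks of near-equal size; trailing chunks may be empty, which is
    # why every call site filters with len(i) > 0
    chunk_size = max(1, -(-len(elements) // num_processes))  # ceiling division
    return [elements[i:i + chunk_size]
            for i in range(0, num_processes * chunk_size, chunk_size)]


def start_consumers(num_processes, task_queue, result_queue):
    # spawns one Consumer per process slot
    for _ in range(num_processes):
        Consumer(task_queue, result_queue).start()


def add_poison_pills(num_processes, task_queue):
    # one None per consumer so every worker eventually terminates
    for _ in range(num_processes):
        task_queue.put(None)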
Example #2
    def _update_channels(self, instant_messaging_id):
        # updates the channels of an instant messaging service
        channel_ids = self._dao.get_channel_ids(instant_messaging_id)

        if channel_ids:
            intervals = [
                i for i in multiprocessing_util.get_tasks_intervals(
                    channel_ids, len(self._tokens)) if len(i) > 0
            ]

            queue_extractors = multiprocessing.JoinableQueue()
            results = multiprocessing.Queue()

            # Start consumers
            multiprocessing_util.start_consumers(len(self._tokens),
                                                 queue_extractors, results)

            for interval, token in zip(intervals, self._tokens):
                channel_extractor = SlackChannel2Db(
                    self._db_name, instant_messaging_id, interval,
                    token, self._config, self._log_path)
                queue_extractors.put(channel_extractor)

            # Add end-of-queue markers
            multiprocessing_util.add_poison_pills(len(self._tokens),
                                                  queue_extractors)

            # Wait for all of the tasks to finish
            queue_extractors.join()
Example #3
    def _get_channels(self, instant_messaging_id):
        # processes Slack channels
        channel_ids = self._get_channel_ids(instant_messaging_id)

        intervals = [
            i for i in multiprocessing_util.get_tasks_intervals(
                channel_ids, len(self._tokens)) if len(i) > 0
        ]

        queue_extractors = multiprocessing.JoinableQueue()
        results = multiprocessing.Queue()

        # Start consumers
        multiprocessing_util.start_consumers(len(self._tokens),
                                             queue_extractors, results)

        for interval, token in zip(intervals, self._tokens):
            channel_extractor = SlackChannel2Db(self._db_name,
                                                instant_messaging_id, interval,
                                                token, self._config,
                                                self._log_path)
            queue_extractors.put(channel_extractor)

        # Add end-of-queue markers
        multiprocessing_util.add_poison_pills(len(self._tokens),
                                              queue_extractors)

        # Wait for all of the tasks to finish
        queue_extractors.join()
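
The extractor objects queued above (SlackChannel2Db here, and the other *2Db classes elsewhere) are defined outside these snippets. For the pattern to work with a Consumer like the one sketched after example #1, each extractor needs to be picklable and callable. The class below is a hypothetical stand-in, not the project's code, showing the shape such a task would take:

class Channel2DbTask(object):
    # hypothetical stand-in for an extractor such as SlackChannel2Db:
    # plain attributes set in __init__ keep the object picklable for the
    # JoinableQueue, and all real work happens in __call__, which runs
    # inside the consumer process
    def __init__(self, db_name, interval, token, log_path):
        self._db_name = db_name
        self._interval = interval
        self._token = token
        self._log_path = log_path

    def __call__(self):
        imported = 0
        for channel_id in self._interval:
            # fetch the channel via the API token and write it to the DB
            imported += 1
        return imported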
Example #4
    def _get_topics(self, forum_id):
        # updates the topics of a forum
        topic_ids = self._dao.get_topic_ids(forum_id)

        if topic_ids:
            self._update_topics_info(forum_id)

            intervals = [
                i for i in multiprocessing_util.get_tasks_intervals(
                    topic_ids, self._num_processes) if len(i) > 0
            ]

            queue_extractors = multiprocessing.JoinableQueue()
            results = multiprocessing.Queue()

            # Start consumers
            multiprocessing_util.start_consumers(self._num_processes,
                                                 queue_extractors, results)

            for interval in intervals:
                topic_extractor = EclipseTopic2Db(self._db_name, forum_id,
                                                  interval, self._config,
                                                  self._log_path)
                queue_extractors.put(topic_extractor)

            # Add end-of-queue markers
            multiprocessing_util.add_poison_pills(self._num_processes,
                                                  queue_extractors)

            # Wait for all of the tasks to finish
            queue_extractors.join()
Example #5
    def _get_info_contribution(self, repo_id):
        # processes Git data
        existing_refs = self._get_existing_references(repo_id)

        queue_references = multiprocessing.JoinableQueue()
        results = multiprocessing.Queue()

        # Start consumers
        multiprocessing_util.start_consumers(self._num_processes,
                                             queue_references, results)
        for reference in self._querier.get_references():
            # when a filter is set, import only the selected references;
            # otherwise, import the references not already in the DB
            if self._references:
                to_import = reference[0] in self._references
            else:
                to_import = reference[0] not in existing_refs

            if to_import:
                git_ref_extractor = Git2DbReference(
                    self._db_name, repo_id, self._git_repo_path,
                    self._before_date, self._import_type, reference[0], "",
                    self._config, self._log_path)

                queue_references.put(git_ref_extractor)

        # Add end-of-queue markers
        multiprocessing_util.add_poison_pills(self._num_processes,
                                              queue_references)

        # Wait for all of the tasks to finish
        queue_references.join()
Example #6
    def _insert_issue_data(self, repo_id, issue_tracker_id):
        # processes issue data
        imported = self._dao.get_already_imported_issue_ids(
            issue_tracker_id, repo_id)
        issues = list(
            set(self._querier.get_issue_ids(self._before_date)) -
            set(imported))

        intervals = [
            i for i in multiprocessing_util.get_tasks_intervals(
                issues, len(self._tokens)) if len(i) > 0
        ]

        queue_intervals = multiprocessing.JoinableQueue()
        results = multiprocessing.Queue()

        # Start consumers
        multiprocessing_util.start_consumers(len(self._tokens),
                                             queue_intervals, results)

        for interval, token in zip(intervals, self._tokens):
            issue_extractor = GitHubIssue2Db(self._db_name, repo_id,
                                             issue_tracker_id, self._url,
                                             interval, token,
                                             self._config, self._log_path)
            queue_intervals.put(issue_extractor)

        # Add end-of-queue markers
        multiprocessing_util.add_poison_pills(len(self._tokens),
                                              queue_intervals)

        # Wait for all of the tasks to finish
        queue_intervals.join()
Example #7
    def _get_topics(self, forum_id):
        # updates the topics of a forum
        topic_ids = self._dao.get_topic_own_ids(forum_id)

        if topic_ids:
            intervals = [
                i for i in multiprocessing_util.get_tasks_intervals(
                    topic_ids, len(self._tokens)) if len(i) > 0
            ]

            queue_extractors = multiprocessing.JoinableQueue()
            results = multiprocessing.Queue()

            # Start consumers
            multiprocessing_util.start_consumers(len(self._tokens),
                                                 queue_extractors, results)

            for interval, token in zip(intervals, self._tokens):
                topic_extractor = StackOverflowTopic2Db(
                    self._db_name, forum_id, interval, token,
                    self._config, self._log_path)
                queue_extractors.put(topic_extractor)

            # Add end-of-queue markers
            multiprocessing_util.add_poison_pills(len(self._tokens),
                                                  queue_extractors)

            # Wait for all of the tasks to finish
            queue_extractors.join()
Example #8
    def _insert_issue_dependencies(self, repo_id, issue_tracker_id):
        # processes issue dependency data
        issues = self._dao.get_already_imported_issue_ids(
            issue_tracker_id, repo_id)
        intervals = [
            i for i in multiprocessing_util.get_tasks_intervals(
                issues, self._num_processes) if len(i) > 0
        ]

        queue_intervals = multiprocessing.JoinableQueue()
        results = multiprocessing.Queue()

        # Start consumers
        multiprocessing_util.start_consumers(self._num_processes,
                                             queue_intervals, results)

        for interval in intervals:
            issue_dependency_extractor = BugzillaIssueDependency2Db(
                self._db_name, repo_id, issue_tracker_id, self._url,
                self._product, interval, self._config, self._log_path)
            queue_intervals.put(issue_dependency_extractor)

        # Add end-of-queue markers
        multiprocessing_util.add_poison_pills(self._num_processes,
                                              queue_intervals)

        # Wait for all of the tasks to finish
        queue_intervals.join()
Example #9
    def _get_topics(self, forum_id):
        # processes Stack Overflow questions
        topic_imported = self._dao.get_topic_own_ids(forum_id)
        topic_ids = list(set(self._querier.get_topic_ids(self._search_query, self._before_date)) - set(topic_imported))
        topic_ids.sort()

        intervals = [i for i in multiprocessing_util.get_tasks_intervals(topic_ids, len(self._tokens)) if len(i) > 0]

        queue_extractors = multiprocessing.JoinableQueue()
        results = multiprocessing.Queue()

        # Start consumers
        multiprocessing_util.start_consumers(len(self._tokens), queue_extractors, results)

        for interval, token in zip(intervals, self._tokens):
            topic_extractor = StackOverflowTopic2Db(self._db_name, forum_id, interval, token,
                                                    self._config, self._log_path)
            queue_extractors.put(topic_extractor)

        # Add end-of-queue markers
        multiprocessing_util.add_poison_pills(len(self._tokens), queue_extractors)

        # Wait for all of the tasks to finish
        queue_extractors.join()
Example #10
    def _update_existing_references(self, repo_id, import_type):
        # updates existing references in the DB
        # fetch, for each reference of the repo, the last commit already imported
        cursor = self._dao.get_cursor()
        query = "SELECT c.sha, lc.ref_id " \
                "FROM commit c " \
                "JOIN (SELECT ref_id, MAX(commit_id) AS last_commit_id_in_ref " \
                "FROM commit_in_reference WHERE repo_id = %s GROUP BY ref_id) AS lc " \
                "ON c.id = lc.last_commit_id_in_ref"
        arguments = [repo_id]
        self._dao.execute(cursor, query, arguments)

        queue_references = multiprocessing.JoinableQueue()
        results = multiprocessing.Queue()

        # Start consumers
        multiprocessing_util.start_consumers(self._num_processes,
                                             queue_references, results)

        row = self._dao.fetchone(cursor)
        while row:
            sha = row[0]
            ref_id = row[1]
            row = self._dao.fetchone(cursor)

            ref_name = self._dao.select_reference_name(repo_id, ref_id)

            for reference in self._querier.get_references():
                reference_name = reference[0]
                if reference_name == ref_name:
                    self._existing_refs.append(ref_name)

                    git_ref_extractor = Git2DbReference(
                        self._db_name, repo_id, self._git_repo_path,
                        self._before_date, import_type, reference[0], sha,
                        self._config, self._log_path)

                    queue_references.put(git_ref_extractor)
                    break

        self._dao.close_cursor(cursor)

        # Add end-of-queue markers
        multiprocessing_util.add_poison_pills(self._num_processes,
                                              queue_references)

        # Wait for all of the tasks to finish
        queue_references.join()
Example #11
    def _update_issue_dependency(self, repo_id, issue_tracker_id, intervals, url):
        # updates issue dependencies already stored in the DB
        queue_intervals = multiprocessing.JoinableQueue()
        results = multiprocessing.Queue()

        # Start consumers
        multiprocessing_util.start_consumers(self._num_processes, queue_intervals, results)

        for interval in intervals:
            issue_dependency_extractor = BugzillaIssueDependency2Db(
                self._db_name, repo_id, issue_tracker_id, url,
                self._product, interval, self._config, self._log_path)
            queue_intervals.put(issue_dependency_extractor)

        # Add end-of-queue markers
        multiprocessing_util.add_poison_pills(self._num_processes, queue_intervals)

        # Wait for all of the tasks to finish
        queue_intervals.join()
Example #12
    def _update_issue_content(self, repo_id, issue_tracker_id, intervals, url):
        # updates issues already stored in the DB
        queue_intervals = multiprocessing.JoinableQueue()
        results = multiprocessing.Queue()

        # Start consumers
        multiprocessing_util.start_consumers(len(self._tokens), queue_intervals, results)

        for interval, token in zip(intervals, self._tokens):
            issue_extractor = GitHubIssue2Db(self._db_name, repo_id, issue_tracker_id, url, interval,
                                             token, self._config, self._log_path)
            queue_intervals.put(issue_extractor)

        # Add end-of-queue markers
        multiprocessing_util.add_poison_pills(len(self._tokens), queue_intervals)

        # Wait for all of the tasks to finish
        queue_intervals.join()
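
One detail is common to all twelve examples: the results queue is created and passed to start_consumers, but no method ever reads from it, so any values the extractors return are discarded. If those return values mattered, the caller could drain the queue after join(), roughly like this (a sketch that assumes the Consumer shown after example #1, which puts one result per queued task):

queue_intervals.join()

# poison pills produce no results in the sketched Consumer, so there is
# exactly one result per enqueued interval
outcomes = [results.get() for _ in range(len(intervals))]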