Example #1
0
    def test_bad_exists_properties(self):
        '''
        Count rows when metadata for the "exists" (~e~) columns is already
        present in meta.columns.  The two metadata rows are inserted
        best-effort before the query is run.
        '''
        test = {
            "data": [{
                "~e~": 1
            }, {
                "~e~": 1
            }],
            "query": {
                "from": TEST_TABLE,
                "select": [{
                    "name": "count",
                    "aggregate": "count"
                }],
            },
            "expecting_list": {
                "meta": {
                    "format": "value"
                },
                "data": {
                    "count": 2
                }
            }
        }

        subtest = wrap(test)

        cont = self.utils.fill_container(subtest, typed=False)
        db = Sqlite(filename="metadata.localhost.sqlite")
        # Both inserts share the same statement shape; loop over the rows
        # instead of duplicating the whole try/except block per row.
        for row in [
            [".", "object", "exists", '["."]', ".", cont.alias, Date.now()],
            ["~e~", "long", "exists", '["."]', "~e~", cont.alias, Date.now()],
        ]:
            try:
                with db.transaction() as t:
                    t.execute(
                        "insert into " + quote_column("meta.columns") +
                        "(name, es_type, jx_type, nested_path, es_column, es_index, last_updated) VALUES "
                        + quote_set(row))
            except Exception:
                # Best-effort: the metadata row may already exist from a
                # previous run; failures are deliberately ignored.
                pass

        self.utils.send_queries(subtest)
Example #2
0
    def add_cset_entries(self,
                         ordered_rev_list,
                         timestamp=False,
                         number_forward=True):
        '''
        Adds a list of revisions to the table. Assumes ordered_rev_list is
        ordered by how the changesets are found in the changelog; going
        forwards or backwards is dealt with by flipping the list.
        :param ordered_rev_list: Order given from changeset log searching.
        :param timestamp: If False, records are kept indefinitely
                          but if holes exist: (delete, None, delete, None)
                          those delete's with None's around them
                          will not be deleted.
        :param number_forward: If True, this function will number the revision
                               list going forward from max(revNum), else it'll
                               go backwards from min(revNum).
        :return:
        '''
        with self.conn.transaction() as t:
            current_min = t.get_one("SELECT min(revnum) FROM csetlog")[0]
            current_max = t.get_one("SELECT max(revnum) FROM csetlog")[0]
            # BUG FIX: compare against None explicitly.  min()/max() return
            # None only when the table is empty; a legitimate revnum of 0 is
            # falsy and must not reset the numbering.
            if current_min is None or current_max is None:
                current_min = 0
                current_max = 0

            # Going backwards: count down from just below the current
            # minimum.  Going forwards: count up from just above the current
            # maximum, reversing the list so the newest changeset receives
            # the highest revnum.
            direction = -1
            start = current_min - 1
            if number_forward:
                direction = 1
                start = current_max + 1
                ordered_rev_list = ordered_rev_list[::-1]

            insert_list = [(start + direction * count, rev,
                            int(time.time()) if timestamp else -1)
                           for count, rev in enumerate(ordered_rev_list)]

            # In case of overlapping requests, skip revisions already stored
            fmt_insert_list = []
            for cset_entry in insert_list:
                tmp = self._get_one_revision(t, cset_entry)
                if not tmp:
                    fmt_insert_list.append(cset_entry)

            # Batch the inserts to keep each statement a bounded size
            for _, tmp_insert_list in jx.groupby(fmt_insert_list,
                                                 size=SQL_CSET_BATCH_SIZE):
                t.execute(
                    "INSERT INTO csetLog (revnum, revision, timestamp)" +
                    " VALUES " + sql_list(
                        quote_set((revnum, revision, timestamp))
                        for revnum, revision, timestamp in tmp_insert_list))

            # Move the revision numbers forward if needed
            self.recompute_table_revnums()

        # Start a maintenance run if needed
        if self.check_for_maintenance():
            Log.note("Scheduling maintenance run on clogger.")
            self.maintenance_signal.go()
Example #3
0
def insert_into_db_chunked(transaction, data, cmd, sql_chunk_size=500):
    '''
    Execute `cmd` once per chunk of `data`, appending the quoted rows.

    `cmd` is expected to be an INSERT prefix ending in "VALUES " (note the
    trailing whitespace), e.g.:
        "INSERT INTO temporal (tuid, file, revision, line) VALUES "
    `data` must be a list of tuples; each tuple becomes one VALUES row.
    '''
    for _, chunk in jx.groupby(data, size=sql_chunk_size):
        quoted_rows = sql_list(quote_set(row) for row in chunk)
        transaction.execute(cmd + quoted_rows)
Example #4
0
File: util.py  Project: rv404674/TUID
def insert_into_db_chunked(transaction, data, cmd, sql_chunk_size=500):
    # `cmd` must be an INSERT statement prefix that ends with "VALUES "
    # (trailing space included), for example:
    #   "INSERT INTO temporal (tuid, file, revision, line) VALUES "
    #
    # `data` must be a list of tuples, one per row to insert.  Rows are
    # written in batches of `sql_chunk_size` to bound statement size.
    for _, batch in jx.groupby(data, size=sql_chunk_size):
        statement = cmd + sql_list(quote_set(tup) for tup in batch)
        transaction.execute(statement)
Example #5
0
File: clogger.py  Project: rv404674/TUID
    def initialize_to_range(self, old_rev, new_rev, delete_old=True):
        '''
        Used in service testing to get to very old
        changesets quickly.
        :param old_rev: The oldest revision to keep
        :param new_rev: The revision to start searching from
        :param delete_old: If True, clear the csetLog table first
        :return:
        '''
        # Pause all background workers; their previous states are saved so
        # they can be restored afterwards.
        old_settings = [
            self.disable_tipfilling,
            self.disable_backfilling,
            self.disable_maintenance,
            self.disable_deletion
        ]
        self.disable_tipfilling = True
        self.disable_backfilling = True
        self.disable_maintenance = True
        self.disable_deletion = True

        try:
            # Revisions are identified by their 12-character short hash.
            old_rev = old_rev[:12]
            new_rev = new_rev[:12]

            with self.working_locker:
                if delete_old:
                    with self.conn.transaction() as t:
                        t.execute("DELETE FROM csetLog")
                with self.conn.transaction() as t:
                    t.execute(
                        "INSERT INTO csetLog (revision, timestamp) VALUES " +
                        quote_set((new_rev, -1))
                    )
                self._fill_in_range(old_rev, new_rev, timestamp=True, number_forward=False)
        finally:
            # BUG FIX: restore the worker flags even when filling the range
            # raises, so the clogger is not left permanently disabled.
            self.disable_tipfilling = old_settings[0]
            self.disable_backfilling = old_settings[1]
            self.disable_maintenance = old_settings[2]
            self.disable_deletion = old_settings[3]
Example #6
0
    def initialize_to_range(self, old_rev, new_rev, delete_old=True):
        '''
        Used in service testing to get to very old
        changesets quickly.
        :param old_rev: The oldest revision to keep
        :param new_rev: The revision to start searching from
        :param delete_old: If True, clear the csetLog table first
        :return:
        '''
        # Pause all background workers, remembering their previous states.
        old_settings = [
            self.disable_tipfilling, self.disable_backfilling,
            self.disable_maintenance, self.disable_deletion
        ]
        self.disable_tipfilling = True
        self.disable_backfilling = True
        self.disable_maintenance = True
        self.disable_deletion = True

        try:
            # Revisions are identified by their 12-character short hash.
            old_rev = old_rev[:12]
            new_rev = new_rev[:12]

            with self.working_locker:
                if delete_old:
                    with self.conn.transaction() as t:
                        t.execute("DELETE FROM csetLog")
                with self.conn.transaction() as t:
                    t.execute("INSERT INTO csetLog (revision, timestamp) VALUES " +
                              quote_set((new_rev, -1)))
                self._fill_in_range(old_rev,
                                    new_rev,
                                    timestamp=True,
                                    number_forward=False)
        finally:
            # BUG FIX: restore the worker flags even when filling the range
            # raises, so the clogger is not left permanently disabled.
            self.disable_tipfilling = old_settings[0]
            self.disable_backfilling = old_settings[1]
            self.disable_maintenance = old_settings[2]
            self.disable_deletion = old_settings[3]
Example #7
0
File: clogger.py  Project: rv404674/TUID
    def csetLog_deleter(self, please_stop=None):
        '''
        Deletes changesets from the csetLog table
        and also changesets from the annotation table
        that have revisions matching the given changesets.
        Accepts lists of csets from self.deletions_todo.
        :param please_stop: signal used to stop this worker
        :return:
        '''
        while not please_stop:
            try:
                request = self.deletions_todo.pop(till=please_stop)
                if please_stop:
                    break

                # If deletion is disabled, ignore the current
                # request - it will need to be re-requested.
                if self.disable_deletion:
                    # BUG FIX: CSET_DELETION_WAIT_TIME is a duration, so pass
                    # it as `seconds=` (matching the except-clause below);
                    # `till=` expects an absolute time/signal.
                    Till(seconds=CSET_DELETION_WAIT_TIME).wait()
                    continue

                with self.working_locker:
                    first_cset = request

                    # Since we are deleting and moving stuff around in the
                    # TUID tables, we need everything to be contained in
                    # one transaction with no interruptions.
                    with self.conn.transaction() as t:
                        revnum = self._get_one_revnum(t, first_cset)[0]
                        csets_to_del = t.get(
                            "SELECT revnum, revision FROM csetLog WHERE revnum <= ?", (revnum,)
                        )
                        csets_to_del = [cset for _, cset in csets_to_del]
                        existing_frontiers = t.query(
                            "SELECT revision FROM latestFileMod WHERE revision IN " +
                            quote_set(csets_to_del)
                        ).data

                        # Flatten the single-column result rows.
                        existing_frontiers = [row[0] for row in existing_frontiers]
                        Log.note(
                            "Deleting all annotations and changeset log entries with revisions in the list: {{csets}}",
                            csets=csets_to_del
                        )

                        if len(existing_frontiers) > 0:
                            # This handles files which no longer exist anymore in
                            # the main branch.
                            Log.note(
                                "Deleting existing frontiers for revisions: {{revisions}}",
                                revisions=existing_frontiers
                            )
                            t.execute(
                                "DELETE FROM latestFileMod WHERE revision IN " +
                                quote_set(existing_frontiers)
                            )

                        Log.note("Deleting annotations...")
                        t.execute(
                            "DELETE FROM annotations WHERE revision IN " +
                            quote_set(csets_to_del)
                        )

                        Log.note(
                            "Deleting {{num_entries}} csetLog entries...",
                            num_entries=len(csets_to_del)
                        )
                        t.execute(
                            "DELETE FROM csetLog WHERE revision IN " +
                            quote_set(csets_to_del)
                        )

                    # Recalculate the revnums
                    self.recompute_table_revnums()
            except Exception as e:
                Log.warning("Unexpected error occured while deleting from csetLog:", cause=e)
                Till(seconds=CSET_DELETION_WAIT_TIME).wait()
        return
Example #8
0
File: clogger.py  Project: rv404674/TUID
    def csetLog_maintenance(self, please_stop=None):
        '''
        Handles deleting old csetLog entries and timestamping
        revisions once they pass the length for permanent
        storage for deletion later.
        :param please_stop: signal used to shut this worker down
        :return:
        '''
        while not please_stop:
            try:
                # Wait until something signals the maintenance cycle
                # to begin (or end).
                (self.maintenance_signal | please_stop).wait()

                if please_stop:
                    break
                if self.disable_maintenance:
                    continue

                Log.warning(
                    "Starting clog maintenance. Since this doesn't start often, "
                    "we need to explicitly see when it's started with this warning."
                )

                # Reset signal so we don't request
                # maintenance infinitely.
                with self.maintenance_signal.lock:
                    self.maintenance_signal._go = False

                with self.working_locker:
                    all_data = None
                    # Snapshot the whole csetLog, ordered by revnum (oldest first).
                    with self.conn.transaction() as t:
                        all_data = sorted(
                            t.get("SELECT revnum, revision, timestamp FROM csetLog"),
                            key=lambda x: int(x[0])
                        )

                    # Restore maximum permanents (if overflowing)
                    # Walk newest-first: the newest MINIMUM_PERMANENT_CSETS
                    # entries are forced to timestamp -1 (kept forever);
                    # anything older without a valid timestamp is stamped now
                    # so it can age out later.
                    new_data = []
                    modified = False
                    for count, (revnum, revision, timestamp) in enumerate(all_data[::-1]):
                        if count < MINIMUM_PERMANENT_CSETS:
                            if timestamp != -1:
                                modified = True
                                new_data.append((revnum, revision, -1))
                            else:
                                new_data.append((revnum, revision, timestamp))
                        elif type(timestamp) != int or timestamp == -1:
                            modified = True
                            new_data.append((revnum, revision, int(time.time())))
                        else:
                            new_data.append((revnum, revision, timestamp))

                    # Delete annotations at revisions with timestamps
                    # that are too old. The csetLog entries will have
                    # their timestamps reset here.
                    new_data1 = []
                    annrevs_to_del = []
                    current_time = time.time()
                    for count, (revnum, revision, timestamp) in enumerate(new_data[::-1]):
                        new_timestamp = timestamp
                        if timestamp != -1:
                            if current_time >= timestamp + TIME_TO_KEEP_ANNOTATIONS.seconds:
                                modified = True
                                new_timestamp = current_time
                                annrevs_to_del.append(revision)
                        new_data1.append((revnum, revision, new_timestamp))

                    if len(annrevs_to_del) > 0:
                        # Delete any latestFileMod and annotation entries
                        # that are too old.
                        Log.note(
                            "Deleting annotations and latestFileMod for revisions for being "
                            "older than {{oldest}}: {{revisions}}",
                            oldest=TIME_TO_KEEP_ANNOTATIONS,
                            revisions=annrevs_to_del
                        )
                        with self.conn.transaction() as t:
                            t.execute(
                                "DELETE FROM latestFileMod WHERE revision IN " +
                                quote_set(annrevs_to_del)
                            )
                            t.execute(
                                "DELETE FROM annotations WHERE revision IN " +
                                quote_set(annrevs_to_del)
                            )

                    # Delete any overflowing entries
                    # NOTE(review): when overflowing, new_data2 is rebuilt from
                    # the original all_data snapshot (not new_data1), so the
                    # timestamp updates above are dropped in that branch --
                    # confirm this is intended.
                    new_data2 = new_data1
                    reved_all_data = all_data[::-1]
                    deleted_data = reved_all_data[MAXIMUM_NONPERMANENT_CSETS:]
                    delete_overflowing_revstart = None
                    if len(deleted_data) > 0:
                        _, delete_overflowing_revstart, _ = deleted_data[0]
                        new_data2 = set(all_data) - set(deleted_data)

                        # Update old frontiers if requested, otherwise
                        # they will all get deleted by the csetLog_deleter
                        # worker
                        if UPDATE_VERY_OLD_FRONTIERS:
                            _, max_revision, _ = all_data[-1]
                            for _, revision, _ in deleted_data:
                                with self.conn.transaction() as t:
                                    old_files = t.get(
                                        "SELECT file FROM latestFileMod WHERE revision=?",
                                        (revision,)
                                    )
                                if old_files is None or len(old_files) <= 0:
                                    continue

                                # Move the frontier for these files forward to
                                # the newest known revision.
                                self.tuid_service.get_tuids_from_files(
                                    old_files,
                                    max_revision,
                                    going_forward=True,
                                )

                                # Poll until the frontier rows for this
                                # revision disappear (or we are told to stop).
                                still_exist = True
                                while still_exist and not please_stop:
                                    Till(seconds=TUID_EXISTENCE_WAIT_TIME).wait()
                                    with self.conn.transaction() as t:
                                        old_files = t.get(
                                            "SELECT file FROM latestFileMod WHERE revision=?",
                                            (revision,)
                                        )
                                    if old_files is None or len(old_files) <= 0:
                                        still_exist = False

                    # Update table and schedule a deletion
                    if modified:
                        with self.conn.transaction() as t:
                            insert_into_db_chunked(
                                t,
                                new_data2,
                                "INSERT OR REPLACE INTO csetLog (revnum, revision, timestamp) VALUES "
                            )
                    if not deleted_data:
                        continue

                    Log.note("Scheduling {{num_csets}} for deletion", num_csets=len(deleted_data))
                    self.deletions_todo.add(delete_overflowing_revstart)
            except Exception as e:
                Log.warning("Unexpected error occured while maintaining csetLog, continuing to try: ", cause=e)
        return
Example #9
0
File: clogger.py  Project: rv404674/TUID
    def add_cset_entries(self, ordered_rev_list, timestamp=False, number_forward=True):
        '''
        Adds a list of revisions to the table. Assumes ordered_rev_list is
        ordered by how the changesets are found in the changelog; going
        forwards or backwards is dealt with by flipping the list.
        :param ordered_rev_list: Order given from changeset log searching.
        :param timestamp: If False, records are kept indefinitely
                          but if holes exist: (delete, None, delete, None)
                          those delete's with None's around them
                          will not be deleted.
        :param number_forward: If True, this function will number the revision
                               list going forward from max(revNum), else it'll
                               go backwards from min(revNum).
        :return:
        '''
        with self.conn.transaction() as t:
            current_min = t.get_one("SELECT min(revnum) FROM csetlog")[0]
            current_max = t.get_one("SELECT max(revnum) FROM csetlog")[0]
            # BUG FIX: test against None explicitly; min()/max() are None only
            # when the table is empty, and a legitimate revnum of 0 must not
            # reset the numbering.
            if current_min is None or current_max is None:
                current_min = 0
                current_max = 0

            # Backwards: count down from below the minimum.  Forwards: count
            # up from above the maximum, reversing the list so the newest
            # changeset receives the highest revnum.
            direction = -1
            start = current_min - 1
            if number_forward:
                direction = 1
                start = current_max + 1
                ordered_rev_list = ordered_rev_list[::-1]

            insert_list = [
                (
                    start + direction * count,
                    rev,
                    int(time.time()) if timestamp else -1
                )
                for count, rev in enumerate(ordered_rev_list)
            ]

            # In case of overlapping requests, drop revisions already stored
            fmt_insert_list = []
            for cset_entry in insert_list:
                tmp = self._get_one_revision(t, cset_entry)
                if not tmp:
                    fmt_insert_list.append(cset_entry)

            # Batch the inserts to keep each statement a bounded size
            for _, tmp_insert_list in jx.groupby(fmt_insert_list, size=SQL_CSET_BATCH_SIZE):
                t.execute(
                    "INSERT INTO csetLog (revnum, revision, timestamp)" +
                    " VALUES " +
                    sql_list(
                        quote_set((revnum, revision, timestamp))
                        for revnum, revision, timestamp in tmp_insert_list
                    )
                )

            # Move the revision numbers forward if needed
            self.recompute_table_revnums()

        # Start a maintenance run if needed
        if self.check_for_maintenance():
            Log.note("Scheduling maintenance run on clogger.")
            self.maintenance_signal.go()
Example #10
0
File: util.py  Project: rv404674/TUID
    def create_and_insert_tuids(self, revision):
        '''
        Assigns a TUID to every line of this file at the given revision:
        reuses TUIDs already recorded in the `temporal` table and inserts
        newly created ones for the remaining new lines.  On any failure,
        sets self.failed_file and returns early.
        :param revision: revision the lines belong to
        :return:
        '''
        self.replace_line_with_tuidline()

        # line_origins holds one (filename, revision, line) tuple per line;
        # all_new_lines collects the line numbers that still need a TUID.
        line_origins = []
        all_new_lines = []
        for line_obj in self.lines:
            line_entry = (line_obj.filename, revision, line_obj.line)
            if not line_obj.tuid or line_obj.is_new_line:
                all_new_lines.append(line_obj.line)
            line_origins.append(line_entry)

        with self.tuid_service.conn.transaction() as t:
            # Get the new lines, excluding those that have existing tuids
            # NOTE(review): the comprehension's `revision` loop variable
            # shadows the method parameter inside this block.
            existing_tuids = {}
            if len(all_new_lines) > 0:
                try:
                    existing_tuids = {
                        line: tuid
                        for tuid, file, revision, line in t.query(
                            "SELECT tuid, file, revision, line FROM temporal"
                            " WHERE file = " + quote_value(self.filename)+
                            " AND revision = " + quote_value(revision) +
                            " AND line IN " + quote_set(all_new_lines)
                        ).data
                    }
                except Exception as e:
                    # Log takes out important output, use print instead
                    self.failed_file = True
                    print("Trying to find new lines: " + str(all_new_lines))
                    Log.error("Error encountered:", cause=e)

            # Create brand-new TUIDs for the lines not found above.
            insert_entries = []
            insert_lines = set(all_new_lines) - set(existing_tuids.keys())
            if len(insert_lines) > 0:
                try:
                    # assumes line numbers are 1-based indices into
                    # line_origins -- TODO confirm
                    insert_entries = [
                        (self.tuid_service.tuid(),) + line_origins[linenum-1]
                        for linenum in insert_lines
                    ]
                    insert_into_db_chunked(
                        t,
                        insert_entries,
                        "INSERT INTO temporal (tuid, file, revision, line) VALUES "
                    )
                except Exception as e:
                    Log.note(
                        "Failed to insert new tuids (likely due to merge conflict) on {{file}}: {{cause}}",
                        file=self.filename,
                        cause=e
                    )
                    self.failed_file = True
                    return

            # Attach the found/created TUIDs back onto the line objects.
            fmt_inserted_lines = {line: tuid for tuid, _, _, line in insert_entries}
            for line_obj in self.lines:
                # If a tuid already exists for this line,
                # replace, otherwise, use the newly created one.
                if line_obj.line in existing_tuids:
                    line_obj.tuid = existing_tuids[line_obj.line]
                elif line_obj.line in fmt_inserted_lines:
                    line_obj.tuid = fmt_inserted_lines[line_obj.line]

                if not line_obj.tuid:
                    Log.warning(
                        "Cannot find TUID at {{file}} and {{rev}}for: {{line}}",
                        file=self.filename,
                        rev=revision,
                        line=str(line_obj)
                    )
                    self.failed_file = True
                    return
Example #11
0
    def csetLog_deleter(self, please_stop=None):
        '''
        Deletes changesets from the csetLog table
        and also changesets from the annotation table
        that have revisions matching the given changesets.
        Accepts lists of csets from self.deletions_todo.
        :param please_stop: signal used to stop this worker
        :return:
        '''
        while not please_stop:
            try:
                request = self.deletions_todo.pop(till=please_stop)
                if please_stop:
                    break

                # If deletion is disabled, ignore the current
                # request - it will need to be re-requested.
                if self.disable_deletion:
                    # BUG FIX: CSET_DELETION_WAIT_TIME is a duration; pass it
                    # as `seconds=` (matching the except-clause below) rather
                    # than `till=`, which expects an absolute time/signal.
                    Till(seconds=CSET_DELETION_WAIT_TIME).wait()
                    continue

                with self.working_locker:
                    first_cset = request

                    # Since we are deleting and moving stuff around in the
                    # TUID tables, we need everything to be contained in
                    # one transaction with no interruptions.
                    with self.conn.transaction() as t:
                        revnum = self._get_one_revnum(t, first_cset)[0]
                        csets_to_del = t.get(
                            "SELECT revnum, revision FROM csetLog WHERE revnum <= ?",
                            (revnum, ))
                        csets_to_del = [cset for _, cset in csets_to_del]
                        existing_frontiers = t.query(
                            "SELECT revision FROM latestFileMod WHERE revision IN "
                            + quote_set(csets_to_del)).data

                        # Flatten the single-column result rows.
                        existing_frontiers = [
                            row[0] for row in existing_frontiers
                        ]
                        Log.note(
                            "Deleting all annotations and changeset log entries with revisions in the list: {{csets}}",
                            csets=csets_to_del)

                        if len(existing_frontiers) > 0:
                            # This handles files which no longer exist anymore in
                            # the main branch.
                            Log.note(
                                "Deleting existing frontiers for revisions: {{revisions}}",
                                revisions=existing_frontiers)
                            t.execute(
                                "DELETE FROM latestFileMod WHERE revision IN "
                                + quote_set(existing_frontiers))

                        Log.note("Deleting annotations...")
                        t.execute(
                            "DELETE FROM annotations WHERE revision IN " +
                            quote_set(csets_to_del))

                        Log.note("Deleting {{num_entries}} csetLog entries...",
                                 num_entries=len(csets_to_del))
                        t.execute("DELETE FROM csetLog WHERE revision IN " +
                                  quote_set(csets_to_del))

                    # Recalculate the revnums
                    self.recompute_table_revnums()
            except Exception as e:
                Log.warning(
                    "Unexpected error occured while deleting from csetLog:",
                    cause=e)
                Till(seconds=CSET_DELETION_WAIT_TIME).wait()
        return
Example #12
0
    def csetLog_maintenance(self, please_stop=None):
        '''
        Handles deleting old csetLog entries and timestamping
        revisions once they pass the length for permanent
        storage for deletion later.
        :param please_stop: signal used to shut this worker down
        :return:
        '''
        while not please_stop:
            try:
                # Wait until something signals the maintenance cycle
                # to begin (or end).
                (self.maintenance_signal | please_stop).wait()

                if please_stop:
                    break
                if self.disable_maintenance:
                    continue

                Log.warning(
                    "Starting clog maintenance. Since this doesn't start often, "
                    "we need to explicitly see when it's started with this warning."
                )

                # Reset signal so we don't request
                # maintenance infinitely.
                with self.maintenance_signal.lock:
                    self.maintenance_signal._go = False

                with self.working_locker:
                    all_data = None
                    # Snapshot of the whole csetLog, ordered by revnum.
                    with self.conn.transaction() as t:
                        all_data = sorted(t.get(
                            "SELECT revnum, revision, timestamp FROM csetLog"),
                                          key=lambda x: int(x[0]))

                    # Restore maximum permanents (if overflowing)
                    # Newest-first walk: the first MINIMUM_PERMANENT_CSETS
                    # entries are pinned with timestamp -1; older entries
                    # without a valid timestamp are stamped now.
                    new_data = []
                    modified = False
                    for count, (revnum, revision,
                                timestamp) in enumerate(all_data[::-1]):
                        if count < MINIMUM_PERMANENT_CSETS:
                            if timestamp != -1:
                                modified = True
                                new_data.append((revnum, revision, -1))
                            else:
                                new_data.append((revnum, revision, timestamp))
                        elif type(timestamp) != int or timestamp == -1:
                            modified = True
                            new_data.append(
                                (revnum, revision, int(time.time())))
                        else:
                            new_data.append((revnum, revision, timestamp))

                    # Delete annotations at revisions with timestamps
                    # that are too old. The csetLog entries will have
                    # their timestamps reset here.
                    new_data1 = []
                    annrevs_to_del = []
                    current_time = time.time()
                    for count, (revnum, revision,
                                timestamp) in enumerate(new_data[::-1]):
                        new_timestamp = timestamp
                        if timestamp != -1:
                            if current_time >= timestamp + TIME_TO_KEEP_ANNOTATIONS.seconds:
                                modified = True
                                new_timestamp = current_time
                                annrevs_to_del.append(revision)
                        new_data1.append((revnum, revision, new_timestamp))

                    if len(annrevs_to_del) > 0:
                        # Delete any latestFileMod and annotation entries
                        # that are too old.
                        Log.note(
                            "Deleting annotations and latestFileMod for revisions for being "
                            "older than {{oldest}}: {{revisions}}",
                            oldest=TIME_TO_KEEP_ANNOTATIONS,
                            revisions=annrevs_to_del)
                        with self.conn.transaction() as t:
                            t.execute(
                                "DELETE FROM latestFileMod WHERE revision IN "
                                + quote_set(annrevs_to_del))
                            t.execute(
                                "DELETE FROM annotations WHERE revision IN " +
                                quote_set(annrevs_to_del))

                    # Delete any overflowing entries
                    # NOTE(review): when overflowing, new_data2 is rebuilt from
                    # the original all_data snapshot (not new_data1) -- confirm
                    # the timestamp updates above are meant to be dropped here.
                    new_data2 = new_data1
                    reved_all_data = all_data[::-1]
                    deleted_data = reved_all_data[MAXIMUM_NONPERMANENT_CSETS:]
                    delete_overflowing_revstart = None
                    if len(deleted_data) > 0:
                        _, delete_overflowing_revstart, _ = deleted_data[0]
                        new_data2 = set(all_data) - set(deleted_data)

                        # Update old frontiers if requested, otherwise
                        # they will all get deleted by the csetLog_deleter
                        # worker
                        if UPDATE_VERY_OLD_FRONTIERS:
                            _, max_revision, _ = all_data[-1]
                            for _, revision, _ in deleted_data:
                                with self.conn.transaction() as t:
                                    old_files = t.get(
                                        "SELECT file FROM latestFileMod WHERE revision=?",
                                        (revision, ))
                                if old_files is None or len(old_files) <= 0:
                                    continue

                                self.tuid_service.get_tuids_from_files(
                                    old_files,
                                    max_revision,
                                    going_forward=True,
                                )

                                # Poll until the frontier rows for this
                                # revision disappear (or we are told to stop).
                                still_exist = True
                                while still_exist and not please_stop:
                                    Till(seconds=TUID_EXISTENCE_WAIT_TIME
                                         ).wait()
                                    with self.conn.transaction() as t:
                                        old_files = t.get(
                                            "SELECT file FROM latestFileMod WHERE revision=?",
                                            (revision, ))
                                    if old_files is None or len(
                                            old_files) <= 0:
                                        still_exist = False

                    # Update table and schedule a deletion
                    if modified:
                        with self.conn.transaction() as t:
                            insert_into_db_chunked(
                                t, new_data2,
                                "INSERT OR REPLACE INTO csetLog (revnum, revision, timestamp) VALUES "
                            )
                    if not deleted_data:
                        continue

                    Log.note("Scheduling {{num_csets}} for deletion",
                             num_csets=len(deleted_data))
                    self.deletions_todo.add(delete_overflowing_revstart)
            except Exception as e:
                Log.warning(
                    "Unexpected error occured while maintaining csetLog, continuing to try: ",
                    cause=e)
        return
Example #13
0
def test_annotation_memory(service):
    """Manual stress diagnostic: repeatedly annotate the same file(s) and
    chart process memory afterwards to reveal leaks in get_tuids().

    NOTE(review): needs psutil, matplotlib, a populated
    resources/stressfiles.json and a live TUID `service` — intended to be
    run by hand, not as an automated assertion-based test.
    """
    import psutil
    import os
    import gc
    import pprint

    # Keep every unreachable object in gc.garbage so leaks can be inspected.
    gc.set_debug(gc.DEBUG_SAVEALL)

    with open('resources/stressfiles.json', 'r') as stress_list:
        files = json.load(stress_list)

    total_trials = 1000
    total_files = 1
    files_to_get = files[:total_files]
    test_rev = "58eb13b394f4"

    all_end_mems = []
    all_percents = []
    this_proc = psutil.Process(os.getpid())

    def resident_mb():
        # Resident set size in (decimal) megabytes, rounded for readable logs.
        return round(this_proc.memory_info().rss / (1000 * 1000), 2)

    start_mem = -1  # stays -1 until the first trial records a baseline
    for _ in range(total_trials):

        # Randomize files
        #files_to_get = [random.choice(files) for _ in range(total_files)]

        # Wipe cached rows so each trial redoes the full annotation work.
        with service.conn.transaction() as t:
            t.execute("DELETE FROM temporal WHERE file IN " +
                      quote_set(files_to_get))
            t.execute("DELETE FROM annotations WHERE file IN " +
                      quote_set(files_to_get))
            t.execute("DELETE FROM latestFileMod WHERE file IN " +
                      quote_set(files_to_get))

        if start_mem == -1:
            start_mem = resident_mb()
        service.get_tuids(files_to_get, test_rev)
        end_mem = resident_mb()
        pc_used = service.statsdaemon.get_used_memory_percent()

        Log.note("GC get_count: {{getc}}", getc=gc.get_count())
        Log.note("GC collect: {{getc}}", getc=gc.collect())

        Log.note(
            "Started with {{mem}}, finished with {{endmem}}. Percent currently used is {{pc}}",
            mem=start_mem,
            endmem=end_mem,
            pc=pc_used)
        Log.note("Used {{mem}} Mb since first get_tuids call.",
                 mem=str(end_mem - start_mem))

        if GC_DEBUG:
            Log.note("Uncollected garbage: ")
            pprint.pprint(gc.garbage)

            import time
            time.sleep(10)

        all_end_mems.append(end_mem)
        all_percents.append(pc_used)

    from matplotlib import pyplot as plt

    def chart(series, title, ylabel):
        # One figure per metric; all shown together by plt.show() below.
        plt.figure()
        plt.plot(series)
        plt.title(title)
        plt.xlabel("Trial count")
        plt.ylabel(ylabel)

    chart(all_end_mems, "Memory usage over time.", "Memory usage (Mb)")
    chart(all_percents, "Percent of memory used over time.", "Memory usage (%)")

    plt.show(block=True)
示例#14
0
def test_annotation_memory(service):
    """Memory-leak stress driver for get_tuids().

    Runs many trials against the same files, recording resident memory and
    the stats daemon's used-memory percentage per trial, then plots both
    series with matplotlib.

    NOTE(review): manual diagnostic — requires psutil, matplotlib, a live
    `service` and resources/stressfiles.json; not suitable for CI.
    """
    import psutil
    import os
    import gc
    import pprint

    # DEBUG_SAVEALL keeps unreachable objects in gc.garbage for inspection.
    gc.set_debug(gc.DEBUG_SAVEALL)

    with open('resources/stressfiles.json', 'r') as f:
        files = json.load(f)

    total_trials = 1000
    total_files = 1
    files_to_get = files[:total_files]
    test_rev = "58eb13b394f4"

    # Pre-sized so each trial writes its own slot.
    all_end_mems = [None] * total_trials
    all_percents = [None] * total_trials
    process = psutil.Process(os.getpid())
    start_mem = -1  # sentinel until the first trial captures a baseline
    for i in range(total_trials):

        # Randomize files
        #files_to_get = [random.choice(files) for _ in range(total_files)]

        # Clear every cached table so the trial repeats the full work.
        with service.conn.transaction() as t:
            for tbl in ("temporal", "annotations", "latestFileMod"):
                t.execute(
                    "DELETE FROM " + tbl + " WHERE file IN " + quote_set(files_to_get)
                )

        if start_mem == -1:
            start_mem = round(process.memory_info().rss / (1000 * 1000), 2)
        service.get_tuids(files_to_get, test_rev)
        end_mem = round(process.memory_info().rss / (1000 * 1000), 2)
        pc_used = service.statsdaemon.get_used_memory_percent()

        Log.note("GC get_count: {{getc}}", getc=gc.get_count())
        Log.note("GC collect: {{getc}}", getc=gc.collect())

        Log.note(
            "Started with {{mem}}, finished with {{endmem}}. Percent currently used is {{pc}}",
            mem=start_mem,
            endmem=end_mem,
            pc=pc_used
        )
        Log.note("Used {{mem}} Mb since first get_tuids call.", mem=str(end_mem - start_mem))

        if GC_DEBUG:
            Log.note("Uncollected garbage: ")
            pprint.pprint(gc.garbage)

            import time
            time.sleep(10)

        all_end_mems[i] = end_mem
        all_percents[i] = pc_used

    from matplotlib import pyplot as plt

    # Two figures: absolute MB per trial, then percent-of-memory per trial.
    for series, title, ylabel in (
            (all_end_mems, "Memory usage over time.", "Memory usage (Mb)"),
            (all_percents, "Percent of memory used over time.", "Memory usage (%)"),
    ):
        plt.figure()
        plt.plot(series)
        plt.title(title)
        plt.xlabel("Trial count")
        plt.ylabel(ylabel)

    plt.show(block=True)
示例#15
0
    def create_and_insert_tuids(self, revision):
        """Assign a TUID to every line of this file at `revision`.

        Reuses TUIDs already stored in the `temporal` table for matching
        (file, revision, line) rows, mints and inserts new TUIDs for the
        remaining new lines, then writes the resolved TUID back onto each
        entry of `self.lines`. On any query/insert failure, or when a line
        ends up without a TUID, sets `self.failed_file = True` and returns
        early.
        """
        self.replace_line_with_tuidline()

        # line_origins[k] is the (filename, revision, line) tuple for the
        # k-th entry of self.lines; all_new_lines collects the line numbers
        # that still need a TUID (missing one, or flagged as new).
        line_origins = []
        all_new_lines = []
        for line_obj in self.lines:
            line_entry = (line_obj.filename, revision, line_obj.line)
            if not line_obj.tuid or line_obj.is_new_line:
                all_new_lines.append(line_obj.line)
            line_origins.append(line_entry)

        with self.tuid_service.conn.transaction() as t:
            # Get the new lines, excluding those that have existing tuids
            existing_tuids = {}
            if len(all_new_lines) > 0:
                try:
                    # Map line number -> stored TUID for rows already present
                    # in `temporal` at this file/revision.
                    existing_tuids = {
                        line: tuid
                        for tuid, file, revision, line in t.query(
                            "SELECT tuid, file, revision, line FROM temporal"
                            " WHERE file = " + quote_value(self.filename) +
                            " AND revision = " + quote_value(revision) +
                            " AND line IN " + quote_set(all_new_lines)).data
                    }
                except Exception as e:
                    # Log takes out important output, use print instead
                    self.failed_file = True
                    print("Trying to find new lines: " + str(all_new_lines))
                    Log.error("Error encountered:", cause=e)

            insert_entries = []
            # Only mint new TUIDs for lines with no existing temporal row.
            insert_lines = set(all_new_lines) - set(existing_tuids.keys())
            if len(insert_lines) > 0:
                try:
                    # NOTE(review): `linenum - 1` assumes line numbers are
                    # 1-based positions into line_origins, i.e. self.lines is
                    # ordered by line number — confirm upstream.
                    insert_entries = [(self.tuid_service.tuid(), ) +
                                      line_origins[linenum - 1]
                                      for linenum in insert_lines]
                    insert_into_db_chunked(
                        t, insert_entries,
                        "INSERT INTO temporal (tuid, file, revision, line) VALUES "
                    )
                except Exception as e:
                    # Insert failures are expected occasionally (e.g. merge
                    # conflicts), so this is a note, not an error.
                    Log.note(
                        "Failed to insert new tuids (likely due to merge conflict) on {{file}}: {{cause}}",
                        file=self.filename,
                        cause=e)
                    self.failed_file = True
                    return

            # Map freshly inserted line number -> new TUID for the
            # write-back loop below.
            fmt_inserted_lines = {
                line: tuid
                for tuid, _, _, line in insert_entries
            }
            for line_obj in self.lines:
                # If a tuid already exists for this line,
                # replace, otherwise, use the newly created one.
                if line_obj.line in existing_tuids:
                    line_obj.tuid = existing_tuids[line_obj.line]
                elif line_obj.line in fmt_inserted_lines:
                    line_obj.tuid = fmt_inserted_lines[line_obj.line]

                if not line_obj.tuid:
                    Log.warning(
                        "Cannot find TUID at {{file}} and {{rev}}for: {{line}}",
                        file=self.filename,
                        rev=revision,
                        line=str(line_obj))
                    self.failed_file = True
                    return