Example #1
def _load_token_database(db, domain: Pattern[str]) -> tokens.Database:
    """Loads a Database from a database object, ELF, CSV, or binary database."""
    if db is None:
        return tokens.Database()

    if isinstance(db, tokens.Database):
        return db

    if isinstance(db, elf_reader.Elf):
        return _database_from_elf(db, domain)

    # If it's a str, it might be a path. Check if it's an ELF or CSV.
    if isinstance(db, (str, Path)):
        if not os.path.exists(db):
            raise FileNotFoundError(
                f'"{db}" is not a path to a token database')

        # Read the path as an ELF file.
        with open(db, 'rb') as fd:
            if elf_reader.compatible_file(fd):
                return _database_from_elf(fd, domain)

        # Read the path as a packed binary or CSV file.
        return tokens.DatabaseFile(db)

    # Assume that it's a file object and check if it's an ELF.
    if elf_reader.compatible_file(db):
        return _database_from_elf(db, domain)

    # Read the database as CSV or packed binary from a file object's path.
    if hasattr(db, 'name') and os.path.exists(db.name):
        return tokens.DatabaseFile(db.name)

    # Read CSV directly from the file object.
    return tokens.Database(tokens.parse_csv(db))
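
A minimal usage sketch of the dispatch above. The path 'token_db.csv' and the empty domain pattern are hypothetical stand-ins, assuming an empty pattern selects the default tokenization domain:

import re
from pathlib import Path

domain = re.compile('')  # assumption: empty pattern = default domain

empty_db = _load_token_database(None, domain)          # fresh, empty Database
csv_db = _load_token_database('token_db.csv', domain)  # str path on disk
path_db = _load_token_database(Path('token_db.csv'), domain)  # pathlib.Path

with open('token_db.csv', 'rb') as fd:  # open file object (ELF, CSV, or binary)
    file_db = _load_token_database(fd, domain)
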
Example #2
    def test_simple(self):
        detok = detokenize.Detokenizer(
            tokens.Database([
                tokens.TokenizedStringEntry(0xcdab, '%02d %s %c%%',
                                            dt.datetime.now())
            ]))
        self.assertEqual(
            str(detok.detokenize(b'\xab\xcd\0\0\x02\x03Two\x66')),
            '01 Two 3%')
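
The raw payload in this test packs the 32-bit token little-endian, followed by the arguments. The encoding rules below (zig-zag varints for integers and chars, length-prefixed strings) are assumptions inferred from the expected output:

import struct

token = struct.pack('<I', 0xcdab)      # b'\xab\xcd\x00\x00'
arg_d = bytes([1 << 1])                # %02d -> 1, zig-zag varint: 0x02
arg_s = bytes([len(b'Two')]) + b'Two'  # %s -> length-prefixed 'Two'
arg_c = bytes([ord('3') << 1])         # %c -> '3' (0x33), zig-zag: 0x66

assert token + arg_d + arg_s + arg_c == b'\xab\xcd\0\0\x02\x03Two\x66'
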
Example #3
def _database_from_elf(elf, domain: Pattern[str]) -> tokens.Database:
    """Reads the tokenized strings from an elf_reader.Elf or ELF file object."""
    _LOG.debug('Reading tokenized strings in domain "%s" from %s', domain, elf)

    reader = _elf_reader(elf)

    # Read tokenized string entries.
    section_data = reader.dump_section_contents(_TOKENIZED_ENTRY_SECTIONS)
    if section_data is not None:
        return tokens.Database(_read_tokenized_entries(section_data, domain))

    # Read legacy null-terminated string entries.
    sections = reader.dump_sections(_LEGACY_STRING_SECTIONS)
    if sections:
        return tokens.Database.merged(
            *_read_tokenized_strings(sections, domain))

    return tokens.Database([])
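
A hedged usage sketch for the function above; 'firmware.elf' is a hypothetical path, and the empty pattern is assumed to select the default domain:

import re

with open('firmware.elf', 'rb') as elf:  # hypothetical ELF with tokenized strings
    db = _database_from_elf(elf, re.compile(''))

print(f'{len(db.entries())} tokenized strings found')
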
Example #4
    def setUp(self):
        super().setUp()
        self.detok = detokenize.Detokenizer(
            tokens.Database([
                tokens.TokenizedStringEntry(0, '$AAAAAA=='),  # token for 0
                tokens.TokenizedStringEntry(1, '$AgAAAA=='),  # token for 2
                tokens.TokenizedStringEntry(2, '$AwAAAA=='),  # token for 3
                tokens.TokenizedStringEntry(3, '$AgAAAA=='),  # token for 2
            ]))
Example #5
    def setUp(self):
        self.db = tokens.Database([
            tokens.TokenizedStringEntry(1, 'Luke'),
            tokens.TokenizedStringEntry(2, 'Leia'),
            tokens.TokenizedStringEntry(2, 'Darth Vader'),
            tokens.TokenizedStringEntry(2, 'Emperor Palpatine'),
            tokens.TokenizedStringEntry(3, 'Han'),
            tokens.TokenizedStringEntry(4, 'Chewbacca'),
            tokens.TokenizedStringEntry(5, 'Darth Maul'),
            tokens.TokenizedStringEntry(6, 'Han Solo'),
        ])
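
Token 2 above is deliberately bound to three strings. A sketch of how the collision surfaces through token_to_entries (the mapping exercised in the merge test at the end of this section), written as it might appear inside a test method of the same class:

        # All entries sharing token 2 are kept side by side.
        colliding = self.db.token_to_entries[2]
        self.assertEqual({e.string for e in colliding},
                         {'Leia', 'Darth Vader', 'Emperor Palpatine'})
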
Example #6
    def test_unparsed_data(self):
        detok = detokenize.Detokenizer(
            tokens.Database([
                tokens.TokenizedStringEntry(1, 'no args',
                                            dt.datetime(100, 1, 1)),
            ]))
        result = detok.detokenize(b'\x01\0\0\0o_o')
        self.assertFalse(result.ok())
        self.assertEqual('no args', str(result))
        self.assertIn('o_o', repr(result))
        self.assertIn('decoding failed', result.error_message())
Example #7
    def test_detokenize_missing_data_with_errors_is_unsuccessful(self):
        detok = detokenize.Detokenizer(
            tokens.Database(
                [tokens.TokenizedStringEntry(2, '%s', dt.datetime(1, 1, 1))]),
            show_errors=True)

        result = detok.detokenize(b'\x02\0\0\0')
        string, args, remaining = result.failures[0]
        self.assertIn('%s MISSING', string)
        self.assertEqual(len(args), 1)
        self.assertEqual(b'', remaining)
        self.assertEqual(len(result.failures), 1)
        self.assertIn('%s MISSING', str(result))
Example #8
    def test_merge_multiple(self):
        db = tokens.Database.merged(
            tokens.Database(
                [tokens.TokenizedStringEntry(1, 'one',
                                             datetime.datetime.max)]),
            tokens.Database(
                [tokens.TokenizedStringEntry(2, 'two',
                                             datetime.datetime.min)]),
            tokens.Database(
                [tokens.TokenizedStringEntry(1, 'one',
                                             datetime.datetime.min)]))
        self.assertEqual({str(e) for e in db.entries()}, {'one', 'two'})

        db.merge(
            tokens.Database([
                tokens.TokenizedStringEntry(4, 'four', datetime.datetime.max)
            ]),
            tokens.Database(
                [tokens.TokenizedStringEntry(2, 'two',
                                             datetime.datetime.max)]),
            tokens.Database([
                tokens.TokenizedStringEntry(3, 'three', datetime.datetime.min)
            ]))
        self.assertEqual({str(e)
                          for e in db.entries()},
                         {'one', 'two', 'three', 'four'})
Example #9
    def test_detokenize_extra_data_is_unsuccessful(self):
        detok = detokenize.Detokenizer(
            tokens.Database([
                tokens.TokenizedStringEntry(1, 'no args', dt.datetime(1, 1, 1))
            ]))

        result = detok.detokenize(b'\x01\0\0\0\x04args')
        self.assertEqual(len(result.failures), 1)
        string, args, remaining = result.failures[0]
        self.assertEqual('no args', string)
        self.assertFalse(args)
        self.assertEqual(b'\x04args', remaining)
        self.assertEqual('no args', str(result))
Example #10
    def test_detokenize_missing_data_is_unsuccessful(self):
        detok = detokenize.Detokenizer(
            tokens.Database([
                tokens.TokenizedStringEntry(2,
                                            '%s',
                                            date_removed=dt.datetime(1, 1, 1))
            ]))

        result = detok.detokenize(b'\x02\0\0\0')
        string, args, remaining = result.failures[0]
        self.assertEqual('%s', string)
        self.assertEqual(len(args), 1)
        self.assertEqual(b'', remaining)
        self.assertEqual(len(result.failures), 1)
        self.assertEqual('%s', str(result))
Example #11
    def test_add(self):
        db = tokens.Database()
        db.add(_entries('MILK', 'apples'))
        self.assertEqual({e.string for e in db.entries()}, {'MILK', 'apples'})

        db.add(_entries('oranges', 'CHEESE', 'pears'))
        self.assertEqual(len(db.entries()), 5)

        db.add(_entries('MILK', 'apples', 'only this one is new'))
        self.assertEqual(len(db.entries()), 6)

        db.add(_entries('MILK'))
        self.assertEqual({e.string
                          for e in db.entries()}, {
                              'MILK', 'apples', 'oranges', 'CHEESE', 'pears',
                              'only this one is new'
                          })
Example #12
    def setUp(self):
        super().setUp()
        token = 0xbaad

        # Database with several conflicting tokens.
        self.detok = detokenize.Detokenizer(tokens.Database([
            tokens.TokenizedStringEntry(token, 'REMOVED', dt.datetime(9, 1, 1)),
            tokens.TokenizedStringEntry(token, 'newer'),
            tokens.TokenizedStringEntry(token, 'A: %d', dt.datetime(30, 5, 9)),
            tokens.TokenizedStringEntry(token, 'B: %c', dt.datetime(30, 5, 10)),
            tokens.TokenizedStringEntry(token, 'C: %s'),
            tokens.TokenizedStringEntry(token, '%d%u'),
            tokens.TokenizedStringEntry(token, '%s%u %d'),
            tokens.TokenizedStringEntry(1, '%s'),
            tokens.TokenizedStringEntry(1, '%d'),
            tokens.TokenizedStringEntry(2, 'Three %s %s %s'),
            tokens.TokenizedStringEntry(2, 'Five %d %d %d %d %s'),
        ]))  # yapf: disable
Example #13
    def test_merge_multiple_databases_in_one_call(self):
        """Tests the merge and merged methods with multiple databases."""
        db = tokens.Database.merged(
            tokens.Database([
                tokens.TokenizedStringEntry(1,
                                            'one',
                                            date_removed=datetime.datetime.max)
            ]),
            tokens.Database([
                tokens.TokenizedStringEntry(2,
                                            'two',
                                            date_removed=datetime.datetime.min)
            ]),
            tokens.Database([
                tokens.TokenizedStringEntry(1,
                                            'one',
                                            date_removed=datetime.datetime.min)
            ]))
        self.assertEqual({str(e) for e in db.entries()}, {'one', 'two'})

        db.merge(
            tokens.Database([
                tokens.TokenizedStringEntry(4,
                                            'four',
                                            date_removed=datetime.datetime.max)
            ]),
            tokens.Database([
                tokens.TokenizedStringEntry(2,
                                            'two',
                                            date_removed=datetime.datetime.max)
            ]),
            tokens.Database([
                tokens.TokenizedStringEntry(3,
                                            'three',
                                            date_removed=datetime.datetime.min)
            ]))
        self.assertEqual({str(e)
                          for e in db.entries()},
                         {'one', 'two', 'three', 'four'})
Example #14
def read_db_from_csv(csv_str: str) -> tokens.Database:
    with io.StringIO(csv_str) as csv_db:
        return tokens.Database(tokens.parse_csv(csv_db))
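
A usage sketch, assuming the three-column CSV layout of these token databases (hexadecimal token, optional removal date, quoted string); the tokens and strings here are made up:

CSV = ('00000001,,"Hello"\n'
       '00000002,,"%d wombats"\n')

db = read_db_from_csv(CSV)
assert {e.string for e in db.entries()} == {'Hello', '%d wombats'}
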
Example #15
    def test_binary_format_parse(self):
        with io.BytesIO(BINARY_DATABASE) as binary_db:
            db = tokens.Database(tokens.parse_binary(binary_db))

        self.assertEqual(str(db), CSV_DATABASE)
Example #16
    def test_merge(self):
        """Tests the tokens.Database merge method."""

        db = tokens.Database()

        # Test basic merging into an empty database.
        db.merge(
            tokens.Database([
                tokens.TokenizedStringEntry(
                    1, 'one', date_removed=datetime.datetime.min),
                tokens.TokenizedStringEntry(
                    2, 'two', date_removed=datetime.datetime.min),
            ]))
        self.assertEqual({str(e) for e in db.entries()}, {'one', 'two'})
        self.assertEqual(db.token_to_entries[1][0].date_removed,
                         datetime.datetime.min)
        self.assertEqual(db.token_to_entries[2][0].date_removed,
                         datetime.datetime.min)

        # Test merging in an entry with a removal date.
        db.merge(
            tokens.Database([
                tokens.TokenizedStringEntry(3, 'three'),
                tokens.TokenizedStringEntry(
                    4, 'four', date_removed=datetime.datetime.min),
            ]))
        self.assertEqual({str(e)
                          for e in db.entries()},
                         {'one', 'two', 'three', 'four'})
        self.assertIsNone(db.token_to_entries[3][0].date_removed)
        self.assertEqual(db.token_to_entries[4][0].date_removed,
                         datetime.datetime.min)

        # Test merging in one entry.
        db.merge(tokens.Database([
            tokens.TokenizedStringEntry(5, 'five'),
        ]))
        self.assertEqual({str(e)
                          for e in db.entries()},
                         {'one', 'two', 'three', 'four', 'five'})
        self.assertEqual(db.token_to_entries[4][0].date_removed,
                         datetime.datetime.min)
        self.assertIsNone(db.token_to_entries[5][0].date_removed)

        # Merge in repeated entries with different removal dates.
        db.merge(
            tokens.Database([
                tokens.TokenizedStringEntry(
                    4, 'four', date_removed=datetime.datetime.max),
                tokens.TokenizedStringEntry(
                    5, 'five', date_removed=datetime.datetime.max),
            ]))
        self.assertEqual(len(db.entries()), 5)
        self.assertEqual({str(e)
                          for e in db.entries()},
                         {'one', 'two', 'three', 'four', 'five'})
        self.assertEqual(db.token_to_entries[4][0].date_removed,
                         datetime.datetime.max)
        self.assertIsNone(db.token_to_entries[5][0].date_removed)

        # Merge in the same repeated entries now without removal dates.
        db.merge(
            tokens.Database([
                tokens.TokenizedStringEntry(4, 'four'),
                tokens.TokenizedStringEntry(5, 'five')
            ]))
        self.assertEqual(len(db.entries()), 5)
        self.assertEqual({str(e)
                          for e in db.entries()},
                         {'one', 'two', 'three', 'four', 'five'})
        self.assertIsNone(db.token_to_entries[4][0].date_removed)
        self.assertIsNone(db.token_to_entries[5][0].date_removed)

        # Merge in an empty database.
        db.merge(tokens.Database([]))
        self.assertEqual({str(e)
                          for e in db.entries()},
                         {'one', 'two', 'three', 'four', 'five'})
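
The sequence above implies the merge rule for duplicate entries: an entry with no removal date (still live) wins over any dated one, and a later removal date wins over an earlier one. A compact restatement under that assumption, written as if continuing the test method above:

        # Hypothetical follow-up check: a live entry overrides a dated one.
        dated = tokens.TokenizedStringEntry(
            7, 'seven', date_removed=datetime.datetime.min)
        live = tokens.TokenizedStringEntry(7, 'seven')  # no removal date

        merged = tokens.Database.merged(tokens.Database([dated]),
                                        tokens.Database([live]))
        self.assertIsNone(merged.token_to_entries[7][0].date_removed)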