Примеры использования PffArchive в Python

Язык программирования: Python

Пространство имен/Пакет: libratom.lib.pff

Класс/Тип: PffArchive

Примеров на hotexamples.com: 15

Python PffArchive — найдено 15 примеров. Это лучшие примеры кода на Python для libratom.lib.pff.PffArchive, взятые из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество подборки.

Основные методы

Показать Скрыть

PffArchive(15)

format_message(1)

Основные методы

PffArchive (15)

format_message (1)

Пример #1

Показать файл

def test_pffarchive_format_message(enron_dataset_part004, empty_message):
    """Format every message of each ``*.pst`` file found in the fixture path.

    The formatted text is handed to the standard library email parser; the
    main point is that parsing does not raise.  Calling ``format_message``
    on the class with ``empty_message`` is expected to give an empty string.
    """
    for pst_path in enron_dataset_part004.glob("*.pst"):
        with PffArchive(pst_path) as pff:
            for msg in pff.messages():
                parsed = email.message_from_string(
                    pff.format_message(msg), policy=policy.default
                )
                # Passes when the parsed message is truthy, or otherwise
                # when the formatted text itself is empty.
                assert parsed or not pff.format_message(msg)

    assert PffArchive.format_message(empty_message) == ""

Пример #2

Показать файл

def test_extract_message_attachments(enron_dataset_part002):
    """Validate attachment extraction against three known MD5 digests.

    The attachments of one fixed message node are read, written to a
    temporary directory, and compared with the expected checksum and size.
    """
    # Attachment identifier -> expected MD5 hex digest
    expected_md5 = {
        47685: "d48232614b01e56014293854abbb5db3",
        47717: "cf8be7cd3e6e14307972246e2942c9d1",
        47749: "081e6b66dc89671ff6460adac94dbab1",
    }

    first_pst = next(enron_dataset_part002.glob("*.pst"))

    with PffArchive(first_pst) as archive:
        with TemporaryDirectory() as tmp_dir:
            out_dir = Path(tmp_dir)

            # The message is looked up through its node ID in the archive tree
            message = archive.tree.get_node(2128676).data

            for att in message.attachments:
                # Whole attachment content as bytes
                payload = att.read_buffer(att.size)

                # Write it out to the temporary directory
                filepath = (
                    out_dir / f"attachment_{message.identifier}_{att.identifier}"
                )
                filepath.write_bytes(payload)

                # The content must match the known digest...
                assert hashlib.md5(payload).hexdigest() == expected_md5[att.identifier]

                # ...and the file on disk must have the reported size
                assert filepath.stat().st_size == att.size

Пример #3

Показать файл

def test_get_message_by_id(sample_pst_file):
    """Fetch each message again by its identifier and compare both copies.

    The fetched message must carry the same identifier and format to the
    same text as the message obtained by iteration.
    """
    with PffArchive(sample_pst_file) as pff:
        for original in pff.messages():
            fetched = pff.get_message_by_id(original.identifier)
            assert fetched.identifier == original.identifier

            fetched_text = pff.format_message(fetched)
            original_text = pff.format_message(original)
            assert fetched_text == original_text

Пример #4

Показать файл

def test_pff_archive_with_bad_folders(sample_pst_file):
    """Replace ``folders`` with a mock returning one ``BadPffFolder``.

    With that mock in place the archive must report zero messages and
    iterate over none, without letting an exception escape.
    """
    with PffArchive(sample_pst_file) as pff, patch.object(
        pff, "folders"
    ) as folders_mock:
        folders_mock.return_value = [BadPffFolder()]

        # Neither call may raise
        assert pff.message_count == 0
        assert list(pff.messages()) == []

Пример #5

Показать файл

def load_pst(filename):
    """Scan PST files named *filename* below the parent directory.

    Every path matched by the recursive glob ``**/<filename>`` under ``..``
    is opened as a ``PffArchive``.  For each message, the ``<...@...>``
    tokens found in its transport headers are tallied and the report
    counters are updated.  The tally itself is not returned.

    Args:
        filename: File name (or glob pattern) appended to ``'**/'``.

    Returns:
        dict: Counters under the keys ``'Files'``, ``'Messages'``,
        ``'Attachments'``, ``'Size'`` and ``'Errors'``.
    """
    mailbox_path = Path("..")
    print(mailbox_path.absolute())

    report = {
        'Files': 0,
        'Messages': 0,
        'Attachments': 0,
        'Size': 0,
        'Errors': 0
    }

    # Show what is in the search root and which files were matched
    print(sorted(mailbox_path.glob("*")))
    files = sorted(mailbox_path.glob('**/' + filename))
    print(files)
    identifier_set = Counter()

    # Iterate over files
    with tqdm(total=len(files), desc="Files read", unit="files",
              leave=True) as file_bar:
        for pst_file in files:
            try:
                # Iterate over messages
                with PffArchive(pst_file) as archive:
                    print(archive)
                    for message in archive.messages():
                        try:
                            emails = re.findall("<[^<>]*@{1}[^<>]*>",
                                                message.transport_headers)
                            for each in emails:
                                # Strip the surrounding angle brackets
                                identifier_set[each[1:-1]] += 1
                            print()

                            # Update report
                            report['Messages'] += 1
                            report[
                                'Attachments'] += message.number_of_attachments

                        except Exception:
                            # Count the failure and move on to the next
                            # message
                            report['Errors'] += 1

            except Exception as exc:
                # Print the error and move on to the next file
                print(exc)
                print("except")
                report['Errors'] += 1

            # Update report
            report['Files'] += 1
            report['Size'] += pst_file.stat().st_size

            # Advance the progress bar; without this it stays at 0
            file_bar.update(1)
    return report

Пример #6

Показать файл

Файл: test_pst.py Проект: fengluisobel/libratom

def test_extract_enron_messages_from_file(enron_dataset_file):
    """Format every message of the single PST file given by the fixture.

    Any exception is logged together with the file path rather than
    propagated.
    """
    try:
        with PffArchive(enron_dataset_file) as pff:
            for msg in pff.messages():
                # Only the formatting call matters; its result is dropped
                pff.format_message(msg)

    except Exception as exc:  # pylint: disable=broad-except
        logger.info(f"Inspecting {enron_dataset_file}")
        logger.exception(exc)

Пример #7

Показать файл

def test_get_transport_headers_from_sent_items(enron_dataset_part004):
    """Check that messages in "sent" folders have transport headers.

    A folder counts as a sent folder when its lower-cased name contains
    "sent mail" or "sent items".
    """
    sent_markers = ("sent mail", "sent items")

    for pst_path in enron_dataset_part004.glob("*.pst"):
        with PffArchive(pst_path) as pff:
            for folder in pff.folders():
                try:
                    lowered = folder.name.lower()
                except AttributeError:
                    # Only the root folder may fail here; for any other
                    # folder the error is re-raised.
                    # pylint: disable=no-member
                    if folder.identifier == pff._data.root_folder.identifier:
                        continue
                    raise

                if not any(marker in lowered for marker in sent_markers):
                    continue

                for msg in folder.sub_messages:
                    assert msg.transport_headers

Пример #8

Показать файл

Файл: ATClipper.py Проект: shreyahavaldar/ATClipper

        def load_pst(self, filename):
            """Collect ``<...@...>`` tokens from PST files matching *filename*.

            The pattern is globbed relative to the current directory.  Each
            matching file is opened as a ``PffArchive`` and the transport
            headers of its messages are searched.  The distinct tokens,
            without their angle brackets, are stored in ``self.identifiers``
            together with one fixed extra entry.  Local counters are kept
            while scanning but are not returned.
            """
            totals = {
                'Files': 0,
                'Messages': 0,
                'Attachments': 0,
                'Size': 0,
                'Errors': 0
            }

            matched_files = sorted(Path().glob(filename))
            seen = Counter()

            for pst_path in matched_files:
                try:
                    with PffArchive(pst_path) as pff:
                        for msg in pff.messages():
                            try:
                                tokens = re.findall(
                                    "<[^<>]*@{1}[^<>]*>",
                                    msg.transport_headers)
                                # Drop the enclosing "<" and ">" before counting
                                seen.update(token[1:-1] for token in tokens)

                                totals['Messages'] += 1
                                totals['Attachments'] += msg.number_of_attachments

                            except Exception:
                                # Count the failure, continue with the next message
                                totals['Errors'] += 1

                except Exception:
                    # Count the failure, continue with the next file
                    totals['Errors'] += 1

                totals['Files'] += 1
                totals['Size'] += pst_path.stat().st_size

            self.identifiers = set(seen)
            self.identifiers.add("*****@*****.**")

Пример #9

Показать файл

Файл: test_pst.py Проект: fengluisobel/libratom

def test_extract_enron_messages(enron_dataset):
    """Format every message of every PST file found below the fixture path.

    Keeps a running count of formatted messages and of the total size of
    the files that were read without error, then logs both.  Exceptions
    are logged per file rather than propagated.
    """
    total_size = 0
    nb_extracted = 0

    for pst_file in enron_dataset.glob("**/*.pst"):
        try:
            with PffArchive(pst_file) as pff:
                for msg in pff.messages():
                    # Only the formatting call matters; its result is dropped
                    pff.format_message(msg)
                    nb_extracted += 1

            # Reached only when the whole archive was read without error
            total_size += pst_file.stat().st_size

        except Exception as exc:  # pylint: disable=broad-except
            logger.info(f"Inspecting {pst_file}")
            logger.exception(exc)

    logger.info(
        f"Extracted {nb_extracted} messages from a total of {humanfriendly.format_size(total_size)}"
    )

Пример #10

Показать файл

def test_pffarchive_iterate_over_messages(sample_pst_file, bfs):
    """Iterate the sample archive and require a plain-text body on each message.

    ``bfs`` is passed straight through to ``PffArchive.messages``.
    """
    with PffArchive(sample_pst_file) as pff:
        for msg in pff.messages(bfs=bfs):
            assert msg.plain_text_body

Пример #11

Показать файл

def test_pffarchive_load_from_invalid_type():
    """Constructing a ``PffArchive`` from an integer must raise ``TypeError``."""
    invalid_source = 1

    with pytest.raises(TypeError):
        PffArchive(invalid_source)

Пример #12

Показать файл

def test_pffarchive_load_from_file_object(sample_pst_file):
    """Open the archive from a binary file object and check the message count."""
    with sample_pst_file.open(mode="rb") as stream:
        with PffArchive(stream) as pff:
            all_messages = list(pff.messages())
            assert len(all_messages) == 2668

Пример #13

Показать файл

def test_get_message_body(message, body_type):
    """The item at index 1 of ``get_message_body``'s result must be ``body_type``."""
    body_info = PffArchive().get_message_body(message)
    assert body_info[1] is body_type

Пример #14

Показать файл

def test_get_attachment_metadata(mock_cls):
    """The first metadata entry of a mocked attachment must have no MIME type."""
    fake_attachment = mock_cls(name="foo", size="0")
    message = MagicMock(identifier=123, attachments=[fake_attachment])

    metadata = PffArchive().get_attachment_metadata(message)
    assert metadata[0].mime_type is None

Пример #15

Показать файл

def test_get_message_by_id_with_bad_id(sample_pst_file):
    """Looking up identifier 1234 in the sample archive must return ``None``."""
    bad_id = 1234

    with PffArchive(sample_pst_file) as pff:
        lookup = pff.get_message_by_id(bad_id)
        assert lookup is None