Пример #1
0
  def testReadline(self):
    """Tests the readline() function.

    Reads the first line of the 'another_file' test file without a size
    limit, then creates a second line reader on the same file object and
    reads with a size limit of 11 bytes. After each read the returned data
    and the offset reported by tell() are checked.
    """
    test_file_path = self._GetTestFilePath(['another_file'])
    self._SkipIfPathNotExists(test_file_path)

    test_path_spec = os_path_spec.OSPathSpec(location=test_file_path)

    file_object = os_file_io.OSFile(self._resolver_context)
    file_object.open(test_path_spec)

    # Close the file object even when an assertion fails, so that a failing
    # test does not leave the file open.
    try:
      line_reader = line_reader_file.BinaryLineReader(file_object)

      line = line_reader.readline()
      self.assertEqual(line, b'This is another file.\n')

      offset = line_reader.tell()
      self.assertEqual(offset, 22)

      # A new line reader on the same file object is expected to return the
      # data from the start of the file again.
      line_reader = line_reader_file.BinaryLineReader(file_object)

      line = line_reader.readline(size=11)
      self.assertEqual(line, b'This is ano')

      offset = line_reader.tell()
      self.assertEqual(offset, 11)

    finally:
      file_object.close()
Пример #2
0
    def testReadlineMultipleLines(self):
        """Tests the readline() function on multiple lines.

        Alternates unbounded and size-limited readline() calls on the
        'password.csv' test file and checks the data returned and the offset
        reported by tell() after every read.
        """
        test_file = self._GetTestFilePath(['password.csv'])
        test_path_spec = os_path_spec.OSPathSpec(location=test_file)

        file_object = os_file_io.OSFile(self._resolver_context)
        file_object.open(test_path_spec)

        # Close the file object even when an assertion fails, so that a
        # failing test does not leave the file open.
        try:
            line_reader = line_reader_file.BinaryLineReader(file_object)

            line = line_reader.readline()
            self.assertEqual(line, b'place,user,password\n')

            offset = line_reader.tell()
            self.assertEqual(offset, 20)

            # A size-limited read returns only the first part of the line.
            line = line_reader.readline(size=5)
            self.assertEqual(line, b'bank,')

            offset = line_reader.tell()
            self.assertEqual(offset, 25)

            # The next unbounded read returns the remainder of that line.
            line = line_reader.readline()
            self.assertEqual(line, b'joesmith,superrich\n')

            offset = line_reader.tell()
            self.assertEqual(offset, 44)

            line = line_reader.readline()
            self.assertEqual(line, b'alarm system,-,1234\n')

            offset = line_reader.tell()
            self.assertEqual(offset, 64)

        finally:
            file_object.close()
Пример #3
0
    def _CreateLineReader(self, file_object):
        """Creates an object that reads lines from a text file.

        The line reader is advanced to the beginning of the DSV content,
        skipping any header lines.

        Args:
          file_object (dfvfs.FileIO): file-like object.

        Returns:
          TextFile|BinaryLineReader: an object that implements an iterator
              over lines in a text file.

        Raises:
          UnicodeDecodeError: if the file cannot be read with the specified
              encoding.
        """
        # The Python 2 csv module reads bytes and the Python 3 csv module
        # reads Unicode strings.
        if py2to3.PY_3:
            line_reader = text_file.TextFile(file_object,
                                             encoding=self._encoding,
                                             end_of_line=self._end_of_line)
        else:
            line_reader = line_reader_file.BinaryLineReader(
                file_object, end_of_line=self._end_of_line)

        # If we specifically define a number of lines we should skip, do that
        # here. A UnicodeDecodeError raised while reading a header line is
        # deliberately left to propagate to the caller.
        for _ in range(self.NUMBER_OF_HEADER_LINES):
            line_reader.readline(self._maximum_line_length)

        return line_reader
Пример #4
0
    def testIterator(self):
        """Tests iterating over the rows of a BinaryDSVReader.

        Reads the 'password.csv' test file through a BinaryLineReader wrapped
        in a comma-delimited BinaryDSVReader and checks the rows it yields.
        """
        path = self._GetTestFilePath(['password.csv'])
        self._SkipIfPathNotExists(path)

        resolver_context = context.Context()

        path_spec = os_path_spec.OSPathSpec(location=path)
        file_object = path_spec_resolver.Resolver.OpenFileObject(
            path_spec, resolver_context=resolver_context)

        binary_line_reader = line_reader_file.BinaryLineReader(file_object)
        dsv_reader = line_reader_file.BinaryDSVReader(
            binary_line_reader, delimiter=b',')

        # Exhaust the reader so that every row can be checked by index.
        rows = list(dsv_reader)

        expected_rows = [
            [b'place', b'user', b'password'],
            [b'bank', b'joesmith', b'superrich'],
            [b'alarm system', b'-', b'1234'],
            [b'treasure chest', b'-', b'1111'],
            [b'uber secret laire', b'admin', b'admin']]

        self.assertEqual(len(rows), 5)
        for index, expected_row in enumerate(expected_rows):
            self.assertEqual(rows[index], expected_row)
Пример #5
0
    def testReadlinesWithFileWithoutNewLineAtEnd(self):
        """Tests reading lines from a file without a new line char at the end.

        Reads all lines of the 'mactime.body' test file with readlines() and
        checks the number of lines returned.
        """
        test_file = self._GetTestFilePath(['mactime.body'])
        test_file_path_spec = os_path_spec.OSPathSpec(location=test_file)

        file_object = os_file_io.OSFile(self._resolver_context)
        file_object.open(test_file_path_spec)

        # The file object is opened explicitly, so it is closed explicitly as
        # well, also when the assertion fails.
        try:
            line_reader = line_reader_file.BinaryLineReader(file_object)

            lines = line_reader.readlines()

            self.assertEqual(len(lines), 17)

        finally:
            file_object.close()
Пример #6
0
    def testReadlinesWithFileWithoutNewLineAtEnd(self):
        """Tests readlines() on a file without a new line char at the end.

        Reads all lines of the 'mactime.body' test file and checks the number
        of lines returned.
        """
        path = self._GetTestFilePath(['mactime.body'])
        self._SkipIfPathNotExists(path)

        path_spec = os_path_spec.OSPathSpec(location=path)
        file_object = path_spec_resolver.Resolver.OpenFileObject(
            path_spec, resolver_context=self._resolver_context)

        binary_line_reader = line_reader_file.BinaryLineReader(file_object)
        number_of_lines = len(binary_line_reader.readlines())

        self.assertEqual(number_of_lines, 22)
Пример #7
0
    def testReadlinesWithSizeHint(self):
        """Tests the readlines() function with a size hint.

        Calls readlines() with a size hint of 60 on the 'password.csv' test
        file and checks that the first three lines are returned.
        """
        test_file = self._GetTestFilePath(['password.csv'])
        test_path_spec = os_path_spec.OSPathSpec(location=test_file)

        file_object = os_file_io.OSFile(self._resolver_context)
        file_object.open(test_path_spec)

        # Close the file object even when an assertion fails, so that a
        # failing test does not leave the file open.
        try:
            line_reader = line_reader_file.BinaryLineReader(file_object)

            lines = line_reader.readlines(sizehint=60)

            self.assertEqual(len(lines), 3)
            self.assertEqual(lines[0], b'place,user,password\n')
            self.assertEqual(lines[1], b'bank,joesmith,superrich\n')
            self.assertEqual(lines[2], b'alarm system,-,1234\n')

        finally:
            file_object.close()
Пример #8
0
    def testReadlinesWithSizeHint(self):
        """Tests the readlines() function with a size hint.

        Calls readlines() with a size hint of 60 on the 'password.csv' test
        file and checks that the first three lines are returned.
        """
        path = self._GetTestFilePath(['password.csv'])
        self._SkipIfPathNotExists(path)

        path_spec = os_path_spec.OSPathSpec(location=path)
        file_object = path_spec_resolver.Resolver.OpenFileObject(
            path_spec, resolver_context=self._resolver_context)

        binary_line_reader = line_reader_file.BinaryLineReader(file_object)
        lines = binary_line_reader.readlines(sizehint=60)

        expected_lines = [
            b'place,user,password\n',
            b'bank,joesmith,superrich\n',
            b'alarm system,-,1234\n']

        self.assertEqual(len(lines), 3)
        for index, expected_line in enumerate(expected_lines):
            self.assertEqual(lines[index], expected_line)
Пример #9
0
    def testReadlines(self):
        """Tests the readlines() function.

        Reads all lines of the 'password.csv' test file with readlines() and
        checks the number of lines and the content of each line.
        """
        test_file = self._GetTestFilePath(['password.csv'])
        test_path_spec = os_path_spec.OSPathSpec(location=test_file)

        file_object = os_file_io.OSFile(self._resolver_context)
        file_object.open(test_path_spec)

        # Close the file object even when an assertion fails, so that a
        # failing test does not leave the file open.
        try:
            line_reader = line_reader_file.BinaryLineReader(file_object)

            lines = line_reader.readlines()

            self.assertEqual(len(lines), 5)
            self.assertEqual(lines[0], b'place,user,password\n')
            self.assertEqual(lines[1], b'bank,joesmith,superrich\n')
            self.assertEqual(lines[2], b'alarm system,-,1234\n')
            self.assertEqual(lines[3], b'treasure chest,-,1111\n')
            self.assertEqual(lines[4], b'uber secret laire,admin,admin\n')

        finally:
            file_object.close()
Пример #10
0
    def _CreateLineReader(self, file_object):
        """Creates an object that reads lines from a text file.

        The line reader is advanced past the header lines, so that it is
        positioned at the beginning of the DSV content. As a side effect the
        maximum line length of the parser is lowered when it exceeds the
        maximum read buffer size of the line reader.

        Args:
          file_object (dfvfs.FileIO): file-like object.

        Returns:
          TextFile|BinaryLineReader: an object that implements an iterator
              over lines in a text file.

        Raises:
          UnicodeDecodeError: if the file cannot be read with the specified
              encoding.
        """
        # The Python 2 csv module reads bytes and the Python 3 csv module
        # reads Unicode strings.
        if not py2to3.PY_3:
            line_reader = line_reader_file.BinaryLineReader(
                file_object, end_of_line=self._end_of_line)

            buffer_size_limit = line_reader.MAXIMUM_READ_BUFFER_SIZE

        else:
            line_reader = text_file.TextFile(
                file_object, encoding=self._encoding,
                end_of_line=self._end_of_line)

            # pylint: disable=protected-access
            buffer_size_limit = line_reader._MAXIMUM_READ_BUFFER_SIZE

        # The line length is capped at one less than the maximum read buffer
        # size, so that a line that does not end before the end of the buffer
        # can be told apart from a complete line.
        # NOTE(review): a maximum line length exactly equal to the buffer size
        # is left unchanged by this comparison - confirm that is intended.
        if self._maximum_line_length > buffer_size_limit:
            self._maximum_line_length = buffer_size_limit - 1

        # Skip the header lines, if any are defined.
        remaining_header_lines = self.NUMBER_OF_HEADER_LINES
        while remaining_header_lines > 0:
            line_reader.readline(self._maximum_line_length)
            remaining_header_lines -= 1

        return line_reader
Пример #11
0
    def testIterator(self):
        """Tests iterating over the lines of a BinaryLineReader.

        Iterates over the 'password.csv' test file and checks the number of
        lines and the content of each line.
        """
        path = self._GetTestFilePath(['password.csv'])
        self._SkipIfPathNotExists(path)

        path_spec = os_path_spec.OSPathSpec(location=path)
        file_object = path_spec_resolver.Resolver.OpenFileObject(
            path_spec, resolver_context=self._resolver_context)

        binary_line_reader = line_reader_file.BinaryLineReader(file_object)

        # Exhaust the iterator so that every line can be checked by index.
        lines = list(binary_line_reader)

        expected_lines = [
            b'place,user,password\n',
            b'bank,joesmith,superrich\n',
            b'alarm system,-,1234\n',
            b'treasure chest,-,1111\n',
            b'uber secret laire,admin,admin\n']

        self.assertEqual(len(lines), 5)
        for index, expected_line in enumerate(expected_lines):
            self.assertEqual(lines[index], expected_line)
Пример #12
0
    def _ParseFileData(self, mediator, file_object):
        """Parses file content (data) for user account preprocessing attributes.

        The data is read as colon-delimited rows. Rows with fewer than 7
        values, or with an empty first or third value, are reported as a
        preprocessing warning and skipped. The first value is used as the
        username and the third as the user identifier; the fourth through
        seventh values are optional and left as None when empty or when they
        cannot be decoded as UTF-8.

        Args:
          mediator (PreprocessMediator): mediates interactions between
              preprocess plugins and other components, such as storage and
              knowledge base.
          file_object (dfvfs.FileIO): file-like object that contains the
              artifact value data.

        Raises:
          errors.PreProcessFail: if the preprocessing fails.
        """
        def _ProduceWarning(message):
            """Produces a preprocessing warning for this artifact definition."""
            mediator.ProducePreprocessingWarning(
                self.ARTIFACT_DEFINITION_NAME, message)

        # Row index, user account attribute name and warning message of the
        # optional values, in the order in which they are processed.
        optional_values = (
            (3, 'group_identifier', 'Unable to decode group identifier.'),
            (4, 'full_name', 'Unable to decode full name.'),
            (5, 'user_directory', 'Unable to decode user directory.'),
            (6, 'shell', 'Unable to decode shell.'))

        line_reader = line_reader_file.BinaryLineReader(file_object)

        try:
            reader = line_reader_file.BinaryDSVReader(line_reader, b':')
        except csv.Error as exception:
            raise errors.PreProcessFail(
                'Unable to read: {0:s} with error: {1!s}'.format(
                    self.ARTIFACT_DEFINITION_NAME, exception))

        # Note that the line number reported in warnings is zero-based.
        for line_number, row in enumerate(reader):
            if not (len(row) >= 7 and row[0] and row[2]):
                _ProduceWarning(
                    'Unsupported number of values in line: {0:d}.'.format(
                        line_number))
                continue

            # The username and identifier are required: a row is skipped when
            # either of them cannot be decoded.
            try:
                username = row[0].decode('utf-8')
            except UnicodeDecodeError:
                _ProduceWarning('Unable to decode username.')
                continue

            try:
                identifier = row[2].decode('utf-8')
            except UnicodeDecodeError:
                _ProduceWarning('Unable to decode user identifier.')
                continue

            # The remaining values are optional: a decoding failure produces
            # a warning but does not prevent the user account being added.
            attribute_values = []
            for value_index, attribute_name, message in optional_values:
                attribute_value = None
                if row[value_index]:
                    try:
                        attribute_value = row[value_index].decode('utf-8')
                    except UnicodeDecodeError:
                        _ProduceWarning(message)

                attribute_values.append((attribute_name, attribute_value))

            user_account = artifacts.UserAccountArtifact(
                identifier=identifier, username=username)
            for attribute_name, attribute_value in attribute_values:
                setattr(user_account, attribute_name, attribute_value)

            try:
                mediator.AddUserAccount(user_account)
            except KeyError as exception:
                _ProduceWarning(
                    'Unable to add user account with error: {0!s}'.format(
                        exception))
Пример #13
0
    def ParseFileObject(self, parser_mediator, file_object, **unused_kwargs):
        """Parses a DSV text file-like object.

        The first row is used to determine if the file is supported: it must
        be readable, contain the expected number of values, not contain the
        magic test string and pass VerifyRow. After that every row is
        converted to Unicode and passed to ParseRow together with the offset
        reported by the line reader before the row was read.

        Args:
          parser_mediator (ParserMediator): mediates interactions between
              parsers and other components, such as storage and dfvfs.
          file_object (dfvfs.FileIO): file-like object.

        Raises:
          UnableToParseFile: when the file cannot be parsed.
        """
        def _CreateParseError(format_string, *values):
            """Creates an UnableToParseFile error for the file being parsed."""
            display_name = parser_mediator.GetDisplayName()
            return errors.UnableToParseFile(
                format_string.format(self.NAME, display_name, *values))

        # The csv module can consume a lot of memory, 1 GiB for a 100 MiB
        # file. Hence the maximum supported file size is restricted.
        if file_object.get_size() > self._MAXIMUM_SUPPORTED_FILE_SIZE:
            raise _CreateParseError(
                '[{0:s}] Unable to parse DSV file: {1:s} size of file exceeds '
                'maximum supported size')

        # TODO: Replace this with detection of the file encoding via byte-order
        # marks. Also see: https://github.com/log2timeline/plaso/issues/1971
        if not self._encoding:
            self._encoding = parser_mediator.codepage

        # The Python 2 csv module reads bytes and the Python 3 csv module
        # reads Unicode strings.
        if not py2to3.PY_3:
            line_reader = line_reader_file.BinaryLineReader(file_object)
        else:
            line_reader = text_file.TextFile(
                file_object, encoding=self._encoding)

        # Skip the header lines, if any are defined.
        for _ in range(self.NUMBER_OF_HEADER_LINES):
            line_reader.readline()

        reader = self._CreateDictReader(line_reader)

        # The offset is determined before the row is read, so that it is the
        # offset that was current when reading of the row started.
        row_offset = line_reader.tell()
        try:
            row = next(reader)
        except (StopIteration, csv.Error, UnicodeDecodeError) as exception:
            raise _CreateParseError(
                '[{0:s}] Unable to parse DSV file: {1:s} with error: {2!s}.',
                exception)

        expected_number_of_values = len(self.COLUMNS)
        number_of_values = len(row)

        if expected_number_of_values != number_of_values:
            raise _CreateParseError(
                '[{0:s}] Unable to parse DSV file: {1:s}. Wrong number of '
                'records (expected: {2:d}, got: {3:d})',
                expected_number_of_values, number_of_values)

        # The magic test string as a key or a value is treated as a signature
        # mismatch.
        if any(self._MAGIC_TEST_STRING in item for item in row.items()):
            raise _CreateParseError(
                '[{0:s}] Unable to parse DSV file: {1:s}. Signature '
                'mismatch.')

        row = self._ConvertRowToUnicode(parser_mediator, row)

        if not self.VerifyRow(parser_mediator, row):
            raise _CreateParseError(
                '[{0:s}] Unable to parse DSV file: {1:s}. Verification '
                'failed.')

        self.ParseRow(parser_mediator, row_offset, row)
        row_offset = line_reader.tell()

        for row in reader:
            if parser_mediator.abort:
                break

            unicode_row = self._ConvertRowToUnicode(parser_mediator, row)
            self.ParseRow(parser_mediator, row_offset, unicode_row)
            row_offset = line_reader.tell()
Пример #14
0
    def ParseFileObject(self, parser_mediator, file_object, **unused_kwargs):
        """Parses a DSV text file-like object.

        The first row is used to determine if the file is supported: it must
        be readable, contain the expected number of values, not contain the
        magic test string and pass VerifyRow. After that every row is
        converted to Unicode and passed to ParseRow together with the offset
        reported by the line reader before the row was read.

        Args:
          parser_mediator (ParserMediator): mediates interactions between
              parsers and other components, such as storage and dfvfs.
          file_object (dfvfs.FileIO): file-like object.

        Raises:
          UnableToParseFile: when the file cannot be parsed.
        """
        def _CreateParseError(format_string, *values):
            """Creates an UnableToParseFile error for the file being parsed."""
            display_name = parser_mediator.GetDisplayName()
            return errors.UnableToParseFile(
                format_string.format(self.NAME, display_name, *values))

        line_reader = line_reader_file.BinaryLineReader(file_object)

        # Skip the header lines, if any are defined.
        for _ in range(self.NUMBER_OF_HEADER_LINES):
            line_reader.readline()

        reader = self._CreateDictReader(parser_mediator, line_reader)

        # The offset is determined before the row is read, so that it is the
        # offset that was current when reading of the row started.
        row_offset = line_reader.tell()
        try:
            row = next(reader)
        except (StopIteration, csv.Error) as exception:
            raise _CreateParseError(
                '[{0:s}] Unable to parse DSV file: {1:s} with error: {2!s}.',
                exception)

        expected_number_of_values = len(self.COLUMNS)
        number_of_values = len(row)

        if expected_number_of_values != number_of_values:
            raise _CreateParseError(
                '[{0:s}] Unable to parse DSV file: {1:s}. Wrong number of '
                'records (expected: {2:d}, got: {3:d})',
                expected_number_of_values, number_of_values)

        # The magic test string as a key or a value is treated as a signature
        # mismatch.
        if any(self._MAGIC_TEST_STRING in item for item in row.items()):
            raise _CreateParseError(
                '[{0:s}] Unable to parse DSV file: {1:s}. Signature '
                'mismatch.')

        # Note that the first row is verified before it is converted.
        if not self.VerifyRow(parser_mediator, row):
            raise _CreateParseError(
                '[{0:s}] Unable to parse DSV file: {1:s}. Verification '
                'failed.')

        unicode_row = self._ConvertRowToUnicode(parser_mediator, row)
        self.ParseRow(parser_mediator, row_offset, unicode_row)
        row_offset = line_reader.tell()

        for row in reader:
            if parser_mediator.abort:
                break

            unicode_row = self._ConvertRowToUnicode(parser_mediator, row)
            self.ParseRow(parser_mediator, row_offset, unicode_row)
            row_offset = line_reader.tell()
Пример #15
0
    def _ParseFileData(self, knowledge_base, file_object):
        """Parses file content (data) for user account preprocessing attributes.

        The data is read as colon-delimited rows. Rows with fewer than 7
        values, or with an empty first or third value, are skipped. The first
        value is used as the username and the third as the user identifier;
        the fourth through seventh values are optional and left as None when
        empty or when they cannot be decoded as UTF-8.

        Args:
          knowledge_base (KnowledgeBase): to fill with preprocessing
              information.
          file_object (dfvfs.FileIO): file-like object that contains the
              artifact value data.

        Raises:
          errors.PreProcessFail: if the preprocessing fails.
        """
        line_reader = line_reader_file.BinaryLineReader(file_object)

        try:
            reader = line_reader_file.BinaryDSVReader(line_reader, b':')
        except csv.Error as exception:
            raise errors.PreProcessFail(
                'Unable to read: {0:s} with error: {1!s}'.format(
                    self.ARTIFACT_DEFINITION_NAME, exception))

        for row in reader:
            if len(row) < 7 or not row[0] or not row[2]:
                # TODO: add and store preprocessing errors.
                continue

            # The username and identifier are required: a row is skipped when
            # either of them cannot be decoded.
            try:
                username = row[0].decode('utf-8')
            except UnicodeDecodeError:
                # TODO: add and store preprocessing errors.
                logger.error('Unable to decode username.')
                continue

            try:
                identifier = row[2].decode('utf-8')
            except UnicodeDecodeError:
                # TODO: add and store preprocessing errors.
                logger.error('Unable to decode identifier.')
                continue

            # The remaining values are optional: a decoding failure is logged
            # but does not prevent the user account being added.
            group_identifier = None
            if row[3]:
                try:
                    group_identifier = row[3].decode('utf-8')
                except UnicodeDecodeError:
                    # TODO: add and store preprocessing errors.
                    logger.error('Unable to decode group identifier.')

            full_name = None
            if row[4]:
                try:
                    full_name = row[4].decode('utf-8')
                except UnicodeDecodeError:
                    # TODO: add and store preprocessing errors.
                    logger.error('Unable to decode full name.')

            user_directory = None
            if row[5]:
                try:
                    user_directory = row[5].decode('utf-8')
                except UnicodeDecodeError:
                    # TODO: add and store preprocessing errors.
                    logger.error('Unable to decode user directory.')

            shell = None
            if row[6]:
                try:
                    shell = row[6].decode('utf-8')
                except UnicodeDecodeError:
                    # TODO: add and store preprocessing errors.
                    logger.error('Unable to decode shell.')

            user_account = artifacts.UserAccountArtifact(identifier=identifier,
                                                         username=username)
            user_account.group_identifier = group_identifier
            user_account.full_name = full_name
            user_account.user_directory = user_directory
            user_account.shell = shell

            try:
                knowledge_base.AddUserAccount(user_account)
            except KeyError as exception:
                # TODO: add and store preprocessing errors.
                # Report the failure instead of silently dropping the user
                # account; processing continues with the next row.
                logger.error(
                    'Unable to add user account with error: {0!s}'.format(
                        exception))
Пример #16
0
  def ParseFileObject(self, parser_mediator, file_object, **unused_kwargs):
    """Parses a DSV text file-like object.

    The first row is used to determine if the file is supported: it must be
    readable, contain the expected number of values, not contain the magic
    test string and pass VerifyRow. After that every row is converted to
    Unicode and passed to ParseRow together with the offset reported by the
    line reader before the row was read.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      file_object (dfvfs.FileIO): file-like object.

    Raises:
      UnableToParseFile: when the file cannot be parsed.
    """
    if not self._encoding:
      self._encoding = parser_mediator.codepage

    delimiter = self.DELIMITER
    quotechar = self.QUOTE_CHAR
    magic_test_string = self._MAGIC_TEST_STRING
    # Python 3 csv module requires arguments to constructor to be of type str.
    if sys.version_info[0] >= 3:
      delimiter = delimiter.decode(self._encoding)
      quotechar = quotechar.decode(self._encoding)
      magic_test_string = magic_test_string.decode(self._encoding)

    line_reader = line_reader_file.BinaryLineReader(file_object)

    # If we specifically define a number of lines we should skip, do that here.
    for _ in range(0, self.NUMBER_OF_HEADER_LINES):
      line_reader.readline()

    # The magic test string is used as the key for surplus values and as the
    # value for missing ones, so that rows with an unexpected number of values
    # can be detected below.
    reader = csv.DictReader(
        line_reader, delimiter=delimiter, fieldnames=self.COLUMNS,
        quotechar=quotechar, restkey=magic_test_string,
        restval=magic_test_string)

    # The offset is determined before the row is read, so that it is the
    # offset that was current when reading of the row started.
    row_offset = line_reader.tell()
    try:
      row = next(reader)
    except (StopIteration, csv.Error) as exception:
      display_name = parser_mediator.GetDisplayName()
      raise errors.UnableToParseFile(
          '[{0:s}] Unable to parse DSV file: {1:s} with error: {2!s}.'.format(
              self.NAME, display_name, exception))

    number_of_columns = len(self.COLUMNS)
    number_of_records = len(row)

    if number_of_records != number_of_columns:
      display_name = parser_mediator.GetDisplayName()
      raise errors.UnableToParseFile((
          '[{0:s}] Unable to parse DSV file: {1:s}. Wrong number of '
          'records (expected: {2:d}, got: {3:d})').format(
              self.NAME, display_name, number_of_columns,
              number_of_records))

    # Compare against the same value that was passed to the DictReader. On
    # Python 3 that is the decoded string, which never compares equal to the
    # undecoded self._MAGIC_TEST_STRING.
    for key, value in row.items():
      if magic_test_string in (key, value):
        display_name = parser_mediator.GetDisplayName()
        raise errors.UnableToParseFile((
            '[{0:s}] Unable to parse DSV file: {1:s}. Signature '
            'mismatch.').format(self.NAME, display_name))

    # Note that the first row is verified before it is converted.
    if not self.VerifyRow(parser_mediator, row):
      display_name = parser_mediator.GetDisplayName()
      raise errors.UnableToParseFile((
          '[{0:s}] Unable to parse DSV file: {1:s}. Verification '
          'failed.').format(self.NAME, display_name))

    row = self._ConvertRowToUnicode(parser_mediator, row)
    self.ParseRow(parser_mediator, row_offset, row)
    row_offset = line_reader.tell()

    for row in reader:
      if parser_mediator.abort:
        break
      row = self._ConvertRowToUnicode(parser_mediator, row)
      self.ParseRow(parser_mediator, row_offset, row)
      row_offset = line_reader.tell()