Exemplo n.º 1
0
    def testReadlineUTF16(self):
        """Tests readline() against UTF-16 little-endian encoded text."""
        test_path = self._GetTestFilePath(['another_file.utf16'])
        self._SkipIfPathNotExists(test_path)

        path_spec = os_path_spec.OSPathSpec(location=test_path)

        file_object = os_file_io.OSFile(self._resolver_context)
        file_object.open(path_spec)

        # A full line read decodes the UTF-16 data and reports the offset in
        # bytes, not characters.
        reader = text_file.TextFile(file_object, encoding='utf-16-le')
        self.assertEqual(reader.readline(), 'This is another file.\n')
        self.assertEqual(reader.get_offset(), 46)

        # A sized read is bounded by the byte size, hence fewer characters.
        reader = text_file.TextFile(file_object, encoding='utf-16-le')
        self.assertEqual(reader.readline(size=24), 'This is ano')
        self.assertEqual(reader.get_offset(), 24)

        file_object.close()
Exemplo n.º 2
0
  def testReadlineUTF16(self):
    """Tests readline() against UTF-16 little-endian encoded text."""
    test_path = self._GetTestFilePath(['another_file.utf16'])
    self._SkipIfPathNotExists(test_path)

    test_os_path_spec = path_spec_factory.Factory.NewPathSpec(
        definitions.TYPE_INDICATOR_OS, location=test_path)
    file_object = resolver.Resolver.OpenFileObject(
        test_os_path_spec, resolver_context=self._resolver_context)

    # A full line read decodes the UTF-16 data and reports the offset in
    # bytes, not characters.
    reader = text_file.TextFile(file_object, encoding='utf-16-le')
    self.assertEqual(reader.readline(), 'This is another file.\n')
    self.assertEqual(reader.get_offset(), 46)

    # A sized read is bounded by the byte size, hence fewer characters.
    reader = text_file.TextFile(file_object, encoding='utf-16-le')
    self.assertEqual(reader.readline(size=24), 'This is ano')
    self.assertEqual(reader.get_offset(), 24)
Exemplo n.º 3
0
  def testReadlineWithError(self):
    """Test the readline() function with an encoding error."""
    test_path = self._GetTestFilePath(['another_file_with_error'])
    self._SkipIfPathNotExists(test_path)

    test_os_path_spec = path_spec_factory.Factory.NewPathSpec(
        definitions.TYPE_INDICATOR_OS, location=test_path)
    file_object = resolver.Resolver.OpenFileObject(
        test_os_path_spec, resolver_context=self._resolver_context)

    # With the default (strict) error handling an invalid byte sequence
    # raises UnicodeDecodeError.
    text_file_object = text_file.TextFile(file_object)

    with self.assertRaises(UnicodeDecodeError):
      text_file_object.readline()

    # With 'replace' error handling the invalid byte is substituted by the
    # Unicode replacement character (U+FFFD).
    text_file_object = text_file.TextFile(
        file_object, encoding_errors='replace')

    line = text_file_object.readline()
    self.assertEqual(line, 'This is ano\ufffdher file.\n')

    offset = text_file_object.get_offset()
    self.assertEqual(offset, 22)

    # A sized read that stops before the invalid byte does not raise.
    text_file_object = text_file.TextFile(file_object)

    line = text_file_object.readline(size=11)
    self.assertEqual(line, 'This is ano')

    offset = text_file_object.get_offset()
    self.assertEqual(offset, 11)
Exemplo n.º 4
0
    def testReadline(self):
        """Tests the readline() function."""
        test_path = self._GetTestFilePath(['another_file'])
        self._SkipIfPathNotExists(test_path)

        path_spec = os_path_spec.OSPathSpec(location=test_path)

        file_object = os_file_io.OSFile(self._resolver_context)
        file_object.open(path_spec)

        # A full line read returns the line including its end-of-line
        # character and advances the offset accordingly.
        reader = text_file.TextFile(file_object)
        self.assertEqual(reader.readline(), 'This is another file.\n')
        self.assertEqual(reader.get_offset(), 22)

        # A sized read stops after the requested number of bytes.
        reader = text_file.TextFile(file_object)
        self.assertEqual(reader.readline(size=11), 'This is ano')
        self.assertEqual(reader.get_offset(), 11)

        file_object.close()
Exemplo n.º 5
0
    def testReadLine(self):
        """Tests the _ReadLine function."""
        resolver_context = dfvfs_context.Context()

        # Case 1: well-formed UTF-8 data decodes cleanly and the trailing
        # end-of-line is stripped by _ReadLine.
        test_path_spec = fake_path_spec.FakePathSpec(location='/file.txt')
        data = b'This is another file.'
        file_object = fake_file_io.FakeFile(resolver_context, test_path_spec,
                                            data)
        file_object.Open()

        test_parser = TestPyparsingSingleLineTextParser()
        test_text_file = dfvfs_text_file.TextFile(file_object,
                                                  encoding='utf-8')
        line = test_parser._ReadLine(test_text_file)
        self.assertEqual(line, 'This is another file.')

        # Case 2: an invalid byte (0xba) with strict error handling raises
        # UnicodeDecodeError.
        test_path_spec = fake_path_spec.FakePathSpec(location='/file.txt')
        data = b'This is an\xbather file.'
        file_object = fake_file_io.FakeFile(resolver_context, test_path_spec,
                                            data)
        file_object.Open()

        test_parser = TestPyparsingSingleLineTextParser()
        test_text_file = dfvfs_text_file.TextFile(file_object, encoding='utf8')
        with self.assertRaises(UnicodeDecodeError):
            test_parser._ReadLine(test_text_file)

        # Case 3: 'replace' error handling substitutes the invalid byte with
        # the Unicode replacement character (U+FFFD).
        test_path_spec = fake_path_spec.FakePathSpec(location='/file.txt')
        data = b'This is an\xbather file.'
        file_object = fake_file_io.FakeFile(resolver_context, test_path_spec,
                                            data)
        file_object.Open()

        test_parser = TestPyparsingSingleLineTextParser()
        test_text_file = dfvfs_text_file.TextFile(file_object,
                                                  encoding='utf8',
                                                  encoding_errors='replace')
        line = test_parser._ReadLine(test_text_file)
        self.assertEqual(line, 'This is an\ufffdther file.')

        # Case 4: a custom codecs error handler records the position and byte
        # value of each encoding error and escapes the offending byte.
        self._encoding_errors = []
        codecs.register_error('test_handler', self._EncodingErrorHandler)

        test_path_spec = fake_path_spec.FakePathSpec(location='/file.txt')
        data = b'This is an\xbather file.'
        file_object = fake_file_io.FakeFile(resolver_context, test_path_spec,
                                            data)
        file_object.Open()

        test_parser = TestPyparsingSingleLineTextParser()
        test_text_file = dfvfs_text_file.TextFile(
            file_object, encoding='utf8', encoding_errors='test_handler')
        line = test_parser._ReadLine(test_text_file)
        self.assertEqual(line, 'This is an\\xbather file.')

        self.assertEqual(len(self._encoding_errors), 1)
        self.assertEqual(self._encoding_errors[0], (10, 0xba))
Exemplo n.º 6
0
    def _ParseFileData(self, knowledge_base, file_object):
        """Parses file content (data) for a time zone preprocessing attribute.

    Args:
      knowledge_base (KnowledgeBase): to fill with preprocessing information.
      file_object (dfvfs.FileIO): file-like object that contains the artifact
          value data.

    Returns:
      bool: True if all the preprocessing attributes were found and
          the preprocessor plugin is done.

    Raises:
      errors.PreProcessFail: if the preprocessing fails.
    """
        first_line = dfvfs_text_file.TextFile(file_object).readline()

        # The file is expected to contain a single tzdata name such as
        # "America/New_York"; an empty or whitespace-only line yields nothing.
        time_zone = first_line.strip()
        if not time_zone:
            return False

        try:
            knowledge_base.SetTimeZone(time_zone)
        except ValueError:
            # TODO: add and store preprocessing errors.
            return False

        return True
Exemplo n.º 7
0
Arquivo: linux.py Projeto: dfjxs/plaso
    def _ParseFileData(self, mediator, file_object):
        """Parses file content (data) for system product preprocessing attribute.

    Args:
      mediator (PreprocessMediator): mediates interactions between preprocess
          plugins and other components, such as storage and knowledge base.
      file_object (dfvfs.FileIO): file-like object that contains the artifact
          value data.

    Raises:
      errors.PreProcessFail: if the preprocessing fails.
    """
        text_file_object = dfvfs_text_file.TextFile(file_object,
                                                    encoding='utf-8')

        product_values = {}
        for line in text_file_object.readlines():
            line = line.strip()
            if line.startswith('#'):
                continue

            # Ignore lines that do not define a key value pair, such as empty
            # lines; these previously raised ValueError on tuple unpacking.
            if '=' not in line:
                continue

            # Split on the first '=' only, so values that themselves contain
            # '=' are preserved instead of raising ValueError.
            key, value = line.split('=', 1)
            key = key.strip().upper()
            value = value.strip().strip('"')
            product_values[key] = value

        if not mediator.knowledge_base.GetValue('operating_system_product'):
            system_product = product_values.get('DISTRIB_DESCRIPTION', None)
            if system_product:
                mediator.knowledge_base.SetValue('operating_system_product',
                                                 system_product)
Exemplo n.º 8
0
  def testReadlineMultipleLines(self):
    """Tests readline() over multiple consecutive lines."""
    test_path = self._GetTestFilePath(['password.txt'])
    self._SkipIfPathNotExists(test_path)

    test_os_path_spec = path_spec_factory.Factory.NewPathSpec(
        definitions.TYPE_INDICATOR_OS, location=test_path)
    file_object = resolver.Resolver.OpenFileObject(
        test_os_path_spec, resolver_context=self._resolver_context)

    reader = text_file.TextFile(file_object)

    # The sized read in the middle splits the second line across two
    # readline() calls; offsets are cumulative byte positions.
    self.assertEqual(reader.readline(), 'place,user,password\n')
    self.assertEqual(reader.get_offset(), 20)

    self.assertEqual(reader.readline(size=5), 'bank,')
    self.assertEqual(reader.get_offset(), 25)

    self.assertEqual(reader.readline(), 'joesmith,superrich\n')
    self.assertEqual(reader.get_offset(), 44)

    self.assertEqual(reader.readline(), 'alarm system,-,1234\n')
    self.assertEqual(reader.get_offset(), 64)
Exemplo n.º 9
0
  def _ParseFileData(self, knowledge_base, file_object):
    """Parses file content (data) for a hostname preprocessing attribute.

    Args:
      knowledge_base (KnowledgeBase): to fill with preprocessing information.
      file_object (dfvfs.FileIO): file-like object that contains the artifact
          value data.

    Returns:
      bool: True if all the preprocessing attributes were found and
          the preprocessor plugin is done.

    Raises:
      errors.PreProcessFail: if the preprocessing fails.
    """
    result = False
    text_file_object = dfvfs_text_file.TextFile(file_object)
    hostname = text_file_object.readline()

    # NOTE(review): the decode() calls imply readline() returns bytes here;
    # other snippets in this file pass an explicit encoding and treat the
    # result as str — confirm against the dfvfs version in use.
    try:
      hostname = hostname.decode('utf-8')
    except UnicodeDecodeError:
      # TODO: add and store preprocessing errors.
      hostname = hostname.decode('utf-8', errors='replace')

    hostname = hostname.strip()
    if hostname:
      hostname_artifact = artifacts.HostnameArtifact(name=hostname)
      knowledge_base.SetHostname(hostname_artifact)
      result = True

    return result
Exemplo n.º 10
0
    def _ParseFileData(self, mediator, file_object):
        """Parses file content (data) for system product preprocessing attribute.

    Args:
      mediator (PreprocessMediator): mediates interactions between preprocess
          plugins and other components, such as storage and knowledge base.
      file_object (dfvfs.FileIO): file-like object that contains the artifact
          value data.

    Raises:
      errors.PreProcessFail: if the preprocessing fails.
    """
        text_file_object = dfvfs_text_file.TextFile(
            file_object, encoding='utf-8')

        first_line = text_file_object.readline()

        # Only parse known default /etc/issue file contents; anything else is
        # likely a customized banner and is ignored.
        system_product = None
        if first_line.startswith('Debian GNU/Linux '):
            # Drop the escape sequences (such as "\n \l") and trailing space.
            system_product = first_line.partition('\\')[0].rstrip()

        if system_product:
            mediator.SetValue('operating_system_product', system_product)
Exemplo n.º 11
0
  def _ParseFileObject(self, knowledge_base, file_object):
    """Parses a passwd file-like object.

    A passwd file consists of colon separated values in the format:
    "username:password:uid:gid:full name:home directory:shell".

    Args:
      knowledge_base (KnowledgeBase): to fill with preprocessing information.
      file_object (dfvfs.FileIO): file-like object.

    Raises:
      errors.PreProcessFail: if the preprocessing fails.
    """
    text_file_object = text_file.TextFile(file_object)

    # NOTE(review): csv.reader() is lazy — it does not read the input at
    # construction time, so this except clause is unlikely to ever trigger
    # here. Also, the bytes delimiter b':' is only accepted on Python 2; on
    # Python 3 the csv module requires a 1-character str delimiter — confirm
    # the supported Python version before changing.
    try:
      reader = csv.reader(text_file_object, delimiter=b':')
    except csv.Error:
      raise errors.PreProcessFail(u'Unable to read: {0:s}.'.format(self._PATH))

    for row in reader:
      # Skip malformed rows: fewer than 7 fields or a missing username (row
      # index 0) or user identifier (row index 2).
      if len(row) < 7 or not row[0] or not row[2]:
        # TODO: add and store preprocessing errors.
        continue

      user_account = artifacts.UserAccountArtifact(
          identifier=row[2], username=row[0])
      # Empty fields are normalized to None.
      user_account.group_identifier = row[3] or None
      user_account.full_name = row[4] or None
      user_account.user_directory = row[5] or None
      user_account.shell = row[6] or None

      # TODO: refactor the use of store number.
      user_account.store_number = 0
      knowledge_base.SetUserAccount(user_account)
Exemplo n.º 12
0
    def _ParseContainerLogJSON(self, parser_mediator, file_object):
        """Extract events from a Docker container log files.

    The format is one JSON formatted log message per line.

    The path of each container log file (which logs the container stdout and
    stderr) is:
    DOCKER_DIR/containers/<container_id>/<container_id>-json.log

    Args:
      parser_mediator: a parser mediator object (instance of ParserMediator).
      file_object: a file-like object.
    """
        event_attributes = {
            u'container_id': self._GetIDFromPath(parser_mediator)
        }

        text_file_object = text_file.TextFile(file_object)

        for log_line in text_file_object:
            json_log_line = json.loads(log_line)
            if u'log' in json_log_line and u'time' in json_log_line:
                event_attributes[u'log_line'] = json_log_line[u'log']
                # Use get() since u'stream' is not part of the guard above;
                # a direct lookup raised KeyError on lines without it.
                event_attributes[u'log_source'] = json_log_line.get(
                    u'stream', None)
                timestamp = timelib.Timestamp.FromTimeString(
                    json_log_line[u'time'])
                parser_mediator.ProduceEvent(
                    DockerJSONContainerLogEvent(timestamp, 0,
                                                event_attributes))
Exemplo n.º 13
0
    def _CreateLineReader(self, file_object):
        """Creates an object that reads lines from a text file.

    The line reader is advanced to the beginning of the DSV content, skipping
    any header lines.

    Args:
      file_object (dfvfs.FileIO): file-like object.

    Returns:
      TextFile|BinaryLineReader: an object that implements an iterator
          over lines in a text file.

    Raises:
      UnicodeDecodeError: if the file cannot be read with the specified
          encoding.
    """
        # The Python 2 csv module reads bytes and the Python 3 csv module
        # reads Unicode strings.
        if py2to3.PY_3:
            line_reader = text_file.TextFile(file_object,
                                             encoding=self._encoding,
                                             end_of_line=self._end_of_line)
        else:
            line_reader = line_reader_file.BinaryLineReader(
                file_object, end_of_line=self._end_of_line)

        # Skip the configured number of header lines. A UnicodeDecodeError
        # raised by readline() propagates to the caller as documented; the
        # previous try/except that only re-raised it was redundant.
        for _ in range(0, self.NUMBER_OF_HEADER_LINES):
            line_reader.readline(self._maximum_line_length)

        return line_reader
Exemplo n.º 14
0
  def testReadlineMultipleLines(self):
    """Tests readline() over multiple consecutive lines."""
    test_file = self._GetTestFilePath(['password.txt'])
    path_spec = os_path_spec.OSPathSpec(location=test_file)

    file_object = os_file_io.OSFile(self._resolver_context)
    file_object.open(path_spec)

    reader = text_file.TextFile(file_object)

    # The sized read in the middle splits the second line across two
    # readline() calls; offsets are cumulative byte positions.
    self.assertEqual(reader.readline(), 'place,user,password\n')
    self.assertEqual(reader.get_offset(), 20)

    self.assertEqual(reader.readline(size=5), 'bank,')
    self.assertEqual(reader.get_offset(), 25)

    self.assertEqual(reader.readline(), 'joesmith,superrich\n')
    self.assertEqual(reader.get_offset(), 44)

    self.assertEqual(reader.readline(), 'alarm system,-,1234\n')
    self.assertEqual(reader.get_offset(), 64)

    file_object.close()
Exemplo n.º 15
0
  def _ParseFileData(self, knowledge_base, file_object):
    """Parses file content (data) for system product preprocessing attribute.

    Args:
      knowledge_base (KnowledgeBase): to fill with preprocessing information.
      file_object (dfvfs.FileIO): file-like object that contains the artifact
          value data.

    Returns:
      bool: True if all the preprocessing attributes were found and
          the preprocessor plugin is done.

    Raises:
      errors.PreProcessFail: if the preprocessing fails.
    """
    result = False
    text_file_object = dfvfs_text_file.TextFile(file_object)
    system_product = text_file_object.readline()

    # NOTE(review): the decode() calls imply readline() returns bytes here;
    # other snippets in this file pass an explicit encoding and treat the
    # result as str — confirm against the dfvfs version in use.
    try:
      system_product = system_product.decode('utf-8')
    except UnicodeDecodeError:
      # TODO: add and store preprocessing errors.
      system_product = system_product.decode('utf-8', errors='replace')

    system_product = system_product.strip()
    if system_product:
      knowledge_base.SetValue('operating_system_product', system_product)
      result = True

    return result
Exemplo n.º 16
0
    def _ParseFileData(self, knowledge_base, file_object):
        """Parses file content (data) for system product preprocessing attribute.

    Args:
      knowledge_base (KnowledgeBase): to fill with preprocessing information.
      file_object (dfvfs.FileIO): file-like object that contains the artifact
          value data.

    Raises:
      errors.PreProcessFail: if the preprocessing fails.
    """
        text_file_object = dfvfs_text_file.TextFile(
            file_object, encoding='utf-8')

        first_line = text_file_object.readline()

        # Only parse known default /etc/issue file contents; anything else is
        # likely a customized banner and is ignored.
        system_product = None
        if first_line.startswith('Debian GNU/Linux '):
            # Drop the escape sequences (such as "\n \l") and trailing space.
            system_product = first_line.partition('\\')[0].rstrip()

        if not knowledge_base.GetValue('operating_system_product'):
            if system_product:
                knowledge_base.SetValue('operating_system_product',
                                        system_product)
Exemplo n.º 17
0
    def _ParseFileData(self, knowledge_base, file_object):
        """Parses file content (data) for system product preprocessing attribute.

    Args:
      knowledge_base (KnowledgeBase): to fill with preprocessing information.
      file_object (dfvfs.FileIO): file-like object that contains the artifact
          value data.

    Raises:
      errors.PreProcessFail: if the preprocessing fails.
    """
        text_file_object = dfvfs_text_file.TextFile(file_object,
                                                    encoding='utf-8')

        product_values = {}
        for line in text_file_object.readlines():
            line = line.strip()

            # Ignore lines that do not define a key value pair.
            if '=' not in line:
                continue

            # Split on the first '=' only, so values that themselves contain
            # '=' no longer raise ValueError on tuple unpacking.
            key, value = line.split('=', 1)
            key = key.upper()
            value = value.strip('"')
            product_values[key] = value

        if not knowledge_base.GetValue('operating_system_product'):
            system_product = product_values.get('PRETTY_NAME', None)
            if system_product:
                knowledge_base.SetValue('operating_system_product',
                                        system_product)
Exemplo n.º 18
0
    def _ParseFileData(self, mediator, file_object):
        """Parses file content (data) for system product preprocessing attribute.

    Args:
      mediator (PreprocessMediator): mediates interactions between preprocess
          plugins and other components, such as storage and knowledge base.
      file_object (dfvfs.FileIO): file-like object that contains the artifact
          value data.

    Raises:
      errors.PreProcessFail: if the preprocessing fails.
    """
        text_file_object = dfvfs_text_file.TextFile(file_object,
                                                    encoding='utf-8')

        product_values = {}
        for line in text_file_object.readlines():
            line = line.strip()

            # Ignore lines that do not define a key value pair.
            if '=' not in line:
                continue

            # Split on the first '=' only, so values that themselves contain
            # '=' no longer raise ValueError on tuple unpacking.
            key, value = line.split('=', 1)
            key = key.upper()
            value = value.strip('"')
            product_values[key] = value

        system_product = product_values.get('PRETTY_NAME', None)
        if system_product:
            mediator.SetValue('operating_system_product', system_product)
Exemplo n.º 19
0
    def _CreateLineReader(self, file_object, encoding=None):
        """Creates an object that reads lines from a text file.

    The line reader is advanced to the beginning of the DSV content, skipping
    any header lines.

    Args:
      file_object (dfvfs.FileIO): file-like object.
      encoding (Optional[str]): encoding used in the DSV file, where None
          indicates the codepage of the parser mediator should be used.

    Returns:
      TextFile: an object that implements an iterator over lines in a text file.

    Raises:
      UnicodeDecodeError: if the file cannot be read with the specified
          encoding.
    """
        line_reader = text_file.TextFile(file_object,
                                         encoding=encoding,
                                         end_of_line=self._end_of_line)

        # Access the reader's protected read buffer size to clamp the line
        # length below.
        # pylint: disable=protected-access
        maximum_read_buffer_size = line_reader._MAXIMUM_READ_BUFFER_SIZE

        # Keep the maximum line length one less than the maximum read buffer
        # size, so it is possible to detect a line that does not end with an
        # end-of-line character before the end of the buffer.
        if self._maximum_line_length > maximum_read_buffer_size:
            self._maximum_line_length = maximum_read_buffer_size - 1

        # If we specifically define a number of lines we should skip, do that here.
        for _ in range(0, self.NUMBER_OF_HEADER_LINES):
            line_reader.readline(self._maximum_line_length)
        return line_reader
Exemplo n.º 20
0
    def GetValue(self, searcher, unused_knowledge_base):
        """Determines the timezone based on the contents of /etc/timezone.

    Args:
      searcher: The file system searcher object (instance of
                dfvfs.FileSystemSearcher).
      unused_knowledge_base: A knowledge base object (instance of
                             KnowledgeBase); not used by this method, present
                             to satisfy the plugin interface.

    Returns:
      A string containing a tzdata (Olsen) timezone name (for example,
      America/New_York).

    Raises:
      errors.PreProcessFail: if the preprocessing fails.
    """
        path = u'/etc/timezone'
        file_entry = self._FindFileEntry(searcher, path)
        if not file_entry:
            raise errors.PreProcessFail(
                u'Unable to find file entry for path: {0:s}.'.format(path))

        file_object = file_entry.GetFileObject()
        # Ensure the file object is closed even if reading raises.
        try:
            text_file_object = text_file.TextFile(file_object)
            file_data = text_file_object.readline()
        finally:
            file_object.close()
        return file_data.strip()
Exemplo n.º 21
0
    def ParseFileObject(self, parser_mediator, file_object, **unused_kwargs):
        """Parses a CSV text file-like object.

    Args:
      parser_mediator: A parser mediator object (instance of ParserMediator).
      file_object: A file-like object.

    Raises:
      UnableToParseFile: when the file cannot be parsed.
    """
        file_entry = parser_mediator.GetFileEntry()
        path_spec_printable = file_entry.path_spec.comparable.replace(
            u'\n', u';')

        text_file_object = text_file.TextFile(file_object)

        # If we specifically define a number of lines we should skip do that here.
        for _ in range(0, self.NUMBER_OF_HEADER_LINES):
            _ = text_file_object.readline()

        reader = csv.DictReader(text_file_object,
                                fieldnames=self.COLUMNS,
                                restkey=self.MAGIC_TEST_STRING,
                                restval=self.MAGIC_TEST_STRING,
                                delimiter=self.VALUE_SEPARATOR,
                                quotechar=self.QUOTE_CHAR)

        try:
            # Use the builtin next() instead of the Python 2-only
            # reader.next() method; next() works on Python 2.6+ and on
            # Python 3, where csv readers no longer expose next().
            row = next(reader)
        except (csv.Error, StopIteration):
            raise errors.UnableToParseFile(
                u'[{0:s}] Unable to parse CSV file: {1:s}.'.format(
                    self.NAME, path_spec_printable))

        number_of_columns = len(self.COLUMNS)
        number_of_records = len(row)

        # A field-count mismatch indicates this is not the expected format.
        if number_of_records != number_of_columns:
            raise errors.UnableToParseFile(
                (u'[{0:s}] Unable to parse CSV file: {1:s}. Wrong number of '
                 u'records (expected: {2:d}, got: {3:d})').format(
                     self.NAME, path_spec_printable, number_of_columns,
                     number_of_records))

        # The restkey/restval sentinel leaking into the row means the
        # delimiter or quoting did not match the expected format.
        for key, value in row.items():
            if key == self.MAGIC_TEST_STRING or value == self.MAGIC_TEST_STRING:
                raise errors.UnableToParseFile(
                    (u'[{0:s}] Unable to parse CSV file: {1:s}. Signature '
                     u'mismatch.').format(self.NAME, path_spec_printable))

        if not self.VerifyRow(parser_mediator, row):
            raise errors.UnableToParseFile(
                (u'[{0:s}] Unable to parse CSV file: {1:s}. Verification '
                 u'failed.').format(self.NAME, path_spec_printable))

        self.ParseRow(parser_mediator, text_file_object.tell(), row)

        for row in reader:
            self.ParseRow(parser_mediator, text_file_object.tell(), row)
Exemplo n.º 22
0
    def ParseFileObject(self, parser_mediator, file_object, **kwargs):
        """Parses an Opera typed history file-like object.

    Args:
      parser_mediator: A parser mediator object (instance of ParserMediator).
      file_object: A file-like object.

    Raises:
      UnableToParseFile: when the file cannot be parsed.
    """
        file_object.seek(0, os.SEEK_SET)

        text_file_object = text_file.TextFile(file_object)

        # Need to verify the first line to make sure this is a) XML and
        # b) the right XML.
        first_line = text_file_object.readline(90)

        # Note that we must check the data here as a string first, otherwise
        # forcing first_line to convert to Unicode can raise a UnicodeDecodeError.
        if not first_line.startswith(b'<?xml version="1.0'):
            raise errors.UnableToParseFile(
                u'Not an Opera typed history file [not a XML]')

        # We read in the second line due to the fact that ElementTree
        # reads the entire file in memory to parse the XML string and
        # we only care about the XML file with the correct root key,
        # which denotes a typed_history.xml file.
        second_line = text_file_object.readline(50).strip()

        # Note that we must check the data here as a string first, otherwise
        # forcing second_line to convert to Unicode can raise a UnicodeDecodeError.
        if second_line != b'<typed_history>':
            raise errors.UnableToParseFile(
                u'Not an Opera typed history file [wrong XML root key]')

        # For ElementTree to work we need to work on a file object seeked
        # to the beginning.
        file_object.seek(0, os.SEEK_SET)

        xml = ElementTree.parse(file_object)

        for history_item in xml.iterfind(u'typed_history_item'):
            # Missing attributes default to the empty string.
            content = history_item.get(u'content', u'')
            last_typed = history_item.get(u'last_typed', u'')
            entry_type = history_item.get(u'type', u'')

            try:
                timestamp = timelib.Timestamp.FromTimeString(last_typed)
            except errors.TimestampError:
                # Report the error but continue with the remaining items.
                parser_mediator.ProduceParseError(
                    u'Unable to parse time string: {0:s}'.format(last_typed))
                continue

            event_object = OperaTypedHistoryEvent(timestamp, content,
                                                  entry_type)
            parser_mediator.ProduceEvent(event_object)
Exemplo n.º 23
0
  def Parse(self, parser_context, file_entry):
    """Extract the Android usage-history file.

    Args:
      parser_context: A parser context object (instance of ParserContext).
      file_entry: A file entry object (instance of dfvfs.FileEntry).

    Raises:
      UnableToParseFile: when the file is not an Android usage history file.
    """
    file_object = file_entry.GetFileObject()
    file_object.seek(0, os.SEEK_SET)

    text_file_object = text_file.TextFile(file_object)

    # Need to verify the first line to make sure this is a) XML and
    # b) the right XML.
    first_line = text_file_object.readline(90)

    # Note that we must check the data here as a string first, otherwise
    # forcing first_line to convert to Unicode can raise a UnicodeDecodeError.
    # NOTE(review): this compares against a str prefix while the similar
    # parser below compares against bytes (b'<?xml') — confirm what type this
    # TextFile version returns.
    if not first_line.startswith('<?xml'):
      raise errors.UnableToParseFile(
          u'Not an Android usage history file [not XML]')

    # We read in the second line due to the fact that ElementTree
    # reads the entire file in memory to parse the XML string and
    # we only care about the XML file with the correct root key,
    # which denotes a typed_history.xml file.
    second_line = text_file_object.readline(50).strip()

    if second_line != u'<usage-history>':
      raise errors.UnableToParseFile(
          u'Not an Android usage history file [wrong XML root key]')

    # For ElementTree to work we need to work on a filehandle seeked
    # to the beginning.
    file_object.seek(0, os.SEEK_SET)

    xml = ElementTree.parse(file_object)
    root = xml.getroot()

    for app in root:
      for part in app.iter():
        if part.tag == 'comp':
          package = app.get(u'name', '')
          component = part.get(u'name', '')

          # 'lrt' holds the last resume time as a base-10 integer; skip
          # entries with a missing or malformed value.
          try:
            last_resume_time = int(part.get('lrt', u''), 10)
          except ValueError:
            continue

          event_object = AndroidAppUsageEvent(
              last_resume_time, package, component)
          parser_context.ProduceEvent(
              event_object, parser_name=self.NAME, file_entry=file_entry)

    # NOTE(review): not in a try/finally, so the file object leaks if parsing
    # raises above — consider wrapping.
    file_object.close()
Exemplo n.º 24
0
    def ParseFileObject(self, parser_mediator, file_object, **kwargs):
        """Parses an Android usage-history file-like object.

    Args:
      parser_mediator: A parser mediator object (instance of ParserMediator).
      file_object: A file-like object.

    Raises:
      UnableToParseFile: when the file cannot be parsed.
    """
        file_object.seek(0, os.SEEK_SET)
        text_file_object = text_file.TextFile(file_object)

        # Need to verify the first line to make sure this is a) XML and
        # b) the right XML.
        first_line = text_file_object.readline(90)

        # Note that we must check the data here as a string first, otherwise
        # forcing first_line to convert to Unicode can raise a UnicodeDecodeError.
        if not first_line.startswith(b'<?xml'):
            raise errors.UnableToParseFile(
                u'Not an Android usage history file [not XML]')

        # We read in the second line due to the fact that ElementTree
        # reads the entire file in memory to parse the XML string and
        # we only care about the XML file with the correct root key,
        # which denotes a typed_history.xml file.
        second_line = text_file_object.readline(50).strip()

        # Note that we must check the data here as a string first, otherwise
        # forcing second_line to convert to Unicode can raise a UnicodeDecodeError.
        if second_line != b'<usage-history>':
            raise errors.UnableToParseFile(
                u'Not an Android usage history file [wrong XML root key]')

        # The current offset of the file-like object needs to point at
        # the start of the file for ElementTree to parse the XML data correctly.
        file_object.seek(0, os.SEEK_SET)

        xml = ElementTree.parse(file_object)
        root = xml.getroot()

        for app in root:
            for part in app.iter():
                if part.tag == u'comp':
                    # Missing attributes default to the empty string.
                    package = app.get(u'name', u'')
                    component = part.get(u'name', u'')

                    # 'lrt' holds the last resume time as a base-10 integer;
                    # skip entries with a missing or malformed value.
                    try:
                        last_resume_time = int(part.get(u'lrt', u''), 10)
                    except ValueError:
                        continue

                    event_object = AndroidAppUsageEvent(
                        last_resume_time, package, component)
                    parser_mediator.ProduceEvent(event_object)
Exemplo n.º 25
0
  def testReadline(self):
    """Test the readline() function."""
    file_object = os_file_io.OSFile(self._resolver_context)
    file_object.open(self._os_path_spec1)
    text_file_object = text_file.TextFile(file_object)

    # readline() returns raw bytes including the trailing newline and
    # advances the offset past it.
    line = text_file_object.readline()
    self.assertEqual(line, b'This is another file.\n')
    self.assertEqual(text_file_object.get_offset(), 22)

    file_object.close()
Exemplo n.º 26
0
  def testReadlinesWithFileWithoutNewLineAtEnd(self):
    """Test reading lines from a file without a new line char at the end."""
    test_file = self._GetTestFilePath(['fls_bodyfile.txt'])
    test_file_path_spec = os_path_spec.OSPathSpec(location=test_file)
    file_object = os_file_io.OSFile(self._resolver_context)
    file_object.open(test_file_path_spec)
    text_file_object = text_file.TextFile(file_object)

    lines = text_file_object.readlines()

    self.assertEqual(len(lines), 25)

    # Close the file object, consistent with the sibling tests; the
    # original version left the OS file handle open for the remainder
    # of the test run.
    file_object.close()
Exemplo n.º 27
0
  def _ParseFileObject(self, knowledge_base, file_object):
    """Parses a time zone file-like object.

    Args:
      knowledge_base (KnowledgeBase): to fill with preprocessing information.
      file_object (dfvfs.FileIO): file-like object.
    """
    # Only the first line is relevant: it holds the time zone name.
    first_line = text_file.TextFile(file_object).readline()

    time_zone = first_line.strip()
    if time_zone:
      knowledge_base.SetValue(u'time_zone_str', time_zone)
Exemplo n.º 28
0
    def testReadlinesWithSizeHint(self):
        """Test the readlines() function."""
        file_object = os_file_io.OSFile(self._resolver_context)
        file_object.open(self._os_path_spec2)
        text_file_object = text_file.TextFile(file_object)

        # With a size hint of 60 bytes only the first three lines fit.
        lines = text_file_object.readlines(sizehint=60)

        expected_lines = [
            b'place,user,password\n',
            b'bank,joesmith,superrich\n',
            b'alarm system,-,1234\n']
        self.assertEqual(lines, expected_lines)

        file_object.close()
Exemplo n.º 29
0
  def Parse(self, parser_context, file_entry):
    """Extract data from an Opera global history file.

    Args:
      parser_context: A parser context object (instance of ParserContext).
      file_entry: A file entry object (instance of dfvfs.FileEntry).

    Raises:
      UnableToParseFile: when the file is not an Opera global history file.
    """
    file_object = file_entry.GetFileObject()

    # Use try/finally instead of repeating file_object.close() before every
    # raise; this also closes the file object when an unexpected exception
    # is raised e.g. by _ReadRecords() or ProduceEvent(), which previously
    # leaked the file object.
    try:
      file_object.seek(0, os.SEEK_SET)

      text_file_object = text_file.TextFile(file_object)

      try:
        title, url, timestamp, popularity_index = self._ReadRecord(
            text_file_object, 400)
      except errors.NotAText:
        raise errors.UnableToParseFile(
            u'Not an Opera history file [not a text file].')

      if not title:
        raise errors.UnableToParseFile(
            u'Not an Opera history file [no title present].')

      if not self._IsValidUrl(url):
        raise errors.UnableToParseFile(
            u'Not an Opera history file [not a valid URL].')

      if not timestamp:
        raise errors.UnableToParseFile(
            u'Not an Opera history file [timestamp does not exist].')

      event_object = OperaGlobalHistoryEvent(
          timestamp, url, title, popularity_index)
      parser_context.ProduceEvent(
          event_object, parser_name=self.NAME, file_entry=file_entry)

      # Read in the rest of the history file.
      for title, url, timestamp, popularity_index in self._ReadRecords(
          text_file_object):
        event_object = OperaGlobalHistoryEvent(
            timestamp, url, title, popularity_index)
        parser_context.ProduceEvent(
            event_object, parser_name=self.NAME, file_entry=file_entry)
    finally:
      file_object.close()
Exemplo n.º 30
0
  def testReadlinesWithFileWithoutNewLineAtEnd(self):
    """Test reading lines from a file without a new line char at the end."""
    test_path = self._GetTestFilePath(['fls_bodyfile.txt'])
    self._SkipIfPathNotExists(test_path)

    test_os_path_spec = path_spec_factory.Factory.NewPathSpec(
        definitions.TYPE_INDICATOR_OS, location=test_path)
    file_object = resolver.Resolver.OpenFileObject(
        test_os_path_spec, resolver_context=self._resolver_context)

    text_file_object = text_file.TextFile(file_object)

    # All 25 lines are returned even though the last one has no newline.
    self.assertEqual(len(text_file_object.readlines()), 25)