Exemplo n.º 1
0
    def sniff_prefix(self, file_prefix: FilePrefix):
        """
        Report whether the file prefix looks like an AMOS assembly file.

        The prefix is scanned line by line. The result is True as soon as a
        stripped line consists solely of one of the message openers
        ``{RED``, ``{CTG`` or ``{TLE``. Scanning stops at the first falsy
        line or when the prefix is exhausted; in both cases the result is
        False.

        Example::

          {CTG
          iid:1
          eid:1
          seq:
          CCTCTCCTGTAGAGTTCAACCGA-GCCGGTAGAGTTTTATCA
          .
          qlt:
          DDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD
          .
          {TLE
          src:1027
          off:0
          clr:618,0
          gap:
          250 612
          .
          }
          }
        """
        for raw_line in file_prefix.line_iterator():
            if not raw_line:
                # A falsy line is treated as the end of the input.
                break
            candidate = raw_line.strip()
            # Blank lines and lines that do not open a message are skipped;
            # scanning continues with the next line.
            if candidate.startswith('{') and re.match(r'{(RED|CTG|TLE)$', candidate):
                return True
        return False
Exemplo n.º 2
0
 def _has_root_element_in_prefix(self, file_prefix: FilePrefix, root):
     for line in file_prefix.line_iterator():
         if not line.startswith('<?'):
             break
     # pattern match <root or <ns:root for any ns string
     pattern = r'^<(\w*:)?%s' % root
     return re.match(pattern, line) is not None
Exemplo n.º 3
0
    def sniff_prefix(self, file_prefix: FilePrefix):
        """Each file must have one or more data blocks.
        The start of a data block is defined by the keyword
        "data_" followed by an optional string for
        identification (e.g., "data_images").  All text
        before the first "data_" keyword is treated as comments.

        The decision is made on the first non-blank line that follows a
        "data_" line: the file is accepted only if that line starts with
        "loop_" or "_".  If no such line is found in the prefix, the file
        is rejected.

        >>> from galaxy.datatypes.sniff import get_test_fname
        >>> fname = get_test_fname('1.star')
        >>> Star().sniff(fname)
        True
        >>> fname = get_test_fname('interval.interval')
        >>> Star().sniff(fname)
        False
        """
        in_data_block = False
        for line in file_prefix.line_iterator():
            line = line.strip()
            if not line:
                # Blank lines are ignored everywhere, inside or outside
                # a data block.
                continue
            if line.startswith("data_"):
                # A repeated data_ line simply keeps us in data-block mode.
                in_data_block = True
                continue
            if in_data_block:
                # The first non-blank line within a data block must
                # start with loop_ or _.
                return line.startswith(("loop_", "_"))
            # Otherwise we are still before the first data_ keyword; such
            # lines are skipped without being validated.
        return False
Exemplo n.º 4
0
 def sniff_prefix(self, file_prefix: FilePrefix):
     """
     Determines whether the file is the correct XML type.

     Stripped lines starting with ``<?`` are skipped. The first other line
     is searched for ``<root`` or ``<ns:root`` (any namespace prefix
     ``ns``), where ``root`` is ``self.root``. If every line starts with
     ``<?``, the last one is the line that gets searched.

     :returns: True if the root element is found, False otherwise
         (including when the prefix contains no lines at all).
     """
     current = None
     for current in file_prefix.line_iterator():
         current = current.strip()
         if not current.startswith('<?'):
             break
     if current is None:
         # Empty prefix: nothing to search. Previously this path raised
         # because the loop variable was never bound.
         return False
     # pattern match <root or <ns:root for any ns string
     pattern = r'<(\w*:)?%s' % self.root
     return re.search(pattern, current) is not None
Exemplo n.º 5
0
 def sniff_prefix(self, file_prefix: FilePrefix):
     """
     Look for a '<collection' or '<image' tag (case-insensitive) in any
     line of the file prefix.

     >>> from galaxy.datatypes.sniff import get_test_fname
     >>> fname = get_test_fname('1.dzi')
     >>> Dzi().sniff(fname)
     True
     >>> fname = get_test_fname('megablast_xml_parser_test1.blastxml')
     >>> Dzi().sniff(fname)
     False
     """
     markers = ('<collection', '<image')
     for raw in file_prefix.line_iterator():
         lowered = raw.lower()
         # Either opening tag anywhere on the line is enough.
         if any(marker in lowered for marker in markers):
             return True
     return False
Exemplo n.º 6
0
 def sniff_prefix(self, file_prefix: FilePrefix):
     """
     Check that the prefix is a three-column table with a header row.

     The delimiter is obtained from ``self._parse_delimiter`` on the first
     stripped line. Every line must then split into exactly three fields.
     The first row is passed to ``self._parse_header``; if that yields
     None the row must at least pass ``self._parse_dataline``. Every later
     row must pass ``self._parse_dataline``.

     :returns: True only when all rows passed and both a delimiter and a
         header (non-None) were found; False otherwise, including for an
         empty prefix.
     """
     delimiter = None
     header = None
     for position, raw in enumerate(file_prefix.line_iterator()):
         text = raw.strip()
         if delimiter is None:
             # Only reachable on the first row: a None result aborts below.
             delimiter = self._parse_delimiter(text)
             if delimiter is None:
                 return False
         fields = text.split(delimiter)
         if len(fields) != 3:
             return False
         if position > 0:
             if not self._parse_dataline(fields):
                 return False
             continue
         # First row: try it as a header, fall back to a data row.
         header = self._parse_header(fields)
         if header is None and not self._parse_dataline(fields):
             return False
     return delimiter is not None and header is not None
Exemplo n.º 7
0
    def sniff_prefix(self, file_prefix: FilePrefix):
        """
        Check that every row of the prefix is consistent with the table type
        detected on the first row.

        The delimiter is obtained from ``self._parse_delimiter`` on the first
        line (with trailing/leading CR and LF characters removed). The first
        row is passed to ``self._parse_type``; a None result rejects the
        file. When the type equals 0 the first row itself must also pass
        ``self._parse_dataline``. Every later row must pass
        ``self._parse_dataline`` with the detected type.

        :returns: True when all rows passed and a type was detected; False
            otherwise, including for an empty prefix.
        """
        delimiter = None
        kind = None
        for row_number, raw in enumerate(file_prefix.line_iterator()):
            text = raw.strip("\r\n")
            if delimiter is None:
                # Only reachable on the first row: a None result aborts below.
                delimiter = self._parse_delimiter(text)
                if delimiter is None:
                    return False
            fields = text.split(delimiter)

            if row_number > 0:
                if not self._parse_dataline(fields, kind):
                    return False
                continue

            # First row determines the type of the whole table.
            kind = self._parse_type(fields)
            if kind is None:
                return False
            if kind == 0 and not self._parse_dataline(fields, kind):
                return False
        return kind is not None
Exemplo n.º 8
0
    def sniff_prefix(self, file_prefix: FilePrefix):
        """
        Determines whether the file is a valid ms2 file.

        The leading run of lines starting with ``H<TAB>`` is collected
        (blank lines are skipped; the first other line ends the run). The
        file is accepted only if, for each of CreationDate, Extractor,
        ExtractorVersion and ExtractorOptions, some collected line starts
        with ``H<TAB><name>``. Matching is by prefix, so for instance an
        ExtractorVersion line also satisfies the Extractor requirement.
        """
        required_fields = (
            'CreationDate', 'Extractor', 'ExtractorVersion',
            'ExtractorOptions',
        )

        headers = []
        for raw in file_prefix.line_iterator():
            if raw.strip() == "":
                continue
            if not raw.startswith('H\t'):
                # First non-blank, non-header line ends the header section.
                break
            headers.append(raw)

        return all(
            any(entry.startswith(f'H\t{name}') for entry in headers)
            for name in required_fields
        )