Exemplo n.º 1
0
def detect_vcf_annotation(filepath):
    """Return the name of the annotation parser to be used on the given file.

    Called in the importer and in the project wizard to display the detected
    annotations.

    The file is handed to ``vcf.VCFReader`` and its header is inspected:

    - "vep" is reported when the metadata contains "VEP" and the declared
      infos contain "CSQ";
    - "snpeff" is reported when the metadata contains "SnpEffVersion" and the
      declared infos contain "ANN".

    VEP is checked first, so it wins when both sets of markers are present.

    :param filepath: Path of the VCF file (plain text or gzipped).
    :return: "vep", "snpeff", or None when no known annotation is detected.
    """
    # Open .gz files in binary mode (See #84)
    mode = "rb" if cm.is_gz_file(filepath) else "r"

    # The context manager releases the handle on every exit path, including
    # the "nothing detected" case and errors raised while parsing the header.
    with open(filepath, mode) as device:
        std_reader = vcf.VCFReader(device)

        if "VEP" in std_reader.metadata and "CSQ" in std_reader.infos:
            return "vep"

        if "SnpEffVersion" in std_reader.metadata and "ANN" in std_reader.infos:
            return "snpeff"

    return None
Exemplo n.º 2
0
    def __init__(self, filepath, *args, **kwargs):
        """Record the data source and detect whether it is a file or a string.

        :param filepath: Path of an existing file; a value that does not
            exist on disk is flagged as coming from a string.
        """
        self.count = 0
        self.filepath = filepath

        # Autodetection of string and filepaths: a source that is not an
        # existing path is flagged as a string and is never treated as gzipped.
        exists_on_disk = os.path.exists(self.filepath)
        self.is_from_string = not exists_on_disk
        self.is_gz_file = cm.is_gz_file(filepath) if exists_on_disk else False
Exemplo n.º 3
0
    def get_total_file_size(self) -> int:
        """Return the total size of the underlying file, in bytes.

        Returns 0 when no device is attached. For gzipped files the value
        given by ``cm.get_uncompressed_size`` is returned. Otherwise the size
        is obtained by seeking to the end of the device, which is then
        rewound to its beginning.
        """
        handle = self.device
        # No device attached (FakeReader is used ?): nothing to measure
        if not handle:
            return 0

        source_name = handle.name

        if not cm.is_gz_file(source_name):
            # whence=2 jumps to EOF; the returned position is the size
            total = handle.seek(0, 2)
            # Leave the device positioned at its start
            handle.seek(0)
            return total

        return cm.get_uncompressed_size(source_name)
Exemplo n.º 4
0
def create_reader(filepath):
    """Context manager that wraps the given file and yields a suitable reader.

    The file type is detected from the path suffixes; for VCF files the
    annotation format is detected as well (see ``detect_vcf_annotation``).

    Filetypes and annotations parsers supported:

        - vcf.gz: snpeff, vep
        - vcf: snpeff, vep
        - csv, tsv, txt: vep

    The underlying file is closed when the generator is resumed, closed, or
    receives an exception at the ``yield`` point, so the handle is released
    even if the caller's body fails.

    NOTE(review): this is a generator; presumably it is wrapped with
    ``contextlib.contextmanager`` where it is defined - confirm.

    :param filepath: Path of the file to read.
    :raises Exception: When no parser matches the file suffixes.
    """
    path = pathlib.Path(filepath)
    suffixes = path.suffixes

    LOGGER.debug(
        "create_reader: PATH suffix %s, is_gz_file: %s",
        suffixes,
        cm.is_gz_file(filepath),
    )

    if ".vcf" in suffixes:
        annotation_detected = detect_vcf_annotation(filepath)
        # Gzipped VCF is handed to the reader in binary mode, plain VCF in
        # text mode.
        mode = "rb" if ".gz" in suffixes else "r"
        with open(filepath, mode) as device:
            yield VcfReader(device, annotation_parser=annotation_detected)
        return

    if {".tsv", ".csv", ".txt"} & set(suffixes):
        with open(filepath, "r") as device:
            yield CsvReader(device)
        return

    raise Exception("create_reader:: Could not choose parser for this file.")