Exemplo n.º 1
0
def main(args=None):
    """
    Command-line entry point for aliquot-maf-tools.

    Configures the root logger, writes a banner with the invocation and
    the current date/time, registers the available sub-commands, parses the
    arguments and runs the selected sub-command.

    :param args: optional list of argument strings handed to argparse;
                 when None argparse falls back to the process arguments
    """
    Logger.setup_root_logger()
    logger = Logger.get_logger("main")

    # Banner: one rule above the run details, two below.
    rule = '-' * 75
    logger.info(rule)
    # The banner always reports sys.argv, even when `args` is supplied.
    logger.info("Program Args: aliquot-maf-tools " + " ".join(sys.argv[1::]))
    logger.info('Date/time: {0}'.format(datetime.datetime.now()))
    logger.info(rule)
    logger.info(rule)

    # Build the parser; a sub-command is mandatory.
    parser = argparse.ArgumentParser("GDC Aliquot MAF Tools")
    subparsers = parser.add_subparsers(dest="subcommand")
    subparsers.required = True

    # Each tool registers its own sub-parser.
    for tool in (VcfToAliquotMaf, MergeAliquotMafs, MaskMergedAliquotMaf):
        tool.add(subparsers=subparsers)

    options = parser.parse_args(args)

    # `options.func` builds the runner object for the chosen sub-command.
    runner = options.func(options)
    runner.do_work()

    logger.info("Finished!")
Exemplo n.º 2
0
    def __init__(self, options=None):
        """
        Create the class-named logger, store the run options and reset the
        MAF header/writer and scheme-related attributes to None.

        :param options: the options for this run; when omitted (or None) a
                        new empty dict is used
        """
        self.logger = Logger.get_logger(self.__class__.__name__)
        # Build the fallback dict per instance: a `dict()` default in the
        # signature is evaluated once and would be shared by all instances.
        self.options = {} if options is None else options

        # Maf stuff (all start unset)
        self.maf_header = None
        self.maf_writer = None
        self._scheme = None
        self._columns = None
        self._colset = None
Exemplo n.º 3
0
    def __init__(self, options=None):
        """
        Create the class-named logger, store the run options and initialize
        the reader/caller lists and the writer, scheme and merger attributes.

        :param options: the options for this run; when omitted (or None) a
                        new empty dict is used
        """
        self.logger = Logger.get_logger(self.__class__.__name__)
        # Build the fallback dict per instance: a `dict()` default in the
        # signature is evaluated once and would be shared by all instances.
        self.options = {} if options is None else options

        # Collections start empty; everything else starts unset.
        self.maf_readers = []
        self.callers = []
        self.maf_writer = None
        self._scheme = None
        self._columns = None
        self._colset = None
        self._merger = None
Exemplo n.º 4
0
    def __init__(self, options=None):
        """
        Create the class-named logger, store the run options, reset the MAF
        reader/writer and scheme-related attributes to None and create a new
        metrics collection.

        :param options: the options for this run; when omitted (or None) a
                        new empty dict is used
        """
        self.logger = Logger.get_logger(self.__class__.__name__)
        # Build the fallback dict per instance: a `dict()` default in the
        # signature is evaluated once and would be shared by all instances.
        self.options = {} if options is None else options

        # Reader/writer and scheme attributes start unset.
        self.maf_reader = None
        self.maf_writer = None
        self._scheme = None
        self._columns = None
        self._colset = None

        self.metrics = MafMetricsCollection()
Exemplo n.º 5
0
    def __init__(self, scheme):
        """
        Set up the MAF record merging object. Its main `merge_records`
        function takes an `aliquotmaf.merging.overlap_set.OverlapSet`
        instance and performs the merging.

        :param scheme: the scheme object; it is stored as-is and its
                       `column_names()` result is kept in `self.columns`
        """
        log = Logger.get_logger(type(self).__name__)
        self.logger = log

        self.scheme = scheme
        self.columns = scheme.column_names()

        log.info("Loading MAF record merger...")
Exemplo n.º 6
0
 def __init__(self, name=None, source=None, scheme=None):
     """
     Store the identifying attributes of this object and create a logger
     named after the concrete class.

     :param name: the name to store on the instance
     :param source: the source to store on the instance
     :param scheme: the scheme to store on the instance
     """
     # Keep the caller-supplied name; it was previously discarded and the
     # attribute was always None.
     self.name = name
     self.source = source
     self.scheme = scheme
     self.logger = Logger.get_logger(self.__class__.__name__)
Exemplo n.º 7
0
class GenotypeAndDepthsExtractor(Extractor):
    """Extractor class for extracting the genotype and depths based on the
       variant allele index.
    """
    logger = Logger.get_logger('GenotypeAndDepthsExtractor')

    @classmethod
    def extract(cls, var_allele_idx, genotype, alleles):
        """
        Extracts the information for the variant alleles based on the
        variant allele index. Creates a new, updated genotype record
        and depths list.

        :param var_allele_idx: the variant allele index
        :param genotype: a dictionary or dictionary-like object containing
                         various possible keys like AD, DP, etc.; it must
                         contain GT
        :param alleles: an ordered list or tuple of the possible alleles
                        at the locus
        :returns: a tuple ``(new_gt, depths)``. ``new_gt`` is a dict with
                  the keys GT, AD (a tuple where missing depths are ".")
                  and, when it is available or can be derived, DP.
                  ``depths`` is the AD list with missing depths set to 0.
                  When GT is empty the result is ``({}, [])``.
        """
        depths = []
        new_gt = {}
        if not genotype['GT']:
            return new_gt, depths

        # DP can only be compared against depths when it is present and is
        # neither None nor the "." missing-value placeholder.
        has_dp = (
            'DP' in genotype
            and genotype['DP'] is not None
            and genotype['DP'] != '.'
        )

        # If DP is defined, set it in new_gt
        if 'DP' in genotype and genotype['DP'] is not None:
            new_gt['DP'] = genotype['DP']

        # If AD is defined, then parse out all REF/ALT allele depths, or
        # whatever is in it
        if 'AD' in genotype and genotype['AD'] is not None:
            if isinstance(genotype['AD'], int):
                depths = [genotype['AD']]
            else:
                depths = list(genotype['AD'])

        # Handle VarScan VCF lines where AD contains only 1 depth, and REF
        # allele depth is in RD
        if len(depths) == 1 and 'RD' in genotype:
            depths = [None for _ in alleles]
            depths[0] = genotype['RD']
            if isinstance(genotype['AD'], int):
                depths[var_allele_idx] = genotype['AD']
            else:
                depths[var_allele_idx] = genotype['AD'][0]

        # Handle SomaticSniper VCF lines, where allele depths must be
        # extracted from BCOUNT
        elif 'AD' not in genotype and 'BCOUNT' in genotype:
            b_idx = {'A': 0, 'C': 1, 'G': 2, 'T': 3}
            bcount = list(genotype['BCOUNT'])
            depths = [
                bcount[b_idx[i]] if i in b_idx else None for i in alleles
            ]

        # If N depths not equal to N alleles, blank out the depths
        elif depths and len(depths) != len(alleles):
            cls.logger.warning(
                'The length of DP array != length of allele array')
            depths = [None for _ in alleles]

        # If depths is empty, just set to 0, 0. This has to happen before
        # the sanity check below, which indexes into depths; otherwise a
        # genotype with DP but no allele depths raises an IndexError.
        if not depths:
            depths = [0, 0]

        # Sanity check that REF/ALT allele depths are lower than total
        # depth; if they are not, replace DP with the sum of the depths
        if has_dp:
            total_depth = genotype['DP']
            ref_depth = depths[0]
            alt_depth = depths[var_allele_idx]
            exceeds_total = (
                (ref_depth is not None and ref_depth > total_depth)
                or (alt_depth is not None and alt_depth > total_depth)
                or (ref_depth is not None and alt_depth is not None
                    and ref_depth + alt_depth > total_depth)
            )
            if exceeds_total:
                cls.logger.warning(
                    'REF/ALT allele depths are greater than total depth!!')
                new_gt['DP'] = sum(i for i in depths if i and i != '.')

        # If we have REF/ALT allele depths but not DP, then set DP equal to
        # sum of all ADs
        if (depths[0] is not None and depths[var_allele_idx] is not None
                and not has_dp):
            new_gt['DP'] = sum(i for i in depths if i and i != '.')

        # Set the formatted AD and alleles
        new_gt['AD'] = tuple(
            '.' if i == '' or i is None else i for i in depths)
        new_gt['GT'] = genotype['GT']
        depths = [0 if i == '.' or i is None else i for i in new_gt['AD']]
        return new_gt, depths