Exemplo n.º 1
0
def main():
    """Compare a BAM file with a CRAM file and report any differences.

    Options are read from ``parse_args()``:

    * ``v``   - numeric verbosity; when truthy the log level is
      ``logging.CRITICAL - 10 * v``, otherwise ``logging.INFO``.
    * ``log`` - log file name (defaults to ``compare_b2c.log``).
    * ``b``, ``c`` - BAM and CRAM paths; nothing is compared unless both
      are given.
    * ``e``   - optional file that receives the errors, one per line; when
      it is not given the error list is printed instead.

    Raises:
        ValueError: if a path is neither accepted by
            ``utils.is_irods_path`` nor an existing regular file.
        SystemExit: with status 1 when the comparison returns errors.
    """
    args = parse_args()
    log_level = (logging.CRITICAL - 10 * args.v) if args.v else logging.INFO
    log_file = args.log if args.log else 'compare_b2c.log'
    logging.basicConfig(level=log_level,
                        format='%(levelname)s - %(asctime)s %(message)s',
                        filename=log_file)
    if not (args.b and args.c):
        return

    bam_path = args.b
    cram_path = args.c

    # Fail early on anything that is neither an iRODS path nor a local file.
    # The path is interpolated once so the log entry and the exception carry
    # the same, complete message.
    for path in (bam_path, cram_path):
        if not utils.is_irods_path(path) and not os.path.isfile(path):
            message = "This is not a file path: %s" % path
            logging.error(message)
            raise ValueError(message)

    errors = CompareStatsForFiles.compare_bam_and_cram_by_statistics(bam_path, cram_path)
    if not errors:
        logging.info("There were no errors and no differences between the stats for the 2 files.")
        return

    if args.e:
        # The context manager closes the file even if a write fails.
        with open(args.e, 'w') as err_f:
            for err in errors:
                err_f.write(err + '\n')
    else:
        print(errors)
    sys.exit(1)
Exemplo n.º 2
0
def main():
    """Compare a BAM file with a CRAM file and report any differences.

    Options are read from ``parse_args()``:

    * ``v``   - numeric verbosity; when truthy the log level is
      ``logging.CRITICAL - 10 * v``, otherwise ``logging.INFO``.
    * ``log`` - log file name (defaults to ``compare_b2c.log``).
    * ``b``, ``c`` - BAM and CRAM paths; nothing is compared unless both
      are given.
    * ``r``   - optional reference path; must be an existing local file.
    * ``s``   - when truthy, the string ``"srun"`` is passed on to the
      comparison (an empty string otherwise).
    * ``e``   - optional file that receives the errors, one per line; when
      it is not given the error list is printed instead.

    Raises:
        ValueError: if the reference is not an existing regular file, or if
            a BAM/CRAM path is neither accepted by ``utils.is_irods_path``
            nor an existing regular file.
        SystemExit: with status 1 when the comparison returns errors.
    """
    args = parse_args()
    log_level = (logging.CRITICAL - 10 * args.v) if args.v else logging.INFO
    log_file = args.log if args.log else 'compare_b2c.log'
    logging.basicConfig(level=log_level,
                        format='%(levelname)s - %(asctime)s %(message)s',
                        filename=log_file)
    if not (args.b and args.c):
        return

    bam_path = args.b
    cram_path = args.c

    # Optional extras forwarded to the comparison; empty strings mean
    # "not requested".
    ref_path = ''
    if args.r:
        ref_path = args.r
        if not os.path.isfile(ref_path):
            message = "This is not a file path: %s" % ref_path
            logging.error(message)
            raise ValueError(message)
    srun = "srun" if args.s else ''

    # Fail early on anything that is neither an iRODS path nor a local file.
    # The path is interpolated once so the log entry and the exception carry
    # the same, complete message.
    for path in (bam_path, cram_path):
        if not utils.is_irods_path(path) and not os.path.isfile(path):
            message = "This is not a file path: %s" % path
            logging.error(message)
            raise ValueError(message)

    errors = CompareStatsForFiles.compare_bam_and_cram_by_statistics(
        bam_path, cram_path, srun, ref_path)
    if not errors:
        logging.info(
            "There were no errors and no differences between the stats for the 2 files."
        )
        return

    if args.e:
        # The context manager closes the file even if a write fails.
        with open(args.e, 'w') as err_f:
            for err in errors:
                err_f.write(err + '\n')
    else:
        print(errors)
    sys.exit(1)
Exemplo n.º 3
0
    def compare_bam_and_cram_by_statistics(cls, bam_path, cram_path):
        """Compare a BAM and a CRAM file through samtools flagstat and stats.

        The checks run in stages; the first three return as soon as a stage
        has recorded a problem:

        1. both paths are non-empty and are either accepted by
           ``utils.is_irods_path`` or exist as regular files;
        2. non-iRODS paths are readable (``utils.can_read_file``);
        3. ``samtools quickcheck`` succeeds on both files;
        4. the flagstat outputs are compared (``cls.compare_flagstats``);
        5. the stats are fetched and compared
           (``cls.compare_stats_by_sequence_checksum``);
        6. fetched stats of non-iRODS files are saved to ``<path>.stats``.

        Args:
            bam_path: path of the BAM file.
            cram_path: path of the CRAM file.

        Returns:
            The list of problems collected along the way; empty when every
            stage passed and nothing differed.
        """
        both_paths = (bam_path, cram_path)
        problems = []

        # Stage 1: each path must be set and be an iRODS path or a real file.
        invalid_templates = ("The BAM file path: %s is not valid",
                             "The CRAM file path:%s is not valid")
        for template, path in zip(invalid_templates, both_paths):
            if not path or not (utils.is_irods_path(path) or os.path.isfile(path)):
                problems.append(template % path)
        if problems:
            logging.error("There are errors with the file paths you provided: %s" % problems)
            return problems

        # Stage 2: local files must be readable by the current user.
        for path in both_paths:
            if not (utils.is_irods_path(path) or utils.can_read_file(path)):
                problems.append("Can't read file %s" % path)
        if problems:
            logging.error("There are problems reading the files: %s" % problems)
            return problems

        # Stage 3: quickcheck both files before doing anything expensive.
        for path in both_paths:
            try:
                RunSamtoolsCommands.run_samtools_quickcheck(path)
            except RuntimeError as exc:
                problems.append(str(exc))
        if problems:
            logging.error("There are problems running quickcheck on the files you've given: %s" % problems)
            return problems

        # Stage 4: flagstat. The list is empty on entry, so "no problems"
        # after the loop means both outputs were collected.
        flagstat_outputs = []
        for path in both_paths:
            try:
                flagstat_outputs.append(RunSamtoolsCommands.get_samtools_flagstat_output(path))
            except RuntimeError as exc:
                problems.append(str(exc))
        if problems:
            logging.error("THere are problems running flagstat on the files you've given: %s" % problems)
        else:
            bam_flagstat, cram_flagstat = flagstat_outputs
            problems.extend(cls.compare_flagstats(bam_flagstat, cram_flagstat))

        # Stage 5: fetch the stats for both files; a failed fetch leaves None.
        bam_stats_file = bam_path + ".stats"
        cram_stats_file = cram_path + ".stats"
        fetched = []
        for path, stats_file in ((bam_path, bam_stats_file), (cram_path, cram_stats_file)):
            try:
                fetched.append(HandleSamtoolsStats.fetch_stats(path, stats_file))
            except (ValueError, RuntimeError) as exc:
                fetched.append(None)
                problems.append(str(exc))
        bam_stats, cram_stats = fetched

        # NOTE(review): the comparison is skipped whenever *any* problem is
        # already recorded - including flagstat failures or differences from
        # stage 4 - not only when fetching the stats failed.
        if problems or not (bam_stats and cram_stats):
            problems.append("Can't compare samtools stats.")
            logging.error("For some reason I can't compare samtools stats for your files.")
        else:
            problems.extend(cls.compare_stats_by_sequence_checksum(bam_stats, cram_stats))

        # Stage 6: persist whatever stats were fetched for non-iRODS files.
        to_persist = ((bam_path, bam_stats_file, bam_stats),
                      (cram_path, cram_stats_file, cram_stats))
        for path, stats_file, stats in to_persist:
            try:
                if stats and not utils.is_irods_path(path):
                    HandleSamtoolsStats.persist_stats(stats, stats_file)
            except IOError:
                save_failure = "Can't save stats to disk for %s file" % path
                problems.append(save_failure)
                logging.error(save_failure)
        return problems
Exemplo n.º 4
0
    def compare_bam_and_cram_by_statistics(cls, bam_path, cram_path):
        """Check a BAM/CRAM pair and compare their samtools statistics.

        Path validation, the readability check and ``samtools quickcheck``
        each return immediately with the problems they found. After that the
        flagstat outputs are compared, the stats are fetched and compared by
        sequence checksum, and the fetched stats of non-iRODS files are
        written to ``<path>.stats``.

        Args:
            bam_path: path of the BAM file.
            cram_path: path of the CRAM file.

        Returns:
            A list with every problem recorded; empty when nothing failed
            and nothing differed.
        """
        # Validate the paths: set, and either an iRODS path or a real file.
        labelled_paths = (
            ("The BAM file path: %s is not valid", bam_path),
            ("The CRAM file path:%s is not valid", cram_path),
        )
        found = [
            template % path
            for template, path in labelled_paths
            if not path
            or not (utils.is_irods_path(path) or os.path.isfile(path))
        ]
        if found:
            logging.error(
                "There are errors with the file paths you provided: %s" %
                found)
            return found

        # Local (non-iRODS) files must be readable by the current user.
        found = [
            "Can't read file %s" % path
            for path in (bam_path, cram_path)
            if not (utils.is_irods_path(path) or utils.can_read_file(path))
        ]
        if found:
            logging.error("There are problems reading the files: %s" % found)
            return found

        def _attempt(expected, call, *call_args):
            # Run one call; record an expected failure and yield None instead.
            try:
                return call(*call_args)
            except expected as exc:
                found.append(str(exc))
                return None

        # Quickcheck the files before anything else.
        _attempt(RuntimeError, RunSamtoolsCommands.run_samtools_quickcheck,
                 bam_path)
        _attempt(RuntimeError, RunSamtoolsCommands.run_samtools_quickcheck,
                 cram_path)
        if found:
            logging.error(
                "There are problems running quickcheck on the files you've given: %s"
                % found)
            return found

        # Flagstat: compare only when both outputs were obtained.
        bam_flagstat = _attempt(
            RuntimeError, RunSamtoolsCommands.get_samtools_flagstat_output,
            bam_path)
        cram_flagstat = _attempt(
            RuntimeError, RunSamtoolsCommands.get_samtools_flagstat_output,
            cram_path)
        if found:
            logging.error(
                "THere are problems running flagstat on the files you've given: %s"
                % found)
        else:
            found.extend(cls.compare_flagstats(bam_flagstat, cram_flagstat))

        # Stats: fetch for both files; a failed fetch leaves None behind.
        bam_stats_file = bam_path + ".stats"
        cram_stats_file = cram_path + ".stats"
        fetch_failures = (ValueError, RuntimeError)
        bam_stats = _attempt(fetch_failures, HandleSamtoolsStats.fetch_stats,
                             bam_path, bam_stats_file)
        cram_stats = _attempt(fetch_failures, HandleSamtoolsStats.fetch_stats,
                              cram_path, cram_stats_file)

        # NOTE(review): any problem recorded so far - flagstat failures or
        # differences included - disables the stats comparison, not only a
        # failed fetch.
        comparable = not found and bam_stats and cram_stats
        if comparable:
            found.extend(
                cls.compare_stats_by_sequence_checksum(bam_stats, cram_stats))
        else:
            found.append("Can't compare samtools stats.")
            logging.error(
                "For some reason I can't compare samtools stats for your files."
            )

        # Persist the fetched stats next to non-iRODS files.
        persist_jobs = (
            (bam_path, bam_stats, bam_stats_file),
            (cram_path, cram_stats, cram_stats_file),
        )
        for path, stats, stats_file in persist_jobs:
            try:
                if stats and not utils.is_irods_path(path):
                    HandleSamtoolsStats.persist_stats(stats, stats_file)
            except IOError:
                not_saved = "Can't save stats to disk for %s file" % path
                found.append(not_saved)
                logging.error(not_saved)
        return found