def fetch_stats(cls, fpath, stats_fpath):
    """Return the samtools stats for ``fpath``, reusing a stats file when usable.

    The stats file at ``stats_fpath`` is read only when it exists, is not
    reported as older than the data file by
    ``cls._is_stats_file_older_than_data`` and passes ``utils.can_read_file``.
    In every other case the stats are generated from ``fpath``. This function
    itself does not write anything to disk.

    Args:
        cls: Owning class; must provide ``_is_stats_file_older_than_data``.
        fpath: Path of the data file; must be an existing regular file.
        stats_fpath: Path where the stats file for ``fpath`` may be found.

    Returns:
        Whatever ``HandleSamtoolsStats._get_stats`` (stats file used) or
        ``HandleSamtoolsStats._generate_stats`` (stats regenerated) returns.

    Raises:
        ValueError: If ``fpath`` is empty/None or is not an existing file.
    """
    if not fpath or not os.path.isfile(fpath):
        raise ValueError("You need to give a valid file path if you want the stats")

    # Evaluate existence and staleness exactly once, so that the branch taken
    # and the warning below always agree with each other (and the filesystem
    # is not queried twice for the same answer).
    has_stats_file = os.path.isfile(stats_fpath)
    is_stale = has_stats_file and cls._is_stats_file_older_than_data(fpath, stats_fpath)

    if has_stats_file and not is_stale and utils.can_read_file(stats_fpath):
        stats = HandleSamtoolsStats._get_stats(stats_fpath)
        logging.info("Reading stats from file %s", stats_fpath)
        return stats

    # Missing, stale or unreadable stats file: compute the stats from the data.
    stats = HandleSamtoolsStats._generate_stats(fpath)
    logging.info("Generating stats for file %s", fpath)
    if is_stale:
        logging.warning(
            "The stats file is older than the actual file, you need to remove/update it. "
            "Regenerating the stats, but without saving.")
    return stats
def fetch_stats(cls, fpath, stats_fpath):
    """Return the samtools stats for ``fpath``, reusing a stats file when usable.

    The stats file at ``stats_fpath`` is read only when it exists, is not
    reported as older than the data file by
    ``cls._is_stats_file_older_than_data`` and passes ``utils.can_read_file``.
    In every other case the stats are generated from ``fpath``. This function
    itself does not write anything to disk.

    Args:
        cls: Owning class; must provide ``_is_stats_file_older_than_data``.
        fpath: Path of the data file; must be an existing regular file.
        stats_fpath: Path where the stats file for ``fpath`` may be found.

    Returns:
        Whatever ``HandleSamtoolsStats._get_stats`` (stats file used) or
        ``HandleSamtoolsStats._generate_stats`` (stats regenerated) returns.

    Raises:
        ValueError: If ``fpath`` is empty/None or is not an existing file.
    """
    if not fpath or not os.path.isfile(fpath):
        raise ValueError("You need to give a valid file path if you want the stats")

    # Evaluate existence and staleness exactly once, so that the branch taken
    # and the warning below always agree with each other (and the filesystem
    # is not queried twice for the same answer).
    has_stats_file = os.path.isfile(stats_fpath)
    is_stale = has_stats_file and cls._is_stats_file_older_than_data(fpath, stats_fpath)

    if has_stats_file and not is_stale and utils.can_read_file(stats_fpath):
        stats = HandleSamtoolsStats._get_stats(stats_fpath)
        logging.info("Reading stats from file %s", stats_fpath)
        return stats

    # Missing, stale or unreadable stats file: compute the stats from the data.
    stats = HandleSamtoolsStats._generate_stats(fpath)
    logging.info("Generating stats for file %s", fpath)
    if is_stale:
        logging.warning(
            "The stats file is older than the actual file, you need to remove/update it. "
            "Regenerating the stats, but without saving.")
    return stats
def compare_bam_and_cram_by_statistics(cls, bam_path, cram_path):
    """Compare a BAM file with a CRAM file using samtools-derived statistics.

    The checks run in this order:

    1. both paths are non-empty and are either iRODS paths or existing files;
    2. non-iRODS files are readable;
    3. ``samtools quickcheck`` succeeds on both files;
    4. the flagstat outputs are compared via ``cls.compare_flagstats``;
    5. the stats are compared via ``cls.compare_stats_by_sequence_checksum``;
    6. the stats obtained in step 5 are persisted next to non-iRODS files.

    A failure in steps 1-3 returns immediately. Problems found in step 4 do
    not prevent steps 5 and 6 from running.

    Args:
        cls: Owning class; must provide ``compare_flagstats`` and
            ``compare_stats_by_sequence_checksum``.
        bam_path: Path of the BAM file (local path or iRODS path).
        cram_path: Path of the CRAM file (local path or iRODS path).

    Returns:
        A list of error messages (strings); empty when no problem was found.
    """
    errors = []

    # Check that it's a valid file path
    if not bam_path or (not utils.is_irods_path(bam_path) and not os.path.isfile(bam_path)):
        errors.append("The BAM file path: %s is not valid" % bam_path)
    if not cram_path or (not utils.is_irods_path(cram_path) and not os.path.isfile(cram_path)):
        errors.append("The CRAM file path:%s is not valid" % cram_path)
    if errors:
        logging.error("There are errors with the file paths you provided: %s", errors)
        return errors

    # Check that the files are readable by me
    if not utils.is_irods_path(bam_path) and not utils.can_read_file(bam_path):
        errors.append("Can't read file %s" % bam_path)
    if not utils.is_irods_path(cram_path) and not utils.can_read_file(cram_path):
        errors.append("Can't read file %s" % cram_path)
    if errors:
        logging.error("There are problems reading the files: %s", errors)
        return errors

    # NOTE: no samtools version check is performed here (the original code had
    # it disabled).

    # Quickcheck the files before anything:
    try:
        RunSamtoolsCommands.run_samtools_quickcheck(bam_path)
    except RuntimeError as e:
        errors.append(str(e))
    try:
        RunSamtoolsCommands.run_samtools_quickcheck(cram_path)
    except RuntimeError as e:
        errors.append(str(e))
    if errors:
        logging.error("There are problems running quickcheck on the files you've given: %s", errors)
        return errors

    # Calculate and compare flagstat:
    # `errors` is guaranteed empty here, so it only reflects flagstat failures
    # when tested below.
    flagstat_b = flagstat_c = None
    try:
        flagstat_b = RunSamtoolsCommands.get_samtools_flagstat_output(bam_path)
    except RuntimeError as e:
        errors.append(str(e))
    try:
        flagstat_c = RunSamtoolsCommands.get_samtools_flagstat_output(cram_path)
    except RuntimeError as e:
        errors.append(str(e))
    if not errors:
        errors.extend(cls.compare_flagstats(flagstat_b, flagstat_c))
    else:
        logging.error("THere are problems running flagstat on the files you've given: %s", errors)

    # Calculate and compare stats:
    stats_fpath_b = bam_path + ".stats"
    stats_fpath_c = cram_path + ".stats"
    stats_b, stats_c = None, None
    # Errors of this step are tracked separately: gating the comparison on the
    # accumulated `errors` would skip it (and report a misleading "Can't
    # compare samtools stats.") whenever the flagstat step found a problem.
    stats_errors = []
    try:
        stats_b = HandleSamtoolsStats.fetch_stats(bam_path, stats_fpath_b)
    except (ValueError, RuntimeError) as e:
        stats_errors.append(str(e))
    try:
        stats_c = HandleSamtoolsStats.fetch_stats(cram_path, stats_fpath_c)
    except (ValueError, RuntimeError) as e:
        stats_errors.append(str(e))
    errors.extend(stats_errors)

    if not stats_errors and stats_b and stats_c:
        errors.extend(cls.compare_stats_by_sequence_checksum(stats_b, stats_c))
    else:
        errors.append("Can't compare samtools stats.")
        logging.error("For some reason I can't compare samtools stats for your files.")

    # Persist stats (only for files that are not on iRODS):
    try:
        if stats_b and not utils.is_irods_path(bam_path):
            HandleSamtoolsStats.persist_stats(stats_b, stats_fpath_b)
    except IOError:
        message = "Can't save stats to disk for %s file" % bam_path
        errors.append(message)
        logging.error(message)
    try:
        if stats_c and not utils.is_irods_path(cram_path):
            HandleSamtoolsStats.persist_stats(stats_c, stats_fpath_c)
    except IOError:
        message = "Can't save stats to disk for %s file" % cram_path
        errors.append(message)
        logging.error(message)
    return errors
def compare_bam_and_cram_by_statistics(cls, bam_path, cram_path):
    """Compare a BAM file with a CRAM file using samtools-derived statistics.

    The checks run in this order:

    1. both paths are non-empty and are either iRODS paths or existing files;
    2. non-iRODS files are readable;
    3. ``samtools quickcheck`` succeeds on both files;
    4. the flagstat outputs are compared via ``cls.compare_flagstats``;
    5. the stats are compared via ``cls.compare_stats_by_sequence_checksum``;
    6. the stats obtained in step 5 are persisted next to non-iRODS files.

    A failure in steps 1-3 returns immediately. Problems found in step 4 do
    not prevent steps 5 and 6 from running.

    Args:
        cls: Owning class; must provide ``compare_flagstats`` and
            ``compare_stats_by_sequence_checksum``.
        bam_path: Path of the BAM file (local path or iRODS path).
        cram_path: Path of the CRAM file (local path or iRODS path).

    Returns:
        A list of error messages (strings); empty when no problem was found.
    """
    errors = []

    # Check that it's a valid file path
    if not bam_path or (not utils.is_irods_path(bam_path) and not os.path.isfile(bam_path)):
        errors.append("The BAM file path: %s is not valid" % bam_path)
    if not cram_path or (not utils.is_irods_path(cram_path) and not os.path.isfile(cram_path)):
        errors.append("The CRAM file path:%s is not valid" % cram_path)
    if errors:
        logging.error("There are errors with the file paths you provided: %s", errors)
        return errors

    # Check that the files are readable by me
    if not utils.is_irods_path(bam_path) and not utils.can_read_file(bam_path):
        errors.append("Can't read file %s" % bam_path)
    if not utils.is_irods_path(cram_path) and not utils.can_read_file(cram_path):
        errors.append("Can't read file %s" % cram_path)
    if errors:
        logging.error("There are problems reading the files: %s", errors)
        return errors

    # NOTE: no samtools version check is performed here (the original code had
    # it disabled).

    # Quickcheck the files before anything:
    try:
        RunSamtoolsCommands.run_samtools_quickcheck(bam_path)
    except RuntimeError as e:
        errors.append(str(e))
    try:
        RunSamtoolsCommands.run_samtools_quickcheck(cram_path)
    except RuntimeError as e:
        errors.append(str(e))
    if errors:
        logging.error("There are problems running quickcheck on the files you've given: %s", errors)
        return errors

    # Calculate and compare flagstat:
    # `errors` is guaranteed empty here, so it only reflects flagstat failures
    # when tested below.
    flagstat_b = flagstat_c = None
    try:
        flagstat_b = RunSamtoolsCommands.get_samtools_flagstat_output(bam_path)
    except RuntimeError as e:
        errors.append(str(e))
    try:
        flagstat_c = RunSamtoolsCommands.get_samtools_flagstat_output(cram_path)
    except RuntimeError as e:
        errors.append(str(e))
    if not errors:
        errors.extend(cls.compare_flagstats(flagstat_b, flagstat_c))
    else:
        logging.error("THere are problems running flagstat on the files you've given: %s", errors)

    # Calculate and compare stats:
    stats_fpath_b = bam_path + ".stats"
    stats_fpath_c = cram_path + ".stats"
    stats_b, stats_c = None, None
    # Errors of this step are tracked separately: gating the comparison on the
    # accumulated `errors` would skip it (and report a misleading "Can't
    # compare samtools stats.") whenever the flagstat step found a problem.
    stats_errors = []
    try:
        stats_b = HandleSamtoolsStats.fetch_stats(bam_path, stats_fpath_b)
    except (ValueError, RuntimeError) as e:
        stats_errors.append(str(e))
    try:
        stats_c = HandleSamtoolsStats.fetch_stats(cram_path, stats_fpath_c)
    except (ValueError, RuntimeError) as e:
        stats_errors.append(str(e))
    errors.extend(stats_errors)

    if not stats_errors and stats_b and stats_c:
        errors.extend(cls.compare_stats_by_sequence_checksum(stats_b, stats_c))
    else:
        errors.append("Can't compare samtools stats.")
        logging.error("For some reason I can't compare samtools stats for your files.")

    # Persist stats (only for files that are not on iRODS):
    try:
        if stats_b and not utils.is_irods_path(bam_path):
            HandleSamtoolsStats.persist_stats(stats_b, stats_fpath_b)
    except IOError:
        message = "Can't save stats to disk for %s file" % bam_path
        errors.append(message)
        logging.error(message)
    try:
        if stats_c and not utils.is_irods_path(cram_path):
            HandleSamtoolsStats.persist_stats(stats_c, stats_fpath_c)
    except IOError:
        message = "Can't save stats to disk for %s file" % cram_path
        errors.append(message)
        logging.error(message)
    return errors