Python SolidData.is_paired_end示例

示例#1

0

显示文件

文件： analyse_solid_run.py 项目： mypandos/genomics

def print_md5sums(solid_runs):
    """Calculate and print md5sums for primary data files

    This will generate a list of md5sums that can be passed to the
    md5sum program to check against a copy of the runs using

    md5sum -c CHECKSUMS

    One line of the form "<md5sum>  <name>" is printed for each primary
    data file, where <name> is the file name after being passed through
    strip_prefix() together with the current working directory. The F5
    files are only processed for runs that SolidData.is_paired_end()
    reports as paired-end.

    A failure for one file is reported via logging.error() and does not
    prevent the remaining files from being processed.

    Arguments:
      solid_runs: list or tuple of SolidRun instances.
    """
    # (description used in error messages, library attribute holding the file)
    f3_files = (("F3 csfasta", "csfasta"), ("F3 qual", "qual"))
    f5_files = (("F5 csfasta", "csfasta_f5"), ("F5 qual", "qual_f5"))
    for run in solid_runs:
        # is_paired_end() only takes the run, so evaluate it once per run
        # rather than once per library
        if SolidData.is_paired_end(run):
            data_files = f3_files + f5_files
        else:
            data_files = f3_files
        for sample in run.samples:
            for library in sample.libraries:
                for description, attr in data_files:
                    try:
                        # The attribute lookup stays inside the try so that
                        # any problem with a single file is logged, not raised
                        filen = getattr(library, attr)
                        print("%s  %s" % (Md5sum.md5sum(filen),
                                          strip_prefix(filen, os.getcwd())))
                    except Exception as ex:
                        # Best effort: report and carry on with the next file
                        logging.error("FAILED for %s: %s", description, ex)

示例#2

0

显示文件

文件： analyse_solid_run.py 项目： mypandos/genomics

def report_run(solid_runs):
    """Print a brief report about SOLiD runs.

    This generates a brief screen report about the content of the
    supplied SOLiD runs e.g. flow cells, layout, number of samples
    etc.

    For each run the report gives the flow cell and slide layout, the
    run name and date and the number of samples, followed by the
    projects in each sample and the csfasta/qual primary data files of
    each library (plus the F5 files when SolidData.is_paired_end()
    reports the run as paired-end).

    Arguments:
      solid_runs: a list or tuple of SolidRun objects to report.
    """
    # Report the data for each run
    for run in solid_runs:
        # Report overall slide layout
        slide_layout = run.slideLayout()
        title = "Flow Cell %s (%s)" % (str(run.run_info.flow_cell),
                                       str(slide_layout))
        print(title + '\n' + "=" * len(title))
        print("I.D.   : %s" % (run.run_info.name))
        print("Date   : %s" % (run.run_info.date))
        print("Samples: %d\n" % len(run.samples))
        # is_paired_end() only takes the run, so evaluate it once here and
        # reuse the answer for every library below
        paired_end = SolidData.is_paired_end(run)
        if paired_end:
            print("Paired-end run\n")
        #
        # Report projects for each sample
        for sample in run.samples:
            title = "Sample %s" % sample
            print(title + '\n' + "-" * len(title))
            for project in sample.projects:
                libraries = project.prettyPrintLibraries()
                title = "Project %s: %s (%d libraries)" % (project.name,
                                                           libraries,
                                                           len(project.libraries))
                # The underline matches the title text only, not the
                # blank line printed in front of it
                print('\n' + title + '\n' + "-" * len(title))
                print("Pattern: %s/%s" % (sample,
                                          project.getLibraryNamePattern()))
                # Report location of primary data
                for library in project.libraries:
                    print("%s\n%s" % (library.csfasta, library.qual))
                    if paired_end:
                        print("%s\n%s" % (library.csfasta_f5,
                                          library.qual_f5))

示例#3

0

显示文件

文件： analyse_solid_run.py 项目： mamanambiya/genomics

def report_run(solid_runs):
    """Print a brief report about SOLiD runs.

    This generates a brief screen report about the content of the
    supplied SOLiD runs e.g. flow cells, layout, number of samples
    etc.

    Each run gets a header (flow cell, slide layout, name, date, sample
    count), then one section per sample listing its projects and, for
    every library in a project, the csfasta and qual file locations.
    The F5 csfasta/qual locations are added for runs that
    SolidData.is_paired_end() reports as paired-end.

    Arguments:
      solid_runs: a list or tuple of SolidRun objects to report.
    """
    for run in solid_runs:
        # Run header, underlined with '='
        layout = run.slideLayout()
        header = "Flow Cell %s (%s)" % (str(run.run_info.flow_cell),
                                        str(layout))
        print(header + '\n' + "=" * len(header))
        print("I.D.   : %s" % (run.run_info.name))
        print("Date   : %s" % (run.run_info.date))
        print("Samples: %d\n" % len(run.samples))
        # The paired-end test only takes the run: do it once and reuse the
        # result instead of repeating the call for every library
        is_paired = SolidData.is_paired_end(run)
        if is_paired:
            print("Paired-end run\n")
        # One section per sample, underlined with '-'
        for sample in run.samples:
            header = "Sample %s" % sample
            print(header + '\n' + "-" * len(header))
            for project in sample.projects:
                pretty_libraries = project.prettyPrintLibraries()
                header = "Project %s: %s (%d libraries)" % (
                    project.name, pretty_libraries, len(project.libraries))
                # Underline length is taken from the text alone; the
                # leading newline only adds a blank separator line
                print('\n' + header + '\n' + "-" * len(header))
                print("Pattern: %s/%s" % (sample,
                                          project.getLibraryNamePattern()))
                # Locations of the primary data for each library
                for library in project.libraries:
                    print("%s\n%s" % (library.csfasta, library.qual))
                    if is_paired:
                        print("%s\n%s" % (library.csfasta_f5,
                                          library.qual_f5))

示例#4

0

显示文件

文件： Experiment.py 项目： fls-bioinformatics-core/genomics

 def __full_names(self, library, F5):
     """Internal: link names based on 'full' naming scheme

     Returns a (csfasta, qual) tuple holding the base names of the
     library's primary data files. The csfasta_f5/qual_f5 files are
     used only when F5 is true and the library's parent run is
     paired-end; otherwise the csfasta/qual files are used.
     """
     run = library.parent_sample.parent_run
     paired_end = SolidData.is_paired_end(run)
     if paired_end and F5:
         # F5 reads only exist (and are only wanted) for paired-end runs
         return (os.path.basename(library.csfasta_f5),
                 os.path.basename(library.qual_f5))
     # Single-end run, or the F3 half of a paired-end run
     return (os.path.basename(library.csfasta),
             os.path.basename(library.qual))

示例#5

0

显示文件

文件： Experiment.py 项目： fls-bioinformatics-core/genomics

 def __partial_names(self, library, F5):
     """Internal: link names based on 'partial' naming scheme

     The names are built from the instrument and datestamp of the
     library's parent run plus the library name, joined by underscores.
     For paired-end runs an F3 or F5 tag (selected by the F5 argument)
     is added; for other runs the F5 argument has no effect.

     Returns a (csfasta, qual) tuple of link names.
     """
     run = library.parent_sample.parent_run
     stem = "_".join([run.run_info.instrument,
                      run.run_info.datestamp,
                      library.name])
     if not SolidData.is_paired_end(run):
         return ("%s.csfasta" % stem, "%s_QV.qual" % stem)
     # Paired-end: add F3/F5 to distinguish the samples
     if F5:
         return ("%s_F5.csfasta" % stem, "%s_F5_QV.qual" % stem)
     return ("%s_F3.csfasta" % stem, "%s_F3_QV.qual" % stem)

示例#6

0

显示文件

文件： Experiment.py 项目： fls-bioinformatics-core/genomics

 def __minimal_names(self, library, F5):
     """Internal: link names based on 'minimal' naming scheme

     The names consist of the library name alone, with an F3 or F5 tag
     (selected by the F5 argument) added when the library's parent run
     is paired-end.

     Returns a (csfasta, qual) tuple of link names.
     """
     run = library.parent_sample.parent_run
     if not SolidData.is_paired_end(run):
         # Library names alone
         return ("%s.csfasta" % library.name, "%s.qual" % library.name)
     # Paired-end: add F3/F5 to distinguish the samples
     if F5:
         return ("%s_F5.csfasta" % library.name, "%s_F5.qual" % library.name)
     return ("%s_F3.csfasta" % library.name, "%s_F3.qual" % library.name)

示例#7

0

显示文件

文件： Experiment.py 项目： multicode/genomics

 def __full_names(self, library, F5):
     """Internal: link names based on 'full' naming scheme

     The link names are the base names of the primary data files
     themselves. Returns a (csfasta, qual) tuple; the F5 versions of
     the files are selected only if F5 is true and the library's
     parent run is paired-end.
     """
     run = library.parent_sample.parent_run
     # The paired-end test comes first so that it is always evaluated
     want_f5 = SolidData.is_paired_end(run) and F5
     if want_f5:
         csfasta_name = os.path.basename(library.csfasta_f5)
         qual_name = os.path.basename(library.qual_f5)
     else:
         csfasta_name = os.path.basename(library.csfasta)
         qual_name = os.path.basename(library.qual)
     return (csfasta_name, qual_name)

示例#8

0

显示文件

文件： Experiment.py 项目： multicode/genomics

 def __partial_names(self, library, F5):
     """Internal: link names based on 'partial' naming scheme

     Builds a common stem "<instrument>_<datestamp>_<library name>" from
     the library and its parent run, then applies the csfasta and qual
     name templates to it. Paired-end runs use templates carrying an F3
     or F5 tag, chosen by the F5 argument.

     Returns a (csfasta, qual) tuple of link names.
     """
     run = library.parent_sample.parent_run
     stem = '_'.join(
         [run.run_info.instrument, run.run_info.datestamp, library.name])
     # Choose the (csfasta, qual) templates for this kind of run
     if not SolidData.is_paired_end(run):
         templates = ("%s.csfasta", "%s_QV.qual")
     elif F5:
         templates = ("%s_F5.csfasta", "%s_F5_QV.qual")
     else:
         templates = ("%s_F3.csfasta", "%s_F3_QV.qual")
     csfasta_template, qual_template = templates
     return (csfasta_template % stem, qual_template % stem)

示例#9

0

显示文件

文件： Experiment.py 项目： multicode/genomics

 def __minimal_names(self, library, F5):
     """Internal: link names based on 'minimal' naming scheme

     Applies csfasta and qual name templates to the bare library name.
     Paired-end runs use templates carrying an F3 or F5 tag, chosen by
     the F5 argument; for other runs F5 has no effect.

     Returns a (csfasta, qual) tuple of link names.
     """
     run = library.parent_sample.parent_run
     # Choose the (csfasta, qual) templates for this kind of run
     if not SolidData.is_paired_end(run):
         templates = ("%s.csfasta", "%s.qual")
     elif F5:
         templates = ("%s_F5.csfasta", "%s_F5.qual")
     else:
         templates = ("%s_F3.csfasta", "%s_F3.qual")
     csfasta_template, qual_template = templates
     return (csfasta_template % library.name, qual_template % library.name)

示例#10

0

显示文件

文件： analyse_solid_run.py 项目： mamanambiya/genomics

def print_md5sums(solid_runs):
    """Calculate and print md5sums for primary data files

    This will generate a list of md5sums that can be passed to the
    md5sum program to check against a copy of the runs using

    md5sum -c CHECKSUMS

    For every library the F3 csfasta and qual files are processed, plus
    the F5 csfasta and qual files when SolidData.is_paired_end()
    reports the run as paired-end. Each file produces one line of the
    form "<md5sum>  <name>", with <name> being the result of
    strip_prefix() applied to the file name and the current working
    directory.

    Errors are handled per file: they are reported with
    logging.error() and processing continues with the next file.

    Arguments:
      solid_runs: list or tuple of SolidRun instances.
    """
    for run in solid_runs:
        # (description for error messages, library attribute) pairs for
        # the files to checksum; built once per run because
        # is_paired_end() only takes the run
        data_files = [("F3 csfasta", "csfasta"), ("F3 qual", "qual")]
        if SolidData.is_paired_end(run):
            data_files.extend([("F5 csfasta", "csfasta_f5"),
                               ("F5 qual", "qual_f5")])
        for sample in run.samples:
            for library in sample.libraries:
                for description, attr in data_files:
                    try:
                        # Look the file up inside the try so a problem with
                        # one file is logged instead of aborting the listing
                        filen = getattr(library, attr)
                        checksum = Md5sum.md5sum(filen)
                        print("%s  %s" % (checksum,
                                          strip_prefix(filen, os.getcwd())))
                    except Exception as ex:
                        # Best effort: report and move on to the next file
                        logging.error("FAILED for %s: %s", description, ex)

示例#11

0

显示文件

文件： Experiment.py 项目： fls-bioinformatics-core/genomics

    def buildAnalysisDirs(self, top_dir=None, dry_run=False, link_type="relative", naming_scheme="partial"):
        """Construct and populate analysis directories for the experiments

        For each defined experiment, create the required analysis directories
        and populate with links to the primary data files.

        Failures while linking are logged via logging.error() and do not
        stop processing of the remaining libraries and experiments.

        Arguments:
          top_dir: if set then create the analysis directories as
            subdirs of the specified directory; otherwise operate in cwd
          dry_run: if True then only report the mkdir, ln etc operations that
            would be performed. Default is False (do perform the operations).
          link_type: type of link to use when linking to primary data, one of
            'relative' or 'absolute'. Any value other than 'absolute' is
            treated as 'relative'.
          naming_scheme: naming scheme to use for links to primary data, one of
            'full' (same names as primary data files), 'partial' (cut-down version
            of the full name which excludes sample names - the default), or
            'minimal' (just the library name).
        """
        # Deal with top_dir
        if top_dir:
            if os.path.exists(top_dir):
                print("Directory %s already exists" % top_dir)
            elif not dry_run:
                # Create top directory
                print("Creating %s" % top_dir)
                # 0o775 is the octal spelling accepted by Python 2.6+ and 3
                utils.mkdir(top_dir, mode=0o775)
            else:
                # Report what would have been done
                print("mkdir %s" % top_dir)
        # Type of link: only an explicit 'absolute' turns relative links off
        use_relative_links = (link_type != "absolute")
        # For each experiment, make and populate directory
        for expt in self.experiments:
            print("Experiment: %s %s %s/%s" % (expt.name, expt.type, expt.sample, expt.library))
            expt_dir = expt.dirname(top_dir)
            print("\tDir: %s" % expt_dir)
            # Make directory
            if os.path.exists(expt_dir):
                logging.warning("Directory %s already exists" % expt_dir)
            elif not dry_run:
                # Create directory
                utils.mkdir(expt_dir, mode=0o775)
            else:
                # Report what would have been done
                print("mkdir %s" % expt_dir)
            # Locate the primary data
            for run in self.solid_runs:
                paired_end = SolidData.is_paired_end(run)
                libraries = run.fetchLibraries(expt.sample, expt.library)
                for library in libraries:
                    # Get names for links to primary data - F3
                    ln_csfasta, ln_qual = LinkNames(naming_scheme).names(library)
                    print("\t\t%s" % ln_csfasta)
                    print("\t\t%s" % ln_qual)
                    # Make links to primary data; both links share one try,
                    # so a failure on the csfasta link also skips the qual link
                    try:
                        self.__linkToFile(
                            library.csfasta,
                            os.path.join(expt_dir, ln_csfasta),
                            relative=use_relative_links,
                            dry_run=dry_run,
                        )
                        self.__linkToFile(
                            library.qual,
                            os.path.join(expt_dir, ln_qual),
                            relative=use_relative_links,
                            dry_run=dry_run,
                        )
                    except Exception as ex:
                        logging.error("Failed to link to some or all F3 primary data")
                        logging.error("Exception: %s" % ex)
                    # Get names for links to F5 reads (if paired-end run)
                    if paired_end:
                        ln_csfasta, ln_qual = LinkNames(naming_scheme).names(library, F5=True)
                        print("\t\t%s" % ln_csfasta)
                        print("\t\t%s" % ln_qual)
                        # Make links to F5 read data
                        try:
                            self.__linkToFile(
                                library.csfasta_f5,
                                os.path.join(expt_dir, ln_csfasta),
                                relative=use_relative_links,
                                dry_run=dry_run,
                            )
                            self.__linkToFile(
                                library.qual_f5,
                                os.path.join(expt_dir, ln_qual),
                                relative=use_relative_links,
                                dry_run=dry_run,
                            )
                        except Exception as ex:
                            logging.error("Failed to link to some or all F5 primary data")
                            logging.error("Exception: %s" % ex)

示例#12

0

显示文件

文件： Experiment.py 项目： multicode/genomics

    def buildAnalysisDirs(self,
                          top_dir=None,
                          dry_run=False,
                          link_type="relative",
                          naming_scheme="partial"):
        """Construct and populate analysis directories for the experiments

        For each defined experiment, create the required analysis directories
        and populate with links to the primary data files.

        Failures while linking are logged via logging.error() and do not
        stop processing of the remaining libraries and experiments.

        Arguments:
          top_dir: if set then create the analysis directories as
            subdirs of the specified directory; otherwise operate in cwd
          dry_run: if True then only report the mkdir, ln etc operations that
            would be performed. Default is False (do perform the operations).
          link_type: type of link to use when linking to primary data, one of
            'relative' or 'absolute'. Any value other than 'absolute' is
            treated as 'relative'.
          naming_scheme: naming scheme to use for links to primary data, one of
            'full' (same names as primary data files), 'partial' (cut-down version
            of the full name which excludes sample names - the default), or
            'minimal' (just the library name).
        """
        # Deal with top_dir
        if top_dir:
            if os.path.exists(top_dir):
                print("Directory %s already exists" % top_dir)
            elif not dry_run:
                # Create top directory
                print("Creating %s" % top_dir)
                # 0o775 is the octal spelling accepted by Python 2.6+ and 3
                bcf_utils.mkdir(top_dir, mode=0o775)
            else:
                # Report what would have been done
                print("mkdir %s" % top_dir)
        # Type of link: only an explicit 'absolute' turns relative links off
        use_relative_links = (link_type != 'absolute')
        # For each experiment, make and populate directory
        for expt in self.experiments:
            print("Experiment: %s %s %s/%s" % (expt.name, expt.type,
                                               expt.sample, expt.library))
            expt_dir = expt.dirname(top_dir)
            print("\tDir: %s" % expt_dir)
            # Make directory
            if os.path.exists(expt_dir):
                logging.warning("Directory %s already exists" % expt_dir)
            elif not dry_run:
                # Create directory
                bcf_utils.mkdir(expt_dir, mode=0o775)
            else:
                # Report what would have been done
                print("mkdir %s" % expt_dir)
            # Locate the primary data
            for run in self.solid_runs:
                paired_end = SolidData.is_paired_end(run)
                libraries = run.fetchLibraries(expt.sample, expt.library)
                for library in libraries:
                    # Get names for links to primary data - F3
                    ln_csfasta, ln_qual = LinkNames(naming_scheme).names(
                        library)
                    print("\t\t%s" % ln_csfasta)
                    print("\t\t%s" % ln_qual)
                    # Make links to primary data; both links share one try,
                    # so a failure on the csfasta link also skips the qual link
                    try:
                        self.__linkToFile(library.csfasta,
                                          os.path.join(expt_dir, ln_csfasta),
                                          relative=use_relative_links,
                                          dry_run=dry_run)
                        self.__linkToFile(library.qual,
                                          os.path.join(expt_dir, ln_qual),
                                          relative=use_relative_links,
                                          dry_run=dry_run)
                    except Exception as ex:
                        logging.error(
                            "Failed to link to some or all F3 primary data")
                        logging.error("Exception: %s" % ex)
                    # Get names for links to F5 reads (if paired-end run)
                    if paired_end:
                        ln_csfasta, ln_qual = LinkNames(naming_scheme).names(
                            library, F5=True)
                        print("\t\t%s" % ln_csfasta)
                        print("\t\t%s" % ln_qual)
                        # Make links to F5 read data
                        try:
                            self.__linkToFile(library.csfasta_f5,
                                              os.path.join(
                                                  expt_dir, ln_csfasta),
                                              relative=use_relative_links,
                                              dry_run=dry_run)
                            self.__linkToFile(library.qual_f5,
                                              os.path.join(expt_dir, ln_qual),
                                              relative=use_relative_links,
                                              dry_run=dry_run)
                        except Exception as ex:
                            logging.error(
                                "Failed to link to some or all F5 primary data"
                            )
                            logging.error("Exception: %s" % ex)

示例#13

0

显示文件

文件： analyse_solid_run.py 项目： mypandos/genomics

def verify_runs(solid_dirs):
    """Do basic verification checks on SOLiD run directories

    For each SOLiD run directory, create a SolidRun object and check for the
    expected sample and library directories, and that primary data files
    (csfasta and qual) have been assigned and exist. For runs that
    SolidData.is_paired_end() reports as paired-end the F5 csfasta and
    qual files are checked as well.

    Each problem found is printed as it is detected, followed by a
    PASSED/FAILED line per run and an overall PASSED/FAILED line.

    Arguments:
      solid_dirs: a list of SOLiD sequencing directory names.

    Returns:
      A UNIX-like status code: 0 if all runs are verified, 1 if there
      is a problem with any of them.
    """
    print("Performing verification")
    status = 0
    for solid_dir in solid_dirs:
        # Initialise
        run_status = 0
        run = SolidData.SolidRun(solid_dir)
        if not run:
            # Some error processing the basics
            run_status = 1
        else:
            # Check basic parameters: should have non-zero numbers of
            # samples and libraries
            if len(run.samples) == 0:
                print("No sample data")
                run_status = 1
            # Primary data expected for every library, as (description,
            # library attribute) pairs: F3 always, F5 only if paired-end
            expected = [("F3 csfasta", "csfasta"), ("F3 qual", "qual")]
            if SolidData.is_paired_end(run):
                expected.extend([("F5 csfasta", "csfasta_f5"),
                                 ("F5 qual", "qual_f5")])
            # Check libraries in each sample
            for sample in run.samples:
                if len(sample.libraries) == 0:
                    print("No libraries for sample %s" % sample.name)
                    run_status = 1
                for library in sample.libraries:
                    for description, attr in expected:
                        filen = getattr(library, attr)
                        if not filen:
                            # No file was assigned to the library
                            print("No %s for %s/%s" %
                                  (description, sample.name, library.name))
                            run_status = 1
                        elif not os.path.exists(filen):
                            # File was assigned but is not on disk
                            print("Missing %s for %s/%s" %
                                  (description, sample.name, library.name))
                            run_status = 1
        # Completed checks for run
        # (the two spaces after the colon are part of the report format)
        if run_status == 0:
            print("%s:  [PASSED]" % run.run_name)
        else:
            print("%s:  [FAILED]" % run.run_name)
            status = 1
    # Completed
    if status == 0:
        print("\nOverall status:  [PASSED]")
    else:
        print("\nOverall status:  [FAILED]")
    return status

示例#14

0

显示文件

文件： analyse_solid_run.py 项目： mamanambiya/genomics

def verify_runs(solid_dirs):
    """Do basic verification checks on SOLiD run directories

    For each SOLiD run directory, create a SolidRun object and check for the
    expected sample and library directories, and that primary data files
    (csfasta and qual) have been assigned and exist. The F5 csfasta and
    qual files are also checked when SolidData.is_paired_end() reports
    the run as paired-end.

    Problems are printed as they are found; a PASSED/FAILED line is
    printed for each run and once more for the overall result.

    Arguments:
      solid_dirs: a list of SOLiD sequencing directory names.

    Returns:
      A UNIX-like status code: 0 if all runs are verified, 1 if there
      is a problem with any of them.
    """
    print("Performing verification")
    status = 0
    for solid_dir in solid_dirs:
        run_failed = False
        run = SolidData.SolidRun(solid_dir)
        if not run:
            # Some error processing the basics
            run_failed = True
        else:
            # Should have non-zero numbers of samples and libraries
            if len(run.samples) == 0:
                print("No sample data")
                run_failed = True
            # (description, library attribute) for each primary data file
            # that must be present; F5 files only apply to paired-end runs
            required_files = [("F3 csfasta", "csfasta"), ("F3 qual", "qual")]
            if SolidData.is_paired_end(run):
                required_files += [("F5 csfasta", "csfasta_f5"),
                                   ("F5 qual", "qual_f5")]
            for sample in run.samples:
                if len(sample.libraries) == 0:
                    print("No libraries for sample %s" % sample.name)
                    run_failed = True
                for library in sample.libraries:
                    ident = (sample.name, library.name)
                    for description, attr in required_files:
                        filen = getattr(library, attr)
                        if not filen:
                            # Nothing was assigned for this file
                            print("No %s for %s/%s" % ((description,) + ident))
                            run_failed = True
                        elif not os.path.exists(filen):
                            # Assigned, but the file is not on disk
                            print("Missing %s for %s/%s" %
                                  ((description,) + ident))
                            run_failed = True
        # Completed checks for run
        # (the two spaces after the colon are part of the report format)
        if run_failed:
            print("%s:  [FAILED]" % run.run_name)
            status = 1
        else:
            print("%s:  [PASSED]" % run.run_name)
    # Completed
    if status == 0:
        print("\nOverall status:  [PASSED]")
    else:
        print("\nOverall status:  [FAILED]")
    return status