def get_bases_mask(run_info_xml,sample_sheet_file):
    """
    Get bases mask string

    Generates initial bases mask based on data in RunInfo.xml (which
    says how many reads there are, how many cycles in each read, and
    which are index reads). Then updates this using the barcode
    information in the sample sheet file.

    Both the initial and the updated bases mask are reported on
    stdout.

    Arguments:
      run_info_xml: name and path of RunInfo.xml file from the
        sequencing run
      sample_sheet_file: name and path of sample sheet file.

    Returns:
      Bases mask string e.g. 'y101,I6'.

    """
    # Get initial bases mask
    bases_mask = IlluminaData.IlluminaRunInfo(run_info_xml).bases_mask
    # Each message is formatted into a single string and passed to
    # print in parentheses: this gives the same output with the
    # Python 2 print statement and the Python 3 print function
    print("Bases mask: %s (from RunInfo.xml)" % bases_mask)
    # Update bases mask from sample sheet, using the 'Index' value of
    # the first sample sheet line as the representative barcode
    # NOTE: assumes the sample sheet contains at least one line
    sample_sheet = IlluminaData.get_casava_sample_sheet(sample_sheet_file)
    example_barcode = sample_sheet[0]['Index']
    bases_mask = IlluminaData.fix_bases_mask(bases_mask,example_barcode)
    print("Bases mask: %s (updated for barcode sequence '%s')" %
          (bases_mask,example_barcode))
    return bases_mask
Пример #2
0
def get_bases_mask(run_info_xml, sample_sheet_file):
    """
    Get bases mask string

    Generates initial bases mask based on data in RunInfo.xml (which
    says how many reads there are, how many cycles in each read, and
    which are index reads). Then updates this using the barcode
    information in the sample sheet file.

    Both the initial and the updated bases mask are reported on
    stdout.

    Arguments:
      run_info_xml: name and path of RunInfo.xml file from the
        sequencing run
      sample_sheet_file: name and path of sample sheet file.

    Returns:
      Bases mask string e.g. 'y101,I6'.

    """
    # Get initial bases mask
    bases_mask = IlluminaData.IlluminaRunInfo(run_info_xml).bases_mask
    # A single pre-formatted string in parentheses prints identically
    # under the Python 2 print statement and the Python 3 print
    # function, so the module stays importable on both
    print("Bases mask: %s (from RunInfo.xml)" % bases_mask)
    # Update bases mask from sample sheet, using the 'Index' value of
    # the first sample sheet line as the representative barcode
    # NOTE: assumes the sample sheet contains at least one line
    sample_sheet = IlluminaData.get_casava_sample_sheet(sample_sheet_file)
    example_barcode = sample_sheet[0]['Index']
    bases_mask = IlluminaData.fix_bases_mask(bases_mask, example_barcode)
    print("Bases mask: %s (updated for barcode sequence '%s')" % (
        bases_mask, example_barcode))
    return bases_mask
Пример #3
0
 # Register the -N/--nprocessors option (integer, stored as options.cores,
 # defaulting to 1)
 # NOTE(review): 'p' is created above this excerpt; presumably an optparse
 # option parser - confirm
 p.add_option('-N','--nprocessors',action="store",dest="cores",default=1,type='int',
              help="spread work across multiple processors/cores (default is 1)")
 # Split the command line into option values and positional arguments
 options,args = p.parse_args()
 # Check arguments: with no positional inputs a counts file must be given
 if not args and options.counts_file_in is None:
     p.error("Need to supply at least one input Fastq file, a bclToFastq output "
             "directory, or a counts file from a previous run (if using -c)")
 # Send the report to the named file if one was requested, otherwise to stdout
 if options.report_file is not None:
     print "Writing report to %s" % options.report_file
     # NOTE(review): no close of this file handle is visible in this excerpt
     fp = open(options.report_file,'w')
 else:
     fp = sys.stdout
 # Handle input sample sheet
 # NOTE(review): 'sample_sheet' is only assigned when the option was given;
 # check that later uses are guarded the same way
 if options.sample_sheet is not None:
     print "Loading sample sheet data from %s" % options.sample_sheet
     sample_sheet = IlluminaData.get_casava_sample_sheet(options.sample_sheet)
 # Process according to inputs
 if options.counts_file_in:
     # Use counts from a previously generated file
     counts_file = options.counts_file_in
     print "Loading counts from %s" % counts_file
     # Maps each sequence to its integer count
     counts = dict()
     # Lines are split on tabs: the sequence is read from the second field
     # and its count from the third
     # NOTE(review): the file object opened here is never explicitly closed
     for line in open(counts_file,'r'):
         seq = line.split('\t')[1]
         count = int(line.split('\t')[2])
         counts[seq] = count
     # Write the report for the loaded counts to the chosen output stream
     report(counts,nseqs=options.n,cutoff=options.cutoff,fp=fp)
     # Match barcodes to index sequences in sample sheet
     if options.sample_sheet:
         if options.lanes is not None:
             # Turn the comma-separated lanes option into a list of integers
             lanes = [int(lane) for lane in options.lanes.split(',')]
Пример #4
0
 # Attach the group of deprecated options to the parser
 # NOTE(review): 'p' and 'deprecated_options' are created above this
 # excerpt; presumably an optparse parser and option group - confirm
 p.add_option_group(deprecated_options)
 # Process command line
 options, args = p.parse_args()
 # Exactly one positional argument (the sample sheet file) is required
 if len(args) != 1:
     p.error("input is a single SampleSheet.csv file")
 # --miseq is still accepted but only produces a warning
 if options.miseq:
     logging.warning(
         "--miseq option no longer necessary; MiSEQ-style sample sheets "
         "are now converted automatically")
 # Get input sample sheet file
 samplesheet = args[0]
 # Stop with exit status 1 if the named file does not exist
 if not os.path.isfile(samplesheet):
     logging.error("sample sheet '%s': not found" % samplesheet)
     sys.exit(1)
 # Read in the data as CSV
 data = IlluminaData.get_casava_sample_sheet(samplesheet)
 # Remove lanes: keep only the lines whose 'Lane' value is among the
 # lanes parsed from the lanes option
 if options.lanes is not None:
     lanes = parse_lane_expression(options.lanes)
     print "Keeping lanes %s, removing the rest" % ','.join(
         [str(x) for x in lanes])
     # Build a fresh sample sheet holding just the retained lines
     new_data = IlluminaData.CasavaSampleSheet()
     for line in data:
         if line['Lane'] in lanes:
             print "Keeping %s" % line
             # The line is copied across via its string representation
             new_data.append(tabdata="%s" % line)
     data = new_data
 # Update the SampleID and SampleProject fields
 # Each sample_id option value is parsed into a set of lanes plus a name
 for sample_id in options.sample_id:
     lanes, name = parse_name_expression(sample_id)
     for line in data:
Пример #5
0
                               "if required)")
 # Attach the group of deprecated options to the parser
 # NOTE(review): 'p' and 'deprecated_options' are created above this
 # excerpt; presumably an optparse parser and option group - confirm
 p.add_option_group(deprecated_options)
 # Process command line
 options,args = p.parse_args()
 # Exactly one positional argument (the sample sheet file) is required
 if len(args) != 1:
     p.error("input is a single SampleSheet.csv file")
 # --miseq is still accepted but only produces a warning
 if options.miseq:
     logging.warning("--miseq option no longer necessary; MiSEQ-style sample sheets "
                     "are now converted automatically")
 # Get input sample sheet file
 samplesheet = args[0]
 # Stop with exit status 1 if the named file does not exist
 if not os.path.isfile(samplesheet):
     logging.error("sample sheet '%s': not found" % samplesheet)
     sys.exit(1)
 # Read in the data as CSV
 data = IlluminaData.get_casava_sample_sheet(samplesheet)
 # Remove lanes: keep only the lines whose 'Lane' value is among the
 # lanes parsed from the lanes option
 if options.lanes is not None:
     lanes = parse_lane_expression(options.lanes)
     print "Keeping lanes %s, removing the rest" % ','.join([str(x) for x in lanes])
     # Build a fresh sample sheet holding just the retained lines
     new_data = IlluminaData.CasavaSampleSheet()
     for line in data:
         if line['Lane'] in lanes:
             print "Keeping %s" % line
             # The line is copied across via its string representation
             new_data.append(tabdata="%s" % line)
     data = new_data
 # Update the SampleID and SampleProject fields
 # Each sample_id option value is parsed into a set of lanes plus a name
 for sample_id in options.sample_id:
     lanes,name = parse_name_expression(sample_id)
     for line in data:
         # Only lines in the lanes named by the expression are considered
         if line['Lane'] in lanes: