예제 #1
0
def main():
    """Run the pipeline end to end from the command-line arguments.

    Parses ``sys.argv[1:]`` with ``parseArgs`` and sets the module-level
    globals ``experiment`` and ``keepall``. It then calls, in order, the
    organize, demultiplex (or no-demultiplex), mapping and analysis stages.

    The disruption value must lie in the closed interval [0.0, 1.0]. An
    absent value, or one outside that interval (including NaN), is replaced
    by the default of 1.0; out-of-range values also print a warning.
    """
    global experiment
    global keepall

    args = parseArgs(sys.argv[1:])
    experiment = convert_to_filename(args.experiment)
    gbkfile = args.genome
    reads = args.input
    samples = args.samples

    # Absent disruption falls back to the default instead of failing in float().
    disruption = 1.0 if args.disruption is None else float(args.disruption)
    # Written as a negated chained comparison so NaN is rejected as well
    # (every ordering comparison involving NaN is False).
    if not 0.0 <= disruption <= 1.0:
        print('\n*** WARNING ***'
              '\nDisruption value: {}'
              '\nDisruption value must be from 0.0 to 1.0'
              '\nProceeding with default value of 1.0\n'.format(disruption))
        disruption = 1.0

    nobarcodes = args.nobarcodes
    keepall = args.keepall
    # Organism reference files called 'genome.fna' etc
    organism = 'genome'

    # --- ORGANIZE SAMPLE LIST AND FILE PATHS --- #
    pipeline_organize(samples)

    # --- DEMULTIPLEX OR MOVE FILES IF ALREADY DEMULTIPLEXED --- #
    if nobarcodes:
        pipeline_no_demultiplex(reads)
    else:
        pipeline_demultiplex(reads)

    # --- BOWTIE MAPPING --- #
    genomeDir = 'results/{experiment}/genome_lookup/'.format(experiment=experiment)
    pipeline_mapping(gbkfile, organism, genomeDir, disruption)

    # --- ANALYSIS OF RESULTS --- #
    pipeline_analysis()
예제 #2
0
def main():
    """Run the pipeline end to end from the command-line arguments.

    Parses ``sys.argv[1:]`` with ``parseArgs`` and sets the module-level
    globals ``experiment``, ``keepall`` and ``logdata`` (reset to an empty
    dict). It then calls, in order, the organize, demultiplex (or
    no-demultiplex), mapping and analysis stages, and prints a completion
    banner.

    The disruption value must lie in the closed interval [0.0, 1.0]. An
    absent value, or one outside that interval (including NaN), is replaced
    by the default of 1.0; out-of-range values also print a warning.
    """
    global experiment
    global keepall
    global logdata

    args = parseArgs(sys.argv[1:])
    experiment = convert_to_filename(args.experiment)
    gbkfile = args.genome
    reads = args.input
    samples = args.samples

    # Input may be an int or a string; normalise to float. An absent value
    # falls back to the default instead of failing in float().
    disruption = 1.0 if args.disruption is None else float(args.disruption)
    # A negated chained comparison also rejects NaN, which the pair of
    # "< 0.0 or > 1.0" tests would let through (NaN comparisons are False).
    if not 0.0 <= disruption <= 1.0:
        print('\n*** WARNING ***'
              '\nDisruption value: {}'
              '\nDisruption value must be from 0.0 to 1.0'
              '\nProceeding with default value of 1.0\n'.format(disruption))
        disruption = 1.0

    nobarcodes = args.nobarcodes
    keepall = args.keepall
    # Logging of sample info
    logdata = {}
    # Organism reference files called 'genome.fna' etc
    organism = 'genome'

    # --- ORGANIZE SAMPLE LIST AND FILE PATHS --- #
    pipeline_organize(samples)

    # --- DEMULTIPLEX OR MOVE FILES IF ALREADY DEMULTIPLEXED --- #
    if nobarcodes:
        pipeline_no_demultiplex(reads)
    else:
        pipeline_demultiplex(reads)

    # --- BOWTIE MAPPING --- #
    genomeDir = 'results/{experiment}/genome_lookup/'.format(experiment=experiment)
    pipeline_mapping(gbkfile, organism, genomeDir, disruption)

    # --- ANALYSIS OF RESULTS --- #
    pipeline_analysis()

    # --- CONFIRM COMPLETION --- #
    print('\n===================='
          '\n*       Done       *'
          '\n====================\n')
예제 #3
0
def sample_prep(sample_file, barcode_qc):
    """
    Return ordered dictionary of sample name and barcode for each sample.

    samples = OrderedDict([('name1', {'name': 'name1', 'barcode': 'barcode1'}),
        ('name2', {'name': 'name2', 'barcode': 'barcode2'})])

    The sample file is read as tab-delimited text: column 1 is the sample
    name (passed through convert_to_filename), column 2 is the barcode
    (converted to uppercase). Blank lines and lines whose first field begins
    with # are ignored.

    Exit with status 1 if a duplicate sample name is identified.
    If barcode_qc=True, also exit with status 1 if a barcode is missing
    (empty, or the column is absent) or duplicated. If barcode_qc=False a
    missing barcode is stored as '' and no barcode checks are made.
    """
    sampleDict = collections.OrderedDict()
    # Barcodes accepted so far. Membership cannot be tested against
    # sampleDict.values() because those values are dicts, not barcode strings.
    seen_barcodes = set()
    with open(sample_file, 'r', newline='') as csvfile:
        for line in csv.reader(csvfile, delimiter='\t'):
            # csv.reader yields an empty list for a blank line
            if not line or line[0].startswith('#'):
                continue
            # sample into a string that can be a filename; barcode to uppercase
            new_sample = convert_to_filename(line[0])
            if new_sample in sampleDict:
                sys.stdout.write('Error: redundant sample identifier {}'.format(new_sample))
                sys.exit(1)
            # A missing barcode column is OK for downstream only if samples
            # are already demultiplexed (barcode_qc=False)
            new_barcode = line[1].upper() if len(line) > 1 else ''
            if barcode_qc:
                if new_barcode == '':
                    sys.stdout.write('Missing barcode for sample {}'.format(new_sample))
                    sys.exit(1)
                if new_barcode in seen_barcodes:
                    sys.stdout.write('Error: redundant barcode {}'.format(new_barcode))
                    sys.exit(1)
                seen_barcodes.add(new_barcode)
            sampleDict[new_sample] = {
                'name': new_sample,
                'barcode': new_barcode
            }
    return sampleDict