def main():
    """Start here.

    Parse the command-line arguments, then run the pipeline stages in order:
    organize samples, demultiplex (or move already-demultiplexed reads),
    map reads, and analyze results.

    Sets the module-level globals ``experiment`` and ``keepall``.

    The disruption value must lie in the closed interval [0.0, 1.0]. An
    absent value (``None``) is treated as the default of 1.0; an out-of-range
    (or NaN) value triggers a warning and falls back to 1.0.
    """
    # NOTE(review): another definition of main() follows this one in the
    # file; if both remain, the later one takes effect -- confirm which is
    # intended.
    global experiment, keepall

    args = parseArgs(sys.argv[1:])
    experiment = convert_to_filename(args.experiment)
    gbkfile = args.genome
    reads = args.input
    samples = args.samples

    # Input may be an int (or absent), so normalize to a float first.
    disruption = 1.0 if args.disruption is None else float(args.disruption)
    # Chained comparison (rather than `< 0.0 or > 1.0`) also rejects NaN,
    # for which every ordering comparison is False.
    if not 0.0 <= disruption <= 1.0:
        print('\n*** WARNING ***'
              '\nDisruption value: {}'
              '\nDisruption value must be from 0.0 to 1.0'
              '\nProceeding with default value of 1.0\n'.format(disruption))
        disruption = 1.0

    nobarcodes = args.nobarcodes
    keepall = args.keepall

    # Organism reference files called 'genome.fna' etc
    organism = 'genome'

    # --- ORGANIZE SAMPLE LIST AND FILE PATHS --- #
    pipeline_organize(samples)

    # --- DEMULTIPLEX OR MOVE FILES IF ALREADY DEMULTIPLEXED --- #
    if nobarcodes:
        pipeline_no_demultiplex(reads)
    else:
        pipeline_demultiplex(reads)

    # --- BOWTIE MAPPING --- #
    genomeDir = 'results/{experiment}/genome_lookup/'.format(experiment=experiment)
    pipeline_mapping(gbkfile, organism, genomeDir, disruption)

    # --- ANALYSIS OF RESULTS --- #
    pipeline_analysis()
def main():
    """Start here.

    Parse the command-line arguments, then run the pipeline stages in order:
    organize samples, demultiplex (or move already-demultiplexed reads),
    map reads, analyze results, and print a completion banner.

    Sets the module-level globals ``experiment``, ``keepall`` and ``logdata``
    (the latter is reset to an empty dict for logging of sample info).

    The disruption value must lie in the closed interval [0.0, 1.0]. An
    absent value (``None``) is treated as the default of 1.0; an out-of-range
    (or NaN) value triggers a warning and falls back to 1.0.
    """
    global experiment, keepall, logdata

    args = parseArgs(sys.argv[1:])
    experiment = convert_to_filename(args.experiment)
    gbkfile = args.genome
    reads = args.input
    samples = args.samples

    # Input may be an int (or absent), so normalize to a float first.
    disruption = 1.0 if args.disruption is None else float(args.disruption)
    # Chained comparison (rather than `< 0.0 or > 1.0`) also rejects NaN,
    # for which every ordering comparison is False.
    if not 0.0 <= disruption <= 1.0:
        print('\n*** WARNING ***'
              '\nDisruption value: {}'
              '\nDisruption value must be from 0.0 to 1.0'
              '\nProceeding with default value of 1.0\n'.format(disruption))
        disruption = 1.0

    nobarcodes = args.nobarcodes
    keepall = args.keepall

    # Logging of sample info
    logdata = {}

    # Organism reference files called 'genome.fna' etc
    organism = 'genome'

    # --- ORGANIZE SAMPLE LIST AND FILE PATHS --- #
    pipeline_organize(samples)

    # --- DEMULTIPLEX OR MOVE FILES IF ALREADY DEMULTIPLEXED --- #
    if nobarcodes:
        pipeline_no_demultiplex(reads)
    else:
        pipeline_demultiplex(reads)

    # --- BOWTIE MAPPING --- #
    genomeDir = 'results/{experiment}/genome_lookup/'.format(experiment=experiment)
    pipeline_mapping(gbkfile, organism, genomeDir, disruption)

    # --- ANALYSIS OF RESULTS --- #
    pipeline_analysis()

    # --- CONFIRM COMPLETION --- #
    print('\n===================='
          '\n*       Done       *'
          '\n====================\n')
def sample_prep(sample_file, barcode_qc):
    """Return ordered dictionary of sample name and barcode for each sample.

    The sample file is tab-delimited: column 1 is the sample name, column 2
    (optional) is the barcode. Blank lines and comment lines beginning with
    ``#`` are ignored. Sample names are passed through
    ``convert_to_filename``; barcodes are upper-cased. A row without a
    barcode column gets the empty string as its barcode.

    Example result:

    samples = OrderedDict([('name1', {'name': 'name1', 'barcode': 'barcode1'}),
                           ('name2', {'name': 'name2', 'barcode': 'barcode2'})])

    Args:
        sample_file: Path to the tab-delimited sample file.
        barcode_qc: If true, every sample must have a non-empty barcode that
            is unique within the file.

    Returns:
        collections.OrderedDict mapping each sample name to a dict with the
        keys 'name' and 'barcode', in file order.

    Exits (status 1, message on stdout):
        - if a sample name occurs more than once;
        - if ``barcode_qc`` is true and a barcode is missing or duplicated.
    """
    sampleDict = collections.OrderedDict()
    # The values of sampleDict are dicts, so a barcode string can never be
    # found with `in sampleDict.values()`; track seen barcodes separately.
    seen_barcodes = set()
    with open(sample_file, 'r', newline='') as csvfile:
        for row in csv.reader(csvfile, delimiter='\t'):
            # csv.reader yields [] for blank lines; skip those and comments.
            if not row or row[0].startswith('#'):
                continue
            # sample into a string that can be a filename
            new_sample = convert_to_filename(row[0])
            if new_sample in sampleDict:
                sys.stdout.write('Error: redundant sample identifier {}'.format(new_sample))
                sys.exit(1)
            # barcode to uppercase; a missing barcode column is OK
            # downstream only if samples are already demultiplexed
            new_barcode = row[1].upper() if len(row) > 1 else ''
            if barcode_qc:
                if new_barcode == '':
                    sys.stdout.write('Missing barcode for sample {}'.format(new_sample))
                    sys.exit(1)
                if new_barcode in seen_barcodes:
                    sys.stdout.write('Error: redundant barcode {}'.format(new_barcode))
                    sys.exit(1)
            seen_barcodes.add(new_barcode)
            sampleDict[new_sample] = {
                'name': new_sample,
                'barcode': new_barcode
            }
    return sampleDict