def store_stats(self):
    """Write the step-1 demultiplex report, link Samples, and clean up.

    The report is written to ``s1_demultiplex_stats.txt`` inside
    ``self.data.dirs.fastqs`` (the path is also stored on
    ``self.data.stats_files.s1``) and has four sections:

    1. per raw file: total reads, cut sites found, barcodes matched
       (the row is repeated with ``_R1_`` -> ``_R2_`` for paired datatypes);
    2. per sample: total reads;
    3. per barcode name: the true barcode followed by every observed
       off-target barcode that is not itself a true barcode;
    4. unmatched barcodes, most frequent first.

    Afterwards one Sample is created per sample name and stored in
    ``self.data.samples`` when it has a non-zero read count,
    ``self.data.stats_dfs.s1`` is rebuilt, and ``self.tmpdir`` is removed.
    """
    file_row = "{:<35} {:>13}{:>13}{:>13}\n"
    bar_row = "{:<35} {:>13} {:>13} {:>13}\n"

    stats = self.stats
    barcodes = self.data.barcodes
    paired = 'pair' in self.data.params.datatype

    # barcode names tagged as technical replicates collapse onto the
    # sample name that precedes the tag
    snames = set()
    for sname in barcodes:
        if "-technical-replicate-" in sname:
            sname = sname.rsplit("-technical-replicate", 1)[0]
        snames.add(sname)

    # built once so the per-offhit membership test below is O(1);
    # assumes barcode values are hashable (strings) -- TODO confirm
    true_bars = set(barcodes.values())

    self.data.stats_files.s1 = os.path.join(
        self.data.dirs.fastqs, 's1_demultiplex_stats.txt')

    # the context manager closes the handle even if a write raises
    with open(self.data.stats_files.s1, 'w') as outfile:

        # section 1: per-file stats
        outfile.write(file_row.format(
            "raw_file", "total_reads", "cut_found", "bar_matched"))
        for fname in sorted(stats.perfile):
            dat = stats.perfile[fname]
            outfile.write(file_row.format(fname, dat[0], dat[1], dat[2]))
            if paired:
                # the R2 file gets the same counts as its R1 partner
                outfile.write(file_row.format(
                    fname.replace("_R1_", "_R2_"), dat[0], dat[1], dat[2]))

        # section 2: how many records for each sample
        outfile.write(
            "\n{:<35} {:>13}\n".format("sample_name", "total_reads"))
        for sname in sorted(snames):
            outfile.write("{:<35} {:>13}\n".format(
                sname, stats.fsamplehits[sname]))

        # section 3: which barcodes were found
        outfile.write("\n" + bar_row.format(
            "sample_name", "true_bar", "obs_bar", "N_records"))
        for sname in sorted(barcodes):
            if "-technical-replicate-" in sname:
                fname = sname.rsplit("-technical-replicate", 1)[0]
            else:
                fname = sname
            hit = barcodes[sname]

            # off-target rows are collected first so that the perfect-hit
            # row is only written once they were all formatted
            offrows = []
            if fname in stats.fdbars:
                # NOTE: not sorted by count; reversed iteration order of
                # whatever container fdbars holds for this sample
                for offhit in reversed(list(stats.fdbars.get(fname))):
                    if offhit not in true_bars:
                        offrows.append(bar_row.format(
                            sname, hit, offhit,
                            int(stats.fbarhits[offhit] / 2)))

            outfile.write(bar_row.format(
                sname, hit, hit, int(stats.fbarhits[hit] / 2)))
            outfile.write("".join(offrows))

        # section 4: misses, most frequent first (ascending sort reversed,
        # which keeps the original tie order)
        misskeys = sorted(stats.fmisses, key=stats.fmisses.get)
        for key in reversed(misskeys):
            outfile.write(bar_row.format(
                "no_match", "_", key, stats.fmisses[key]))

    # Link Sample with this data file to the Assembly object
    for sname in snames:
        sample = Sample(sname)

        # a replicated sample carries a list with one barcode per replicate;
        # only replicate indices 0..499 are looked up
        replicate_bars = []
        for n in range(500):
            fbar = barcodes.get(
                "{}-technical-replicate-{}".format(sname, n))
            if fbar:
                replicate_bars.append(fbar)
        if replicate_bars:
            sample.barcode = replicate_bars
        else:
            sample.barcode = barcodes[sname]

        # file names; single-end data gets an empty R2 slot
        read1 = os.path.join(self.data.dirs.fastqs, sname + "_R1_.fastq.gz")
        if paired:
            read2 = os.path.join(
                self.data.dirs.fastqs, sname + "_R2_.fastq.gz")
        else:
            read2 = ""
        sample.files.fastqs = [(read1, read2)]

        # fill in the summary stats and the full df stats value
        sample.stats["reads_raw"] = int(stats.fsamplehits[sname])
        sample.stats_dfs.s1["reads_raw"] = int(stats.fsamplehits[sname])

        # Only link Sample if it has data
        if sample.stats["reads_raw"]:
            sample.stats.state = 1
            self.data.samples[sample.name] = sample
        else:
            print("Excluded sample: no data found for", sname)

    # initiate s1 key for data object
    self.data.stats_dfs.s1 = self.data._build_stat("s1")

    # cleanup
    shutil.rmtree(self.tmpdir)
def make_stats(data, raws):
    """Collate pickled per-chunk stats, write the report, link Samples.

    Parameters
    ----------
    data : Assembly-like object
        Read: ``dirs.fastqs``, ``barcodes``, ``paramsdict["datatype"]``.
        Written: ``statsfiles.s1`` and ``samples``.
    raws : iterable of tuples
        Raw-file tuples; the basename of element 0, minus its last
        extension, is the key under which that file's counts are summed.

    Every ``*.pickle`` file in ``data.dirs.fastqs`` is expected to hold
    ``((handle, total, cutfound, matched),
    (samplehits, barhits, misses, dbars))``. The report goes to
    ``s1_demultiplex_stats.txt`` in the same directory. A Sample is
    stored in ``data.samples`` for each barcode name with a non-zero
    read count.
    """
    row = '{:<35} {:>13}{:>13}{:>13}\n'
    fields = ("ftotal", "fcutfound", "fmatched")

    ## stats for each rawdata file
    perfile = {}
    for rawtuple in raws:
        handle = os.path.splitext(os.path.basename(rawtuple[0]))[0]
        perfile[handle] = dict.fromkeys(fields, 0)

    ## stats for each sample
    fdbars = {}
    fsamplehits = Counter()
    fbarhits = Counter()
    fmisses = Counter()

    ## get stats from each file pickle
    ## NOTE(review): pickle.load can execute arbitrary code; this must only
    ## ever read pickles written by this program into its own directory.
    for picfile in glob.glob(os.path.join(data.dirs.fastqs, "*.pickle")):
        with open(picfile, "rb") as pickin:
            filestats, samplestats = pickle.load(pickin)
        handle, total, cutfound, matched = filestats
        samplehits, barhits, misses, dbars = samplestats

        ## update file stats
        counts = perfile[handle]
        counts["ftotal"] += total
        counts["fcutfound"] += cutfound
        counts["fmatched"] += matched

        ## update sample stats
        fsamplehits.update(samplehits)
        fbarhits.update(barhits)
        fmisses.update(misses)
        fdbars.update(dbars)

    paired = "pair" in data.paramsdict["datatype"]
    names_sorted = sorted(data.barcodes)
    ## built once so the off-hit filter is O(1) per lookup;
    ## assumes barcode values are hashable (strings) -- TODO confirm
    true_bars = set(data.barcodes.values())

    data.statsfiles.s1 = os.path.join(
        data.dirs.fastqs, 's1_demultiplex_stats.txt')

    ## the context manager closes the handle even if a write raises
    with open(data.statsfiles.s1, 'w') as outfile:

        ## how many from each rawfile
        outfile.write(row.format(
            "raw_file", "total_reads", "cut_found", "bar_matched"))
        for rawstat in sorted(perfile):
            dat = [str(perfile[rawstat][i]) for i in fields]
            outfile.write(row.format(rawstat, *dat))
            if paired:
                ## the R2 file gets the same counts as its R1 partner
                outfile.write(row.format(
                    rawstat.replace("_R1_", "_R2_"), *dat))

        ## spacer, how many records for each sample
        outfile.write(
            '\n{:<35} {:>13}\n'.format("sample_name", "total_R1_reads"))
        for name in names_sorted:
            outfile.write("{:<35} {:>13}\n".format(name, fsamplehits[name]))

        ## spacer, which barcodes were found
        outfile.write('\n' + row.format(
            "sample_name", "true_bar", "obs_bar", "N_records"))
        for name in names_sorted:
            ## write perfect hit (reported with the sample's total count)
            hit = data.barcodes[name]
            outfile.write(row.format(name, hit, hit, fsamplehits[name]))

            ## write off-n hits, most frequent first
            if name in fdbars:
                offkeys = sorted(fdbars.get(name), key=fbarhits.get)
                for offhit in reversed(offkeys):
                    ## exclude perfect hit
                    if offhit not in true_bars:
                        outfile.write(row.format(
                            name, hit, offhit, fbarhits[offhit]))

        ## write misses, most frequent first
        for key in reversed(sorted(fmisses, key=fmisses.get)):
            outfile.write(row.format("no_match", "_", key, fmisses[key]))

    ## Link Sample with this data file to the Assembly object
    for name in data.barcodes:
        sample = Sample()
        sample.name = name
        sample.barcode = data.barcodes[name]
        read1 = os.path.join(data.dirs.fastqs, name + "_R1_.fastq.gz")
        if paired:
            read2 = os.path.join(data.dirs.fastqs, name + "_R2_.fastq.gz")
            sample.files.fastqs = [(read1, read2)]
        else:
            sample.files.fastqs = [(read1,)]
        sample.stats["reads_raw"] = fsamplehits[name]

        ## only link Samples that actually received reads
        if sample.stats["reads_raw"]:
            sample.stats.state = 1
            data.samples[sample.name] = sample
        else:
            print("Excluded sample: no data found for", name)
def remote_run_linker(self):
    """Link input fastq files to Samples and record their read counts.

    A Sample is created for every tuple in ``self.ftuples`` whose name
    (from ``get_name_from_file``) is not yet in ``self.data.samples``.
    If any Sample was created, a ``zbufcountlines`` job is submitted
    through ``self.lbview`` for the first file of every Sample. The
    result of each job divided by 4 is stored as ``reads_raw``. The
    step-1 stats table is then rebuilt and written to
    ``<project_dir>/<name>_s1_demultiplex_stats.txt``; it goes to the
    project dir because the linked fastq dir may be outside the project.
    """
    # local counters
    createdinc = 0

    # Create new Sample objects with names derived from the input files
    for ftup in self.ftuples:
        sname = get_name_from_file(ftup[0], None, None)
        if sname in self.data.samples:
            continue
        newsamp = Sample(sname)
        newsamp.stats.state = 1
        newsamp.barcode = None
        newsamp.files.fastqs = [ftup]
        self.data.samples[sname] = newsamp
        createdinc += 1

    # send jobs to engines for counting; only when something new was linked
    rasyncs = {}
    if createdinc:
        for sample in self.data.samples.values():
            first_file = sample.files.fastqs[0][0]
            gzipped = bool(first_file.endswith(".gz"))
            # submit job to count lines and store async
            rasyncs[sample.name] = self.lbview.apply(
                zbufcountlines, first_file, gzipped)

    # wait for link jobs to finish, refreshing the progress bar
    start = time.time()
    printstr = ("loading reads ", "s1")
    while True:
        fin = [job.ready() for job in rasyncs.values()]
        self.data._progressbar(len(fin), sum(fin), start, printstr)
        time.sleep(0.1)
        if len(fin) == sum(fin):
            self.data._print("")
            break

    # collect link job results; the job result is divided by 4 to get reads
    for sname, job in rasyncs.items():
        res = job.get() / 4
        sample = self.data.samples[sname]
        sample.stats.reads_raw = res
        sample.stats_dfs.s1["reads_raw"] = res
        # NOTE(review): new Samples above set `.stats.state`; this sets
        # `.state` on the Sample itself -- confirm which one is intended.
        sample.state = 1

    # print if data were linked
    if createdinc:
        # double for paired data
        if 'pair' in self.data.params.datatype:
            createdinc = createdinc * 2
        if self.data._cli:
            self.data._print("{} fastq files loaded to {} Samples.".format(
                createdinc, len(self.data.samples),
            ))

    # save step-1 stats. We don't want to write this to the fastq dir, b/c
    # it is not necessarily inside our project dir. Instead, we'll write
    # this file into our project dir in the case of linked_fastqs.
    self.data.stats_dfs.s1 = self.data._build_stat("s1")
    self.data.stats_files.s1 = os.path.join(
        self.data.params.project_dir,
        self.data.name + '_s1_demultiplex_stats.txt')
    with open(self.data.stats_files.s1, 'w') as outfile:
        # builtin int: the `np.int` alias was removed in NumPy 1.24
        table = self.data.stats_dfs.s1.fillna(value=0).astype(int)
        table.to_string(outfile)
def make_stats(data, raws):
    """Read pickled demultiplex stats, collate them, and write the report.

    Parameters
    ----------
    data : Assembly-like object
        Read: ``dirs.fastqs``, ``barcodes``, ``paramsdict["datatype"]``.
        Written: ``statsfiles.s1`` and ``samples``.
    raws : iterable of tuples
        Raw-file tuples; the basename of element 0, minus its last
        extension, names the per-file row in the report.

    Each ``*.pickle`` in ``data.dirs.fastqs`` must unpack as
    ``((handle, total, cutfound, matched),
    (samplehits, barhits, misses, dbars))``. Counts are summed across
    pickles, written to ``s1_demultiplex_stats.txt`` in that directory,
    and a Sample is linked into ``data.samples`` for every barcode name
    whose read count is non-zero.
    """
    fmt = '{:<35} {:>13}{:>13}{:>13}\n'
    keys = ["ftotal", "fcutfound", "fmatched"]

    ## stats for each rawdata file, all counters starting at zero
    perfile = {}
    for rawtuple in raws:
        base = os.path.basename(rawtuple[0])
        perfile[os.path.splitext(base)[0]] = {key: 0 for key in keys}

    ## stats for each sample
    fdbars = {}
    fsamplehits = Counter()
    fbarhits = Counter()
    fmisses = Counter()

    ## get stats from each file pickle
    ## NOTE(review): pickle.load can execute arbitrary code; only pickles
    ## written by this program into its own output dir may be read here.
    pickles = glob.glob(os.path.join(data.dirs.fastqs, "*.pickle"))
    for picfile in pickles:
        with open(picfile, "rb") as pickin:
            filestats, samplestats = pickle.load(pickin)
        handle, total, cutfound, matched = filestats
        samplehits, barhits, misses, dbars = samplestats

        ## update file stats
        for key, value in zip(keys, (total, cutfound, matched)):
            perfile[handle][key] += value

        ## update sample stats
        fsamplehits.update(samplehits)
        fbarhits.update(barhits)
        fmisses.update(misses)
        fdbars.update(dbars)

    is_paired = "pair" in data.paramsdict["datatype"]
    names_sorted = sorted(data.barcodes)
    ## hoisted out of the loops: O(1) membership instead of a scan per hit;
    ## assumes barcode values are hashable (strings) -- TODO confirm
    known_bars = set(data.barcodes.values())

    data.statsfiles.s1 = os.path.join(
        data.dirs.fastqs, 's1_demultiplex_stats.txt')

    ## `with` guarantees the report handle is closed if a write fails
    with open(data.statsfiles.s1, 'w') as outfile:

        ## how many from each rawfile
        outfile.write(fmt.format(
            "raw_file", "total_reads", "cut_found", "bar_matched"))
        for rawstat in sorted(perfile):
            dat = [str(perfile[rawstat][key]) for key in keys]
            outfile.write(fmt.format(*[rawstat] + dat))
            if is_paired:
                ## R2 rows repeat the R1 counts
                rawstat2 = rawstat.replace("_R1_", "_R2_")
                outfile.write(fmt.format(*[rawstat2] + dat))

        ## spacer, how many records for each sample
        outfile.write(
            '\n{:<35} {:>13}\n'.format("sample_name", "total_R1_reads"))
        for name in names_sorted:
            outfile.write("{:<35} {:>13}\n".format(name, fsamplehits[name]))

        ## spacer, which barcodes were found
        outfile.write('\n' + fmt.format(
            "sample_name", "true_bar", "obs_bar", "N_records"))
        for name in names_sorted:
            ## write perfect hit (reported with the sample's total count)
            hit = data.barcodes[name]
            outfile.write(fmt.format(name, hit, hit, fsamplehits[name]))

            ## write off-n hits in descending order of count
            if name not in fdbars:
                continue
            offkeys = sorted(fdbars.get(name), key=fbarhits.get)
            for offhit in reversed(offkeys):
                ## exclude perfect hit
                if offhit not in known_bars:
                    outfile.write(fmt.format(
                        name, hit, offhit, fbarhits[offhit]))

        ## write misses in descending order of count
        misskeys = sorted(fmisses, key=fmisses.get)
        for key in reversed(misskeys):
            outfile.write(fmt.format("no_match", "_", key, fmisses[key]))

    ## Link Sample with this data file to the Assembly object
    for name in data.barcodes:
        sample = Sample()
        sample.name = name
        sample.barcode = data.barcodes[name]
        if is_paired:
            sample.files.fastqs = [
                (os.path.join(data.dirs.fastqs, name + "_R1_.fastq.gz"),
                 os.path.join(data.dirs.fastqs, name + "_R2_.fastq.gz"))]
        else:
            sample.files.fastqs = [
                (os.path.join(data.dirs.fastqs, name + "_R1_.fastq.gz"), )]
        sample.stats["reads_raw"] = fsamplehits[name]

        ## Samples without reads are reported and left out
        if sample.stats["reads_raw"]:
            sample.stats.state = 1
            data.samples[sample.name] = sample
        else:
            print("Excluded sample: no data found for", name)
def link_fastqs(self, path=None, merged=False, force=False, append=False):
    """
    Create Sample objects for samples in sorted_fastq_path.

    Note
    ----
    link_fastqs() is called automatically during step2() if no Samples
    are yet present in the Assembly object (data were not demultiplexed
    in step1().) It looks for demultiplexed data files located in the
    [sorted_fastq_path].

    Parameters
    ----------
    path : str
        Path to the fastq files to be linked to Sample objects. The default
        location is to select all files in the 'sorted_fastq_path'.
        Alternatively a different path can be entered here. A directory
        selects every file inside it; anything else is used as a glob
        pattern.

    merged : bool
        Set to True if files represent first and second reads that were
        merged using some external software such as `PEAR` or `VSEARCH`.

    force : bool
        Replace Samples that already exist in the Assembly with new ones
        built from the files found. Cannot be combined with `append`.

    append : bool
        The default action is to overwrite fastq files linked to Samples if
        they already have linked files. Use append=True to instead append
        additional fastq files to a Sample (file names should be formatted
        the same as usual, e.g., [name]_R1_[optional].fastq.gz).

    Returns
    -------
    None
        Prints the number of new Sample objects created and the number of
        fastq files linked to Sample objects in the Assembly object.

    Raises
    ------
    Exception
        If `force` and `append` are both set, if Samples already exist and
        neither is set, or if the datatype is paired and the files do not
        follow the _R1_/_R2_ naming convention.
    """
    ## cannot both force and append at once
    if force and append:
        raise Exception("Cannot use force and append at the same time.")

    if self.samples and not (force or append):
        raise Exception(
            "Files already linked to `{}`. ".format(self.name)
            + "Use force=True to replace all files, or append=True to "
            + "add additional files to existing Samples.")

    ## get path to data files
    if not path:
        path = self.paramsdict["sorted_fastq_path"]

    ## a directory selects all of its contents. os.path.join is required
    ## here: appending "*" to a path without a trailing separator would
    ## glob the directory's siblings instead of the files inside it.
    if os.path.isdir(path):
        path = os.path.join(path, "*")

    ## grab fastqs/fq/gzip/all, sorted alphabetically
    fastqs = sorted(
        i for i in glob.glob(path) if i.endswith((".gz", ".fastq", ".fq")))

    ## link pairs into tuples
    if 'pair' in self.paramsdict["datatype"]:
        ## check that names fit the paired naming convention: there must be
        ## R1 files and every derived R2 name must be a real file
        r1_files = [i for i in fastqs if "_R1_" in i]
        r2_files = [i.replace("_R1_", "_R2_") for i in r1_files]
        if not r1_files or not all(os.path.exists(i) for i in r2_files):
            raise Exception(
                "File name format error: paired file names "
                + "must be identical except for _R1_ and _R2_ in their names.")
        fastqs = list(zip(r1_files, r2_files))

    ## data are not paired, create one-element tuples
    else:
        if any("_R2_" in i for i in fastqs):
            print("Given the presence of '_R2_' in file names, this "
                  + "is a warning that if your data are paired-end you should set "
                  + "the Assembly object datatype to a paired type (e.g., "
                  + "pairddrad or pairgbs) prior to running link_fastqs().")
        fastqs = [(i, ) for i in fastqs]

    ## counters for the printed output
    created = 0
    linked = 0
    appended = 0

    for fastqtuple in fastqs:
        assert isinstance(fastqtuple, tuple), "fastqs not a tuple."

        ## local counters
        createdinc = 0
        linkedinc = 0
        appendinc = 0

        ## remove file extension from name
        sname = _name_from_file(fastqtuple[0])

        if sname not in self.samples or (force and not append):
            ## create new Sample (replacing an existing one when forcing)
            self.samples[sname] = Sample(sname)
            self.samples[sname].stats.state = 1
            self.samples[sname].barcode = None
            self.samples[sname].files.fastqs.append(fastqtuple)
            createdinc += 1
            linkedinc += 1
        elif append:
            if fastqtuple not in self.samples[sname].files.fastqs:
                self.samples[sname].files.fastqs.append(fastqtuple)
                appendinc += 1
            else:
                print("The files {} are already in Sample {}, "
                      .format(fastqtuple, sname)
                      + "cannot append duplicate files to a Sample.\n")
        else:
            ## if not forcing, shouldn't be here with existing Samples
            print("The files {} are already in Sample.".format(sname)
                  + " Use append=True to append additional files to a Sample"
                  + " or force=True to replace all existing Samples.")

        ## record whether data were merged.
        if merged:
            self.samples[sname].merged = 1

        ## file names containing 'forward' hint at externally merged reads
        if not merged and 'forward' in fastqtuple[0]:
            print("If R1 and R2 data are merged (e.g., with PEAR) "
                  + "use link_fastqs(merged=True) to indicate this. You "
                  + "may need force=True to overwrite existing files.\n")

        ## if anything was linked, (re)count the reads of this Sample
        if linkedinc or createdinc or appendinc:
            nreads = 0
            ## iterate over files if there are multiple; the gzip flag is
            ## taken per file so appended files may differ in compression
            for alltuples in self.samples[sname].files.fastqs:
                first = alltuples[0]
                nreads += bufcount(first, bool(first.endswith(".gz")))
            ## bufcount's total is divided by 4 to give the read count
            self.samples[sname].stats.reads_raw = nreads / 4
            created += createdinc
            linked += linkedinc
            appended += appendinc

    ## print if data were linked
    print("{} new Samples created in `{}`.".format(created, self.name))
    if linked:
        print("{} fastq files linked to {} new Samples.".format(
            linked, len(self.samples)))
    if appended:
        print("{} fastq files appended to {} existing Samples.".format(
            appended, len(self.samples)))
def make_stats(data, perfile, fsamplehits, fbarhits, fmisses, fdbars):
    """Write the step-1 demultiplex report and link Samples to `data`.

    Parameters
    ----------
    data : Assembly-like object
        Read: ``dirs.fastqs``, ``barcodes``, ``paramsdict["datatype"]``.
        Written: ``samples``, ``stats_dfs.s1`` and ``stats_files.s1``.
    perfile : dict
        Raw-file name -> indexable of (total, cut found, matched) counts.
    fsamplehits : mapping
        Sample name -> number of reads assigned to the sample.
    fbarhits : mapping
        Observed barcode -> number of reads carrying it.
    fmisses : mapping
        Unmatched barcode -> number of reads carrying it.
    fdbars : mapping
        Sample name -> collection of barcodes observed for that sample.

    The report is ``s1_demultiplex_stats.txt`` in ``data.dirs.fastqs``.
    In the barcode section the perfect-hit row shows the sample total
    minus the off-target hits that are listed below it.
    """
    file_row = '{:<35} {:>13}{:>13}{:>13}\n'
    bar_row = '{:<35} {:>13} {:>13} {:>13}\n'

    paired = 'pair' in data.paramsdict["datatype"]
    names_sorted = sorted(data.barcodes)
    ## built once so the off-hit filter is O(1) per lookup;
    ## assumes barcode values are hashable (strings) -- TODO confirm
    true_bars = set(data.barcodes.values())

    ## out file
    outhandle = os.path.join(data.dirs.fastqs, 's1_demultiplex_stats.txt')

    ## the context manager closes the handle even if a write raises
    with open(outhandle, 'w') as outfile:

        ## write the header and rows for file stats
        outfile.write(file_row.format(
            "raw_file", "total_reads", "cut_found", "bar_matched"))
        for fname in sorted(perfile):
            dat = perfile[fname]
            outfile.write(file_row.format(fname, dat[0], dat[1], dat[2]))
            ## repeat for pairfile
            if paired:
                outfile.write(file_row.format(
                    fname.replace("_R1_", "_R2_"), dat[0], dat[1], dat[2]))

        ## spacer, how many records for each sample
        outfile.write(
            '\n{:<35} {:>13}\n'.format("sample_name", "total_reads"))
        for name in names_sorted:
            outfile.write("{:<35} {:>13}\n".format(name, fsamplehits[name]))

        ## spacer, which barcodes were found
        outfile.write('\n' + bar_row.format(
            "sample_name", "true_bar", "obs_bar", "N_records"))
        for name in names_sorted:
            hit = data.barcodes[name]
            offrows = []
            sumoffhits = 0

            ## off-n hits, most frequent first; gathered before writing
            ## because the perfect-hit row needs their sum
            if name in fdbars:
                offkeys = sorted(fdbars.get(name), key=fbarhits.get)
                for offhit in reversed(offkeys):
                    ## exclude perfect hit
                    if offhit not in true_bars:
                        offrows.append(bar_row.format(
                            name, hit, offhit, fbarhits[offhit]))
                        sumoffhits += fbarhits[offhit]

            ## perfect hits = sample total minus the off-n hits
            outfile.write(bar_row.format(
                name, hit, hit, fsamplehits[name] - sumoffhits))
            outfile.write("".join(offrows))

        ## write misses, most frequent first
        for key in reversed(sorted(fmisses, key=fmisses.get)):
            outfile.write(file_row.format("no_match", "_", key, fmisses[key]))

    ## Link Sample with this data file to the Assembly object
    for name in data.barcodes:
        sample = Sample()
        sample.name = name
        sample.barcode = data.barcodes[name]

        ## single-end data gets an empty R2 slot
        read1 = os.path.join(data.dirs.fastqs, name + "_R1_.fastq.gz")
        if paired:
            read2 = os.path.join(data.dirs.fastqs, name + "_R2_.fastq.gz")
        else:
            read2 = ""
        sample.files.fastqs = [(read1, read2)]

        ## fill in the summary stats and the full df stats value
        sample.stats["reads_raw"] = int(fsamplehits[name])
        sample.stats_dfs.s1["reads_raw"] = int(fsamplehits[name])

        ## Only link Sample if it has data
        if sample.stats["reads_raw"]:
            sample.stats.state = 1
            data.samples[sample.name] = sample
        else:
            print("Excluded sample: no data found for", name)

    ## initiate s1 key for data object
    data.stats_dfs.s1 = data._build_stat("s1")
    data.stats_files.s1 = outhandle
def make_stats(data, perfile, fsamplehits, fbarhits, fmisses, fdbars):
    """Write the step-1 demultiplex report and link Samples to `data`.

    Parameters
    ----------
    data : Assembly-like object
        Read: ``dirs.fastqs``, ``barcodes``, ``paramsdict["datatype"]``.
        Written: ``samples``, ``stats_dfs.s1`` and ``stats_files.s1``.
    perfile : dict
        Raw-file name -> dict with the keys "ftotal", "fcutfound" and
        "fmatched".
    fsamplehits : mapping
        Sample name -> number of reads assigned to the sample.
    fbarhits : mapping
        Observed barcode -> number of reads carrying it.
    fmisses : mapping
        Unmatched barcode -> number of reads carrying it.
    fdbars : mapping
        Sample name -> collection of barcodes observed for that sample.

    The report is ``s1_demultiplex_stats.txt`` in ``data.dirs.fastqs``.
    In the barcode section the perfect-hit row shows the sample's total
    read count.
    """
    row = '{:<35} {:>13}{:>13}{:>13}\n'
    fields = ("ftotal", "fcutfound", "fmatched")

    paired = "pair" in data.paramsdict["datatype"]
    names_sorted = sorted(data.barcodes)
    ## built once so the off-hit filter is O(1) per lookup;
    ## assumes barcode values are hashable (strings) -- TODO confirm
    true_bars = set(data.barcodes.values())

    ## out file
    outhandle = os.path.join(data.dirs.fastqs, 's1_demultiplex_stats.txt')

    ## the context manager closes the handle even if a write raises
    with open(outhandle, 'w') as outfile:

        ## how many from each rawfile
        outfile.write(row.format(
            "raw_file", "total_reads", "cut_found", "bar_matched"))
        for rawstat in sorted(perfile):
            dat = [str(perfile[rawstat][i]) for i in fields]
            outfile.write(row.format(rawstat, *dat))
            if paired:
                ## the R2 file gets the same counts as its R1 partner
                outfile.write(row.format(
                    rawstat.replace("_R1_", "_R2_"), *dat))

        ## spacer, how many records for each sample
        outfile.write(
            '\n{:<35} {:>13}\n'.format("sample_name", "total_reads"))
        for name in names_sorted:
            outfile.write("{:<35} {:>13}\n".format(name, fsamplehits[name]))

        ## spacer, which barcodes were found
        outfile.write('\n' + row.format(
            "sample_name", "true_bar", "obs_bar", "N_records"))
        for name in names_sorted:
            ## write perfect hit
            hit = data.barcodes[name]
            outfile.write(row.format(name, hit, hit, fsamplehits[name]))

            ## write off-n hits, most frequent first
            if name in fdbars:
                offkeys = sorted(fdbars.get(name), key=fbarhits.get)
                for offhit in reversed(offkeys):
                    ## exclude perfect hit
                    if offhit not in true_bars:
                        outfile.write(row.format(
                            name, hit, offhit, fbarhits[offhit]))

        ## write misses, most frequent first
        for key in reversed(sorted(fmisses, key=fmisses.get)):
            outfile.write(row.format("no_match", "_", key, fmisses[key]))

    ## Link Sample with this data file to the Assembly object
    for name in data.barcodes:
        sample = Sample()
        sample.name = name
        sample.barcode = data.barcodes[name]

        ## single-end data gets an empty R2 slot
        read1 = os.path.join(data.dirs.fastqs, name + "_R1_.fastq.gz")
        if paired:
            read2 = os.path.join(data.dirs.fastqs, name + "_R2_.fastq.gz")
        else:
            read2 = ""
        sample.files.fastqs = [(read1, read2)]

        ## fill in the summary stats and the full df stats value
        sample.stats["reads_raw"] = int(fsamplehits[name])
        sample.stats_dfs.s1["reads_raw"] = int(fsamplehits[name])

        ## Only link Sample if it has data
        if sample.stats["reads_raw"]:
            sample.stats.state = 1
            data.samples[sample.name] = sample
        else:
            print("Excluded sample: no data found for", name)

    ## initiate s1 key for data object
    data.stats_dfs.s1 = data.build_stat("s1")
    data.stats_files.s1 = outhandle