def samples_seq(self):
    '''Sequencing samples that refer to this patient sample.

    The table of all sequenced samples is loaded and filtered only on the
    first call; the filtered rows are then cached in
    self._sequenced_samples and reused by later calls.

    Returns:
        A copy of the cached table, i.e. the rows of the sequenced-samples
        table whose 'patient sample' entry equals self.name. A copy is
        returned so that changes made by the caller do not leak into the
        cache.
    '''
    if self._sequenced_samples is None:
        # TODO: optimize this call (the whole table is loaded just to keep
        # the rows of one patient sample)
        from hivwholeseq.sequencing.samples import load_samples_sequenced as lss

        all_sequenced = lss()
        is_mine = all_sequenced['patient sample'] == self.name
        self._sequenced_samples = all_sequenced.loc[is_mine]

    return self._sequenced_samples.copy()
def discard_nonsequenced_samples(self):
    '''Keep only the patient samples that have already been sequenced.

    The table of sequenced samples is loaded, the patient sample names it
    refers to are collected, and self.samples is replaced by its rows whose
    index label is among those names. Nothing is returned.
    '''
    from hivwholeseq.sequencing.samples import load_samples_sequenced as lss

    seq_table = lss()

    # Patient sample names that appear in the sequencing table. The string
    # 'nan' is excluded: it is presumably the placeholder for sequenced
    # samples that do not belong to any patient sample (TODO confirm).
    sequenced_names = set(seq_table.loc[:, 'patient sample'])
    sequenced_names.discard('nan')

    is_sequenced = self.samples.index.isin(sequenced_names)

    # NOTE: code that used to attach to each remaining sample its rows of
    # the sequencing table, as a 'samples seq' column, is disabled here. The
    # original FIXMEs flagged it as buggy and of unclear purpose.

    self.samples = self.samples.loc[is_sequenced]
def discard_nonsequenced_samples(self):
    '''Drop every patient sample for which no sequencing sample exists yet.

    self.samples is overwritten with the subset of its rows whose index
    label occurs in the 'patient sample' column of the sequenced-samples
    table. The method returns None.
    '''
    from hivwholeseq.sequencing.samples import load_samples_sequenced as lss

    table_sequenced = lss()

    # The literal string 'nan' is not a valid patient sample name here, so
    # it is removed before matching (presumably it marks sequencing samples
    # without a patient sample; verify against the samples table).
    names_with_data = set(
        table_sequenced.loc[:, 'patient sample']).difference(['nan'])

    keep = self.samples.index.isin(names_with_data)
    remaining = self.samples.loc[keep]

    # NOTE: a former step that stored, per remaining sample, the matching
    # rows of the sequencing table in a 'samples seq' column is disabled.
    # The original FIXMEs questioned why it was here and called it buggy.

    self.samples = remaining
parser = argparse.ArgumentParser(description="make figure") parser.add_argument('--redo', action='store_true', help='recalculate data') params = parser.parse_args() fragment = 'F1' VERBOSE = 2 username = os.path.split(os.getenv('HOME'))[-1] foldername = get_figure_folder(username, 'first') fn_data = foldername+'data/' mkdirs(fn_data) fn_data = fn_data + 'minor_alleles_example.pickle' if not os.path.isfile(fn_data) or params.redo: samplename = 'NL4-3' sample = lss(samplename) counts = sample.get_allele_counts(fragment, merge_read_types=True) data = compress_data(counts, samplename, fragment) samplename = '27134' sample = lssp(samplename) counts = sample.get_allele_counts(fragment, merge_read_types=True) data = compress_data(counts, samplename, fragment, data=data) store_data(data, fn_data) else: data = load_data(fn_data) plot_minor_allele_example(data, VERBOSE=VERBOSE,
# Script that links, inside the folder of every patient sample, the folders
# of the sequencing samples that refer to it.
import sys
import os

from hivwholeseq.utils.generic import mkdirs
from hivwholeseq.patients.samples import itersample
from hivwholeseq.sequencing.samples import load_samples_sequenced as lss
from hivwholeseq.patients.samples import load_samples_sequenced as lssp
from hivwholeseq.sequencing.filenames import get_sample_foldername



# Script
if __name__ == '__main__':

    samples_pat = lssp()
    samples_seq = lss()

    for samplename, sample in itersample(samples_pat):

        # All links for this patient sample live in one subfolder
        root_foldername = sample.get_foldername()+'samples_sequencing/'
        mkdirs(root_foldername)

        # Pick the sequencing samples of this patient sample with a single
        # vectorized comparison instead of testing the rows one by one;
        # the table order is preserved.
        is_match = samples_seq['patient sample'] == samplename
        for samplenameseq in samples_seq.loc[is_match].index:
            src_folder = get_sample_foldername(samplenameseq)
            dst_folder = root_foldername+samplenameseq

            # Only create the link if it is not there already, so that the
            # script can be rerun safely.
            # NOTE: a regular file or folder at dst_folder is not detected
            # by islink and still makes os.symlink raise OSError.
            if not os.path.islink(dst_folder):
                os.symlink(src_folder, dst_folder)
                # Single-argument print: same output under Python 2 and 3
                print('Symlink: {0} {1}'.format(src_folder, dst_folder))
            else:
                print('Exists: {0}'.format(dst_folder))
help='Execute the script in parallel on the cluster') args = parser.parse_args() seq_runs = args.runs adaIDs = args.adaIDs use_pats = args.use_pats use_interactive = args.interactive detail = args.detail submit = args.submit if submit: fork_self(seq_runs, adaIDs=adaIDs, pats=use_pats, detail=detail) sys.exit() samples_pat = lssp(include_wrong=True) samples = lss() samples = samples.loc[samples['seq run'].isin(seq_runs)] if adaIDs is not None: samples = samples.loc[samples.adapter.isin(adaIDs)] if len(seq_runs) >= 2: samples.sort(columns=['patient sample', 'seq run'], inplace=True) for isa, (samplename, sample) in enumerate(samples.iterrows()): sample = SampleSeq(sample) print sample.name, 'seq:', sample['seq run'], sample.adapter, if sample['patient sample'] == 'nan': print 'not a patient sample', if use_pats: print '(skip)'
help='Number of reads analyzed') parser.add_argument('--verbose', type=int, default=0, help='Verbosity level [0-3]') parser.add_argument('--minor-allele', action='store_true', dest='minor_allele', help='Plot also minor allele') args = parser.parse_args() samplenames = args.samples seq_runs = args.runs adaIDs = args.adaIDs fragments = args.fragments maxreads = args.maxreads VERBOSE = args.verbose use_minor_allele = args.minor_allele samples = lss() if samplenames is not None: samples = samples.loc[samples.index.isin(samplenames)] else: ind = np.zeros(len(samples), bool) for seq_run in seq_runs: dataset = load_sequencing_run(seq_run) data_folder = dataset.folder samples_run = dataset.samples # If the script is called with no adaID, iterate over all if adaIDs is not None: samples_run = samples_run.loc[samples_run.adapter.isin(adaIDs)] ind |= samples.index.isin(samples_run.index)
parser = argparse.ArgumentParser(description="make figure") parser.add_argument('--redo', action='store_true', help='recalculate data') params = parser.parse_args() fragment = 'F1' VERBOSE = 2 username = os.path.split(os.getenv('HOME'))[-1] foldername = get_figure_folder(username, 'first') fn_data = foldername + 'data/' mkdirs(fn_data) fn_data = fn_data + 'minor_alleles_example.pickle' if not os.path.isfile(fn_data) or params.redo: samplename = 'NL4-3' sample = lss(samplename) counts = sample.get_allele_counts(fragment, merge_read_types=True) data = compress_data(counts, samplename, fragment) samplename = '27134' sample = lssp(samplename) counts = sample.get_allele_counts(fragment, merge_read_types=True) data = compress_data(counts, samplename, fragment, data=data) store_data(data, fn_data) else: data = load_data(fn_data) plot_minor_allele_example( data, VERBOSE=VERBOSE,