def parse_metadata(self, data_dir):
    """Group samples by workflow configuration and yield rendered workflow JSONs.

    Iterates the metadata records (a DataFrame-like with ``iterrows``), builds a
    workflow key from the read type (plus ``-with-control`` when a control sample
    is present), groups samples under that key, and yields one JSON per sample
    (when ``separate_jsons``) or one JSON per workflow key.

    :param data_dir: directory holding the sample data files.
    :yields: tuples ``(rendered_json, wf_key, sample_index)``; ``sample_index``
        is ``None`` when all samples are merged into a single JSON.
    :raises Exception: if samples under one key declare more than one genome.
    """
    samples_dict = defaultdict(list)
    wf_conf_dict = {}
    for r_ix, r in self.records.iterrows():
        read_type = r['paired-end or single-end'].lower()
        sample_info = {'treatment': r['name']}
        wf_key = read_type
        # After reading the metadata, an undetermined control cell comes back
        # as NaN (a float), so a float value means "no control".
        if 'control' in r.keys() and r['control'] and not isinstance(r['control'], float):
            sample_info['control'] = r['control']
            wf_key += '-with-control'
        # Materialize the keys: a live dict view would silently change if
        # sample_info were mutated after this point.
        wf_conf_dict[wf_key] = {'rt': read_type, 'st': list(sample_info.keys())}
        genome = consts.GENOME  # Default genome
        if 'genome' in r.keys():
            genome = r['genome']
        samples_dict[wf_key].append([sample_info, genome])
    # .items() replaces Python-2-only .iteritems() (works on both versions).
    for wf_key, samples_genomes in samples_dict.items():
        if self.obj.separate_jsons:
            # NOTE(review): sorting [dict, genome] pairs relies on dict ordering
            # comparisons (Python 2 only) when genomes tie — confirm target version.
            for si, s in enumerate(sorted(samples_genomes)):
                sample, genome = s[0], s[1]
                ref_dataset = consts.ReferenceDataset(genome)
                self.update_paths(ref_dataset)
                yield self.render_json(wf_conf_dict[wf_key], [sample], data_dir,
                                       self.experiment_type), wf_key, si
        else:
            samples_list, genomes_list = zip(*samples_genomes)
            if len(set(genomes_list)) > 1:
                raise Exception('More than one genome specified (%s). Please create a different metadata file'
                                ' per genome or provide a sjdb and specify the --separate-jsons argument'
                                % ', '.join(set(genomes_list)))
            ref_dataset = consts.ReferenceDataset(genomes_list[0])
            self.update_paths(ref_dataset)
            yield self.render_json(wf_conf_dict[wf_key], sorted(samples_list), data_dir,
                                   self.experiment_type), wf_key, None
def parse_metadata(self, data_dir):
    """Group RNA-seq samples by workflow configuration and yield rendered JSONs.

    The workflow key combines read type and strand specificity (plus
    ``-with-sjdb`` when STAR 2-pass is skipped). Yields one JSON per sample
    (when ``separate_jsons``) or one JSON per workflow key.

    :param data_dir: directory holding the sample data files.
    :yields: tuples ``(rendered_json, wf_key, sample_index)``; ``sample_index``
        is ``None`` when all samples are merged into a single JSON.
    :raises Exception: if samples under one key declare more than one genome.
    """
    samples_dict = defaultdict(list)
    wf_conf_dict = {}
    for r in self.records:
        read_type = r['Paired-end or single-end'].lower()
        sample_name = r['Name']
        strand_specific = r['Strand specificity']
        genome = consts.GENOME  # Default genome
        if 'Genome' in r.keys():
            genome = r['Genome']
        kws = [read_type, strand_specific]
        if self.skip_star_2pass:
            kws.append('with-sjdb')
        wf_key = '-'.join(kws)
        wf_conf_dict[wf_key] = {'rt': read_type, 'sn': sample_name}
        samples_dict[wf_key].append([sample_name, genome])
    # .items() replaces Python-2-only .iteritems() (works on both versions).
    for wf_key, samples_genomes in samples_dict.items():
        if self.obj.separate_jsons:
            for si, s in enumerate(sorted(samples_genomes)):
                sample, genome = s[0], s[1]
                ref_dataset = consts.ReferenceDataset(genome, read_length=self.read_length)
                self.update_paths(ref_dataset)
                yield self.render_json(wf_conf_dict[wf_key], [sample], data_dir), wf_key, si
        else:
            # BUGFIX: the original bound the loop variable as the misspelled
            # 'samples_gemomes' but read 'samples_genomes' here -> NameError.
            samples_list = [s[0] for s in samples_genomes]
            genomes_list = [g[1] for g in samples_genomes]
            if len(set(genomes_list)) > 1:
                raise Exception(
                    'More than one genome specified (%s). Please create a different metadata file'
                    ' per genome or provide a sjdb and specify the --separate-jsons argument'
                    % ', '.join(set(genomes_list)))
            ref_dataset = consts.ReferenceDataset(genomes_list[0], read_length=self.read_length)
            self.update_paths(ref_dataset)
            yield self.render_json(wf_conf_dict[wf_key], sorted(samples_list), data_dir), wf_key, None
def parse_metadata(self, data_dir):
    """Group samples (genome, ERCC spike-in, read length) and yield rendered JSONs.

    The workflow key combines read type and strand specificity (plus
    ``-with-sjdb`` when STAR 2-pass is skipped). Each sample also carries its
    genome, ERCC spike-in flag and read length; when samples are merged into a
    single JSON those three must be homogeneous across the group.

    :param data_dir: directory holding the sample data files.
    :yields: tuples ``(rendered_json, wf_key, sample_index)``; ``sample_index``
        is ``None`` when all samples are merged into a single JSON.
    :raises Exception: if merged samples disagree on genome, ERCC choice or
        read length.
    """
    samples_dict = defaultdict(list)
    wf_conf_dict = {}
    for rix, r in self.records.iterrows():
        read_type = r['paired-end or single-end'].lower()
        sample_name = r['name']
        strand_specific = r['strand specificity']
        genome = consts.GENOME  # Default genome
        if 'genome' in r.keys():
            genome = r['genome']
        ercc_spikein = False
        if 'with ercc spike-in' in r.keys():
            ercc_spikein = r['with ercc spike-in']
        kws = [read_type, strand_specific]
        if self.skip_star_2pass:
            kws.append('with-sjdb')
        wf_key = '-'.join(kws)
        wf_conf_dict[wf_key] = {'rt': read_type, 'sn': sample_name}
        # Per-row read length overrides the instance-wide default.
        read_length = self.read_length
        if 'read length' in r.keys():
            read_length = int(r['read length'])
        samples_dict[wf_key].append([sample_name, genome, ercc_spikein, read_length])
    # .items() replaces Python-2-only .iteritems() (works on both versions).
    for wf_key, samples_genomes in samples_dict.items():
        if self.obj.separate_jsons:
            for si, s in enumerate(sorted(samples_genomes)):
                sample, genome, ercc_spikein, read_length = s
                ref_dataset = consts.ReferenceDataset(genome, read_length=read_length,
                                                      with_ercc=ercc_spikein)
                self.update_paths(ref_dataset)
                yield self.render_json(wf_conf_dict[wf_key], [sample], data_dir), wf_key, si
        else:
            # One zip-unpack instead of four parallel list comprehensions.
            samples_list, genomes_list, ercc_list, read_length_list = zip(*samples_genomes)
            if len(set(genomes_list)) > 1:
                raise Exception(
                    'More than one genome specified (%s). Please create a different metadata file'
                    ' per genome or provide a sjdb and specify the --separate-jsons argument'
                    % ', '.join(set(genomes_list)))
            if len(set(ercc_list)) > 1:
                raise Exception(
                    'With and without ERCC spike-in specified. Please create a different metadata file'
                    ' per ERCC choice or provide a sjdb and specify the --separate-jsons argument')
            if len(set(read_length_list)) > 1:
                raise Exception(
                    'More than one read length specified. Please create a different metadata file'
                    ' per read length choice or provide a sjdb and specify the --separate-jsons argument')
            ref_dataset = consts.ReferenceDataset(genomes_list[0], read_length=read_length_list[0],
                                                  with_ercc=ercc_list[0])
            self.update_paths(ref_dataset)
            yield self.render_json(wf_conf_dict[wf_key], sorted(samples_list), data_dir), wf_key, None
def parse_metadata(self, data_dir):
    """Group samples (with optional UMIs handling) and yield rendered JSONs.

    The workflow key is the read type, with ``-umis`` appended when the row
    declares UMIs. Yields one JSON per sample (when ``separate_jsons``) or one
    JSON per workflow key.

    :param data_dir: directory holding the sample data files.
    :yields: tuples ``(rendered_json, wf_key, sample_index)``; ``sample_index``
        is ``None`` when all samples are merged into a single JSON.
    :raises Exception: if samples under one key declare more than one genome.
    """
    samples_dict = defaultdict(list)
    wf_conf_dict = {}
    for rix, r in self.records.iterrows():
        read_type = r['paired-end or single-end'].lower()
        sample_name = r['name']
        genome = consts.GENOME  # Default genome
        if 'genome' in r.keys():
            genome = r['genome']
        wf_key = read_type
        with_umis = 'umis' in r.keys() and not is_false(r['umis'])
        if with_umis:
            wf_key += '-umis'
        wf_conf_dict[wf_key] = {
            'rt': read_type,
            'sn': sample_name,
            'umis': with_umis
        }
        samples_dict[wf_key].append([sample_name, genome])
    # .items() replaces Python-2-only .iteritems() (works on both versions).
    for wf_key, samples_genomes in samples_dict.items():
        if self.obj.separate_jsons:
            for si, s in enumerate(sorted(samples_genomes)):
                sample, genome = s[0], s[1]
                ref_dataset = consts.ReferenceDataset(
                    genome, read_length=self.read_length, umis='umis' in wf_key)
                self.update_paths(ref_dataset)
                yield self.render_json(wf_conf_dict[wf_key], [sample], data_dir), wf_key, si
        else:
            samples_list = [s[0] for s in samples_genomes]
            genomes_list = [g[1] for g in samples_genomes]
            if len(set(genomes_list)) > 1:
                raise Exception(
                    'More than one genome specified (%s). Please create a different metadata file'
                    ' per genome or provide a sjdb and specify the --separate-jsons argument'
                    % ', '.join(set(genomes_list)))
            ref_dataset = consts.ReferenceDataset(
                genomes_list[0], read_length=self.read_length, umis='umis' in wf_key)
            self.update_paths(ref_dataset)
            yield self.render_json(wf_conf_dict[wf_key], sorted(samples_list), data_dir), wf_key, None
def parse_metadata(self, data_dir):
    """Group peak-calling samples by workflow configuration and yield rendered JSONs.

    The workflow key combines read type and peak type (plus ``-with-control``
    when a control sample is present). Yields one JSON per sample (when
    ``separate_jsons``) or one JSON per workflow key.

    :param data_dir: directory holding the sample data files.
    :yields: tuples ``(rendered_json, wf_key, sample_index)``; ``sample_index``
        is ``None`` when all samples are merged into a single JSON.
    :raises Exception: if samples under one key declare more than one genome.
    """
    samples_dict = defaultdict(list)
    wf_conf_dict = {}
    for r in self.records:
        read_type = r['Paired-end or single-end'].lower()
        peak_type = r['Peak type'].lower()
        sample_info = {'treatment': r['Name'], 'iter': r['Iter num']}
        wf_key = '-'.join([read_type, peak_type])
        # 'NA' (any case) means no control was provided.
        if 'Control' in r.keys() and r['Control'] and r['Control'].upper() != 'NA':
            sample_info['control'] = r['Control']
            wf_key += '-with-control'
        # Materialize the keys: a live dict view would silently change if
        # sample_info were mutated after this point.
        wf_conf_dict[wf_key] = {
            'rt': read_type,
            'pt': peak_type,
            'st': list(sample_info.keys())
        }
        # NOTE(review): sibling parsers default to consts.GENOME — confirm
        # whether this hard-coded 'hg38' is intentional.
        genome = 'hg38'  # Default genome
        if 'Genome' in r.keys():
            genome = r['Genome']
        samples_dict[wf_key].append([sample_info, genome])
    # .items() replaces Python-2-only .iteritems() (works on both versions).
    for wf_key, samples_genomes in samples_dict.items():
        if self.obj.separate_jsons:
            for si, s in enumerate(sorted(samples_genomes)):
                sample, genome = s[0], s[1]
                ref_dataset = consts.ReferenceDataset(genome)
                self.update_paths(ref_dataset)
                yield self.render_json(wf_conf_dict[wf_key], [sample], data_dir,
                                       self.experiment_type), wf_key, si
        else:
            samples_list, genomes_list = zip(*samples_genomes)
            if len(set(genomes_list)) > 1:
                raise Exception(
                    'More than one genome specified (%s). Please create a different metadata file'
                    ' per genome or provide a sjdb and specify the --separate-jsons argument'
                    % ', '.join(set(genomes_list)))
            ref_dataset = consts.ReferenceDataset(genomes_list[0])
            self.update_paths(ref_dataset)
            yield self.render_json(wf_conf_dict[wf_key], sorted(samples_list), data_dir,
                                   self.experiment_type), wf_key, None
def parse_metadata(self, data_dir):
    """Group samples (with optional blacklist removal) and yield rendered JSONs.

    The workflow key is the read type, with ``-blacklist-removal`` appended
    unless the row explicitly disables it. When blacklist removal is disabled,
    the reference dataset's ENCODE blacklist BED file is cleared before
    rendering.

    :param data_dir: directory holding the sample data files.
    :yields: tuples ``(rendered_json, wf_key, sample_index)``; ``sample_index``
        is ``None`` when all samples are merged into a single JSON.
    :raises Exception: if samples under one key declare more than one genome.
    """
    samples_dict = defaultdict(list)
    wf_conf_dict = {}
    for r in self.records:
        read_type = r['Paired-end or single-end'].lower()
        sample_info = {'treatment': r['Name'], 'iter': r['Iter num']}
        wf_key = read_type
        # NOTE(review): sibling parsers default to consts.GENOME — confirm
        # whether this hard-coded 'hg38' is intentional.
        genome = 'hg38'  # Default genome
        if 'Genome' in r.keys():
            genome = r['Genome']
        # Blacklist removal is the default; it is skipped only when the column
        # exists and is explicitly false.
        if not ('Blacklist removal' in r.keys() and is_false(r['Blacklist removal'])):
            wf_key += '-blacklist-removal'
        wf_conf_dict[wf_key] = {'rt': read_type}
        samples_dict[wf_key].append([sample_info, genome])
    # .items() replaces Python-2-only .iteritems() (works on both versions).
    for wf_key, samples_genomes in samples_dict.items():
        if self.obj.separate_jsons:
            for si, s in enumerate(sorted(samples_genomes)):
                sample, genome = s[0], s[1]
                ref_dataset = consts.ReferenceDataset(genome)
                if 'blacklist-removal' not in wf_key:
                    ref_dataset.encode_blacklist_bedfile = None
                self.update_paths(ref_dataset)
                yield self.render_json(wf_conf_dict[wf_key], [sample], data_dir,
                                       self.experiment_type), wf_key, si
        else:
            samples_list = [s[0] for s in samples_genomes]
            genomes_list = [g[1] for g in samples_genomes]
            if len(set(genomes_list)) > 1:
                raise Exception(
                    'More than one genome specified (%s). Please create a different metadata file'
                    ' per genome or provide a sjdb and specify the --separate-jsons argument'
                    % ', '.join(set(genomes_list)))
            ref_dataset = consts.ReferenceDataset(genomes_list[0])
            if 'blacklist-removal' not in wf_key:
                ref_dataset.encode_blacklist_bedfile = None
            self.update_paths(ref_dataset)
            yield self.render_json(wf_conf_dict[wf_key], sorted(samples_list), data_dir,
                                   self.experiment_type), wf_key, None