def setUpClass(cls):
    """Insert three extra sample run documents used by the exclusion tests.

    Each new document is a deep copy of an existing sample run entry with
    its ``_id`` dropped (so the database assigns a fresh one) and a few
    fields overridden.
    """
    s_con = SampleRunMetricsConnection(dbname="samples-test", username="******", password="******")
    # (template entry name, ordered list of (field, value) overrides)
    clone_specs = [
        ("1_121015_BB002BBBXX_TGACCA",
         [("sequence", "AGTTGA"),
          ("name", "1_121015_BB002BBBXX_AGTTGA")]),
        ("1_121015_BB002BBBXX_TGACCA",
         [("sample_prj", "j-doe_00_01"),
          ("sequence", "CGAACG"),
          ("name", "1_121015_BB002BBBXX_CGAACG")]),
        ("3_120924_AC003CCCXX_ACAGTG",
         [("sample_prj", "j-doe_00_02"),
          ("sequence", "GGAAGG"),
          ("name", "3_120924_AC003CCCXX_GGAAGG")]),
    ]
    # Cache fetched templates so each entry is read from the db only once,
    # matching the original fetch pattern.
    templates = {}
    for template_name, overrides in clone_specs:
        if template_name not in templates:
            templates[template_name] = s_con.get_entry(template_name)
        kw = copy.deepcopy(templates[template_name])
        del kw["_id"]  # force the db to assign a new id on save
        doc = SampleRunMetricsDocument(**kw)
        for field, value in overrides:
            doc[field] = value
        s_con.save(doc)
def tearDownClass(cls):
    """Remove the sample run documents added in setUpClass."""
    s_con = SampleRunMetricsConnection(dbname="samples-test", username="******", password="******")
    # Delete the three documents created for the exclusion tests, in the
    # same order they were added.
    for name in ("1_121015_BB002BBBXX_AGTTGA",
                 "1_121015_BB002BBBXX_CGAACG",
                 "3_120924_AC003CCCXX_GGAAGG"):
        entry = s_con.get_entry(name)
        doc = s_con.db.get(entry["_id"])
        s_con.db.delete(doc)
def test_get_flowcell(self):
    """Test getting a flowcell for a given sample"""
    connection = SampleRunMetricsConnection(dbname="samples-test",
                                            username=self.user,
                                            password=self.pw,
                                            url=self.url)
    # Fetch only the "flowcell" field of the example sample's entry.
    flowcell = connection.get_entry(self.examples["sample"], "flowcell")
    self.assertEqual(str(flowcell), self.examples["flowcell"])
def test_dbcon(self):
    """Test database connection and that we get expected values."""
    s_con = SampleRunMetricsConnection(dbname="samples-test", username="******", password="******")
    samples = [s_con.get_entry(x) for x in s_con.name_view]
    samples_d = {x["name"]: x for x in samples}
    self.assertEqual(samples_d["1_120924_AC003CCCXX_TGACCA"]["date"], "120924")
    self.assertEqual(samples_d["1_121015_BB002BBBXX_TGACCA"]["flowcell"], "BB002BBBXX")
    self.assertEqual(samples_d["2_120924_AC003CCCXX_ACAGTG"]["entity_type"], "sample_run_metrics")
    self.assertEqual(samples_d["3_120924_AC003CCCXX_ACAGTG"]["lane"], "3")
    self.assertEqual(samples_d["4_120924_AC003CCCXX_CGTTAA"]["sequence"], "CGTTAA")
    self.assertEqual(samples_d["2_121015_BB002BBBXX_TGACCA"]["project_id"], "P002")
    fc_con = FlowcellRunMetricsConnection(dbname="flowcells-test", username="******", password="******")
    flowcells = [fc_con.get_entry(x) for x in fc_con.name_view]
    flowcells_d = {x["name"]: x for x in flowcells}
    self.assertEqual(flowcells_d["120924_AC003CCCXX"]["name"], "120924_AC003CCCXX")
    self.assertEqual(flowcells_d["121015_BB002BBBXX"]["name"], "121015_BB002BBBXX")
    self.assertEqual(flowcells_d["120924_AC003CCCXX"]["entity_type"], "flowcell_run_metrics")
    p_con = ProjectSummaryConnection(dbname="projects-test", username="******", password="******")
    projects = [p_con.get_entry(x) for x in p_con.name_view]
    projects_d = {x["project_name"]: x for x in projects}
    self.assertEqual(projects_d["J.Doe_00_01"]["min_m_reads_per_sample_ordered"], 0.1)
    self.assertEqual(projects_d["J.Doe_00_01"]["no_of_samples"], 2)
    self.assertEqual(set(projects_d["J.Doe_00_01"]["samples"].keys()),
                     set(["P001_101_index3", "P001_102", "P001_103"]))
    self.assertEqual(projects_d["J.Doe_00_01"]["customer_reference"], "GnuGenome")
    self.assertEqual(projects_d["J.Doe_00_02"]["min_m_reads_per_sample_ordered"], 0.2)
    # dict.keys() is a view object on Python 3 and never compares equal to
    # a list; materialize it so the assertion holds on both 2 and 3.
    self.assertEqual(list(projects_d["J.Doe_00_03"]["samples"].keys()), ["3_index6"])
    self.assertIn("A", projects_d["J.Doe_00_03"]["samples"]["3_index6"]["library_prep"])
def test_2_make_note(self):
    """Make a note subset by example flowcell and project"""
    s_con = SampleRunMetricsConnection(username=self.user, password=self.pw, url=self.url)
    fc_con = FlowcellRunMetricsConnection(username=self.user, password=self.pw, url=self.url)
    p_con = ProjectSummaryConnection(username=self.user, password=self.pw, url=self.url)
    paragraphs = sample_note_paragraphs()
    headers = sample_note_headers()
    samples = s_con.get_samples(self.examples["flowcell"], self.examples["project"])
    project = p_con.get_entry(self.examples["project"])
    samples = p_con.map_srm_to_name(self.examples["project"], fc_id=self.examples["flowcell"], use_bc_map=True)
    for k, v in samples.items():
        # BUGFIX: previously 's_param = parameters' aliased the shared dict,
        # so values (including the "N/A" fill-ins below) leaked from one
        # sample to the next. Work on a fresh copy per sample instead.
        s_param = dict(parameters)
        s = s_con.get_entry(k)
        s_param.update({key: s[srm_to_parameter[key]] for key in srm_to_parameter.keys()})
        fc = "{}_{}".format(s["date"], s["flowcell"])
        s_param["phix_error_rate"] = fc_con.get_phix_error_rate(str(fc), s["lane"])
        s_param['avg_quality_score'] = s_con.calc_avg_qv(s["name"])
        # Convert raw read count to millions, rounded to one decimal.
        s_param['rounded_read_count'] = round(float(s_param['rounded_read_count']) / 1e6, 1) if s_param['rounded_read_count'] else None
        s_param['customer_name'] = project['samples'][v["sample"]].get('customer_name', None)
        if project:
            s_param['ordered_amount'] = p_con.get_ordered_amount(self.examples["project"])
            s_param['customer_reference'] = s_param.get('customer_reference', project['customer_reference'])
            s_param['uppnex_project_id'] = s_param.get('uppnex_project_id', project['uppnex_id'])
        s_param['success'] = sequencing_success(s_param, cutoffs)
        # Fill remaining unknowns so the note template never sees None.
        s_param.update({key: "N/A" for key in s_param.keys() if s_param[key] is None})
        make_note("{}.pdf".format(s["barcode_name"]), headers, paragraphs, **s_param)
def test_get_flowcell(self):
    """Test getting a flowcell for a given sample"""
    con = SampleRunMetricsConnection(dbname="samples-test",
                                     username=self.user,
                                     password=self.pw,
                                     url=self.url)
    # Look up just the flowcell field for the example sample entry.
    result = con.get_entry(self.examples["sample"], "flowcell")
    self.assertEqual(str(result), self.examples["flowcell"])
def test_dbcon(self):
    """Test database connection and that we get expected values."""
    s_con = SampleRunMetricsConnection(dbname="samples-test", username="******", password="******")
    samples = [s_con.get_entry(x) for x in s_con.name_view]
    samples_d = {x["name"]: x for x in samples}
    self.assertEqual(samples_d["1_120924_AC003CCCXX_TGACCA"]["date"], "120924")
    self.assertEqual(samples_d["1_121015_BB002BBBXX_TGACCA"]["flowcell"], "BB002BBBXX")
    self.assertEqual(samples_d["2_120924_AC003CCCXX_ACAGTG"]["entity_type"], "sample_run_metrics")
    self.assertEqual(samples_d["3_120924_AC003CCCXX_ACAGTG"]["lane"], "3")
    self.assertEqual(samples_d["4_120924_AC003CCCXX_CGTTAA"]["sequence"], "CGTTAA")
    self.assertEqual(samples_d["2_121015_BB002BBBXX_TGACCA"]["project_id"], "P002")
    fc_con = FlowcellRunMetricsConnection(dbname="flowcells-test", username="******", password="******")
    flowcells = [fc_con.get_entry(x) for x in fc_con.name_view]
    flowcells_d = {x["name"]: x for x in flowcells}
    self.assertEqual(flowcells_d["120924_AC003CCCXX"]["name"], "120924_AC003CCCXX")
    self.assertEqual(flowcells_d["121015_BB002BBBXX"]["name"], "121015_BB002BBBXX")
    self.assertEqual(flowcells_d["120924_AC003CCCXX"]["entity_type"], "flowcell_run_metrics")
    p_con = ProjectSummaryConnection(dbname="projects-test", username="******", password="******")
    projects = [p_con.get_entry(x) for x in p_con.name_view]
    projects_d = {x["project_name"]: x for x in projects}
    self.assertEqual(projects_d["J.Doe_00_01"]["min_m_reads_per_sample_ordered"], 0.1)
    self.assertEqual(projects_d["J.Doe_00_01"]["no_of_samples"], 2)
    self.assertEqual(set(projects_d["J.Doe_00_01"]["samples"].keys()),
                     set(["P001_101_index3", "P001_102", "P001_103"]))
    self.assertEqual(projects_d["J.Doe_00_01"]["customer_reference"], "GnuGenome")
    self.assertEqual(projects_d["J.Doe_00_02"]["min_m_reads_per_sample_ordered"], 0.2)
    # BUGFIX: comparing a keys view to a list fails on Python 3; compare
    # as a list instead.
    self.assertEqual(list(projects_d["J.Doe_00_03"]["samples"].keys()), ["3_index6"])
    self.assertIn("A", projects_d["J.Doe_00_03"]["samples"]["3_index6"]["library_prep"])
def test_2_make_project_note(self):
    """Make a project note subset by flowcell and project"""
    s_con = SampleRunMetricsConnection(username=self.user, password=self.pw, url=self.url)
    fc_con = FlowcellRunMetricsConnection(username=self.user, password=self.pw, url=self.url)
    p_con = ProjectSummaryConnection(username=self.user, password=self.pw, url=self.url)
    paragraphs = project_note_paragraphs()
    headers = project_note_headers()
    # BUGFIX: work on a copy instead of aliasing the shared 'parameters'
    # dict, so updates here do not leak into other tests.
    param = dict(parameters)
    project = p_con.get_entry(self.examples["project"])
    if not project:
        # BUGFIX: Python 2 'print' statement replaced with the function
        # form, which is valid on both Python 2 and 3 for a single arg.
        print("No project named {}".format(self.examples["project"]))
        return
    # BUGFIX: the original had a redundant 'if project/else: return'
    # followed by an unreachable assignment from self.pargs; 'project' is
    # known to be truthy past the guard above.
    ordered_amount = p_con.get_ordered_amount(self.examples["project"])
    ## Start collecting the data
    sample_table = []
    sample_list = project['samples']
    param.update({key: project.get(ps_to_parameter[key], None) for key in ps_to_parameter.keys()})
    samples = p_con.map_name_to_srm(self.examples["project"], check_consistency=True, use_bc_map=True)
    all_passed = True
    for k, v in samples.items():
        if k == "Unexpected":
            continue
        project_sample = sample_list[k]
        vals = {x: project_sample.get(prjs_to_table[x], None) for x in prjs_to_table.keys()}
        vals['MOrdered'] = ordered_amount
        # BUGFIX: dict.keys() is not indexable on Python 3; materialize it.
        vals['BarcodeSeq'] = s_con.get_entry(list(v.keys())[0], "sequence")
        ## Set status
        vals['Status'] = set_status(vals) if vals['Status'] is None else vals['Status']
        vals.update({key: "N/A" for key in vals.keys() if vals[key] is None})
        if vals['Status'] == "N/A" or vals['Status'] == "NP":
            all_passed = False
        sample_table.append([vals[key] for key in table_keys])
    if all_passed:
        param["finished"] = 'Project finished.'
    sample_table.sort()
    # Deduplicate adjacent identical rows (the table is sorted).
    sample_table = list(sample_table for sample_table, _ in itertools.groupby(sample_table))
    sample_table.insert(0, ['ScilifeID', 'CustomerID', 'BarcodeSeq', 'MSequenced', 'MOrdered', 'Status'])
    paragraphs["Samples"]["tpl"] = make_sample_table(sample_table)
    make_note("{}.pdf".format(self.examples["project"]), headers, paragraphs, **param)
def _project_status_note_table(project_name=None,
                               username=None,
                               password=None,
                               url=None,
                               use_ps_map=True,
                               use_bc_map=False,
                               check_consistency=False,
                               ordered_million_reads=None,
                               uppnex_id=None,
                               customer_reference=None,
                               exclude_sample_ids=None,
                               project_alias=None,
                               sample_aliases=None,
                               projectdb="projects",
                               samplesdb="samples",
                               flowcelldb="flowcells",
                               include_all_samples=False,
                               param=None,
                               **kw):
    """Collect the sample table and parameters for a project status note.

    :param project_name: project name
    :param username: db user name
    :param password: db password
    :param url: db url
    :param ordered_million_reads: number of ordered reads in millions
    :param uppnex_id: the uppnex id (overrides db value)
    :param customer_reference: customer project name (overrides db value)
    :param exclude_sample_ids: sample ids to exclude from the note
    :param project_alias: project alias name
    :param sample_aliases: sample alias names
    :param projectdb: project db name
    :param samplesdb: samples db name
    :param flowcelldb: flowcells db name
    :param include_all_samples: include all samples in report
    :param param: pre-seeded parameter dict (updated in place)

    :returns: (output_data, sample_table, param) tuple, or None if the
        project is unknown.
    """
    # BUGFIX: the '={}' mutable defaults were shared between calls; 'param'
    # in particular is mutated below, so state leaked across invocations.
    # Create fresh dicts per call instead.
    if exclude_sample_ids is None:
        exclude_sample_ids = {}
    if sample_aliases is None:
        sample_aliases = {}
    if param is None:
        param = {}
    # mapping project_summary to parameter keys
    ps_to_parameter = {
        "scilife_name": "scilife_name",
        "customer_name": "customer_name",
        "project_name": "project_name"
    }
    # mapping project sample to table
    table_keys = ['ScilifeID', 'SubmittedID', 'BarcodeSeq', 'MSequenced', 'MOrdered']
    output_data = {'stdout': StringIO(), 'stderr': StringIO(), 'debug': StringIO()}
    # Connect and run
    s_con = SampleRunMetricsConnection(dbname=samplesdb, username=username, password=password, url=url)
    fc_con = FlowcellRunMetricsConnection(dbname=flowcelldb, username=username, password=password, url=url)
    p_con = ProjectSummaryConnection(dbname=projectdb, username=username, password=password, url=url)
    # Get the information source for this project
    source = p_con.get_info_source(project_name)
    # Get project summary from project database
    sample_aliases = _literal_eval_option(sample_aliases, default={})
    prj_summary = p_con.get_entry(project_name)
    if not prj_summary:
        LOG.warn("No such project '{}'".format(project_name))
        return
    LOG.debug("Working on project '{}'.".format(project_name))
    # Determine if project is finished by getting all samples sequenced date
    try:
        all_samples_sequenced = prj_summary['project_summary']['all_samples_sequenced']
    except (TypeError, KeyError):
        all_samples_sequenced = False
    # Get sample run list and loop samples to make mapping sample -> {sampleruns}
    sample_run_list = _set_sample_run_list(project_name, flowcell=None, project_alias=project_alias, s_con=s_con)
    samples = {}
    for s in sample_run_list:
        prj_sample = p_con.get_project_sample(project_name, s.get("project_sample_name", None))
        if prj_sample:
            sample_name = prj_sample['project_sample'].get("scilife_name", None)
            s_d = {s["name"]: {'sample': sample_name, 'id': s["_id"]}}
            samples.update(s_d)
        else:
            if s["barcode_name"] in sample_aliases:
                s_d = {sample_aliases[s["barcode_name"]]: {'sample': sample_aliases[s["barcode_name"]], 'id': s["_id"]}}
                samples.update(s_d)
            else:
                s_d = {s["name"]: {'sample': s["name"], 'id': s["_id"], 'barcode_name': s["barcode_name"]}}
                LOG.warn("No mapping found for sample run:\n '{}'".format(s_d))
    # Convert to mapping from desired sample name to list of aliases
    # Less important for the moment; one solution is to update the
    # Google docs summary table to use the P names
    sample_dict = prj_summary['samples']
    param.update({key: prj_summary.get(ps_to_parameter[key], None) for key in ps_to_parameter.keys()})
    param["ordered_amount"] = param.get("ordered_amount", p_con.get_ordered_amount(project_name, samples=sample_dict))
    if not param.get('customer_reference'):
        try:
            param['customer_reference'] = prj_summary['details']['customer_project_reference']
        except (TypeError, KeyError):
            param['customer_reference'] = prj_summary.get('customer_reference')
    param['uppnex_project_id'] = param.get('uppnex_project_id', prj_summary.get('uppnex_id'))
    # Override database values if options passed at command line
    if uppnex_id:
        param["uppnex_project_id"] = uppnex_id
    if customer_reference:
        param["customer_reference"] = customer_reference
    # Process options
    ordered_million_reads = _literal_eval_option(ordered_million_reads)
    exclude_sample_ids = _literal_eval_option(exclude_sample_ids, default={})
    ## Start collecting the data
    sample_table = []
    samples_excluded = []
    last_library_preps = p_con.get_latest_library_prep(project_name)
    last_library_preps_srm = [x for l in last_library_preps.values() for x in l]
    LOG.debug("Looping through sample map that maps project sample names to sample run metrics ids")
    for k, v in samples.items():
        LOG.debug("project sample '{}' maps to '{}'".format(k, v))
        if not include_all_samples:
            if v['sample'] not in last_library_preps.keys():
                LOG.info("No library prep information for sample {}; keeping in report".format(v['sample']))
            else:
                if k not in last_library_preps_srm:
                    LOG.info("Sample run {} ('{}') is not latest library prep ({}) for project sample {}: excluding from report".format(k, v["id"], ",".join(list(set(last_library_preps[v['sample']].values()))), v['sample']))
                    continue
        else:
            pass
        if re.search("Unexpected", k):
            continue
        barcode_seq = s_con.get_entry(k, "sequence")
        # Exclude sample id?
        if _exclude_sample_id(exclude_sample_ids, v['sample'], barcode_seq):
            samples_excluded.append(v['sample'])
            continue
        # Get the project sample name from the sample run and set table values
        project_sample = sample_dict[v['sample']]
        vals = _set_sample_table_values(v['sample'], project_sample, barcode_seq, ordered_million_reads, param)
        sample_table.append([vals[k] for k in table_keys])
    # Loop through samples in sample_dict for which there is no sample run information
    samples_in_table_or_excluded = list(set([x[0] for x in sample_table])) + samples_excluded
    samples_not_in_table = list(set(sample_dict.keys()) - set(samples_in_table_or_excluded))
    for sample in samples_not_in_table:
        if re.search("Unexpected", sample):
            continue
        project_sample = sample_dict[sample]
        # Set project_sample_d: a dictionary mapping from sample run metrics name to sample run metrics database id
        project_sample_d = _set_project_sample_dict(project_sample, source)
        if project_sample_d:
            # BUGFIX: .iteritems() is Python-2-only; .items() works on both.
            for k, v in project_sample_d.items():
                barcode_seq = s_con.get_entry(k, "sequence")
                vals = _set_sample_table_values(sample, project_sample, barcode_seq, ordered_million_reads, param)
                sample_table.append([vals[k] for k in table_keys])
        else:
            barcode_seq = None
            vals = _set_sample_table_values(sample, project_sample, barcode_seq, ordered_million_reads, param)
            sample_table.append([vals[k] for k in table_keys])
    if all_samples_sequenced:
        param["finished"] = 'All samples for this project have been sequenced.'
    sample_table.sort()
    # Deduplicate adjacent identical rows (the table is sorted).
    sample_table = list(sample_table for sample_table, _ in itertools.groupby(sample_table))
    sample_table.insert(0, ['ScilifeID', 'SubmittedID', 'BarcodeSeq', 'MSequenced', 'MOrdered'])
    return output_data, sample_table, param
def _project_status_note_table(project_name=None, username=None, password=None, url=None,
                               use_ps_map=True, use_bc_map=False, check_consistency=False,
                               ordered_million_reads=None, uppnex_id=None, customer_reference=None,
                               exclude_sample_ids=None, project_alias=None, sample_aliases=None,
                               projectdb="projects", samplesdb="samples", flowcelldb="flowcells",
                               include_all_samples=False, param=None, **kw):
    """Collect the sample table and parameters for a project status note.

    :param project_name: project name
    :param username: db user name
    :param password: db password
    :param url: db url
    :param ordered_million_reads: number of ordered reads in millions
    :param uppnex_id: the uppnex id (overrides db value)
    :param customer_reference: customer project name (overrides db value)
    :param exclude_sample_ids: sample ids to exclude from the note
    :param project_alias: project alias name
    :param sample_aliases: sample alias names
    :param projectdb: project db name
    :param samplesdb: samples db name
    :param flowcelldb: flowcells db name
    :param include_all_samples: include all samples in report
    :param param: pre-seeded parameter dict (updated in place)

    :returns: (output_data, sample_table, param), or None if the project
        is unknown.
    """
    # BUGFIX: replace shared '={}' mutable defaults with per-call dicts;
    # 'param' is mutated via update() below, so the old defaults leaked
    # values between calls.
    if exclude_sample_ids is None:
        exclude_sample_ids = {}
    if sample_aliases is None:
        sample_aliases = {}
    if param is None:
        param = {}
    # mapping project_summary to parameter keys
    ps_to_parameter = {"scilife_name": "scilife_name", "customer_name": "customer_name", "project_name": "project_name"}
    # mapping project sample to table
    table_keys = ['ScilifeID', 'SubmittedID', 'BarcodeSeq', 'MSequenced', 'MOrdered']
    output_data = {'stdout': StringIO(), 'stderr': StringIO(), 'debug': StringIO()}
    # Connect and run
    s_con = SampleRunMetricsConnection(dbname=samplesdb, username=username, password=password, url=url)
    fc_con = FlowcellRunMetricsConnection(dbname=flowcelldb, username=username, password=password, url=url)
    p_con = ProjectSummaryConnection(dbname=projectdb, username=username, password=password, url=url)
    # Get the information source for this project
    source = p_con.get_info_source(project_name)
    # Get project summary from project database
    sample_aliases = _literal_eval_option(sample_aliases, default={})
    prj_summary = p_con.get_entry(project_name)
    if not prj_summary:
        LOG.warn("No such project '{}'".format(project_name))
        return
    LOG.debug("Working on project '{}'.".format(project_name))
    # Determine if project is finished by getting all samples sequenced date
    try:
        all_samples_sequenced = prj_summary['project_summary']['all_samples_sequenced']
    except (TypeError, KeyError):
        all_samples_sequenced = False
    # Get sample run list and loop samples to make mapping sample -> {sampleruns}
    sample_run_list = _set_sample_run_list(project_name, flowcell=None, project_alias=project_alias, s_con=s_con)
    samples = {}
    for s in sample_run_list:
        prj_sample = p_con.get_project_sample(project_name, s.get("project_sample_name", None))
        if prj_sample:
            sample_name = prj_sample['project_sample'].get("scilife_name", None)
            s_d = {s["name"]: {'sample': sample_name, 'id': s["_id"]}}
            samples.update(s_d)
        else:
            if s["barcode_name"] in sample_aliases:
                s_d = {sample_aliases[s["barcode_name"]]: {'sample': sample_aliases[s["barcode_name"]], 'id': s["_id"]}}
                samples.update(s_d)
            else:
                s_d = {s["name"]: {'sample': s["name"], 'id': s["_id"], 'barcode_name': s["barcode_name"]}}
                LOG.warn("No mapping found for sample run:\n '{}'".format(s_d))
    # Convert to mapping from desired sample name to list of aliases
    # Less important for the moment; one solution is to update the
    # Google docs summary table to use the P names
    sample_dict = prj_summary['samples']
    param.update({key: prj_summary.get(ps_to_parameter[key], None) for key in ps_to_parameter.keys()})
    param["ordered_amount"] = param.get("ordered_amount", p_con.get_ordered_amount(project_name, samples=sample_dict))
    if not param.get('customer_reference'):
        try:
            param['customer_reference'] = prj_summary['details']['customer_project_reference']
        except (TypeError, KeyError):
            param['customer_reference'] = prj_summary.get('customer_reference')
    param['uppnex_project_id'] = param.get('uppnex_project_id', prj_summary.get('uppnex_id'))
    # Override database values if options passed at command line
    if uppnex_id:
        param["uppnex_project_id"] = uppnex_id
    if customer_reference:
        param["customer_reference"] = customer_reference
    # Process options
    ordered_million_reads = _literal_eval_option(ordered_million_reads)
    exclude_sample_ids = _literal_eval_option(exclude_sample_ids, default={})
    ## Start collecting the data
    sample_table = []
    samples_excluded = []
    last_library_preps = p_con.get_latest_library_prep(project_name)
    last_library_preps_srm = [x for l in last_library_preps.values() for x in l]
    LOG.debug("Looping through sample map that maps project sample names to sample run metrics ids")
    for k, v in samples.items():
        LOG.debug("project sample '{}' maps to '{}'".format(k, v))
        if not include_all_samples:
            if v['sample'] not in last_library_preps.keys():
                LOG.info("No library prep information for sample {}; keeping in report".format(v['sample']))
            else:
                if k not in last_library_preps_srm:
                    LOG.info("Sample run {} ('{}') is not latest library prep ({}) for project sample {}: excluding from report".format(k, v["id"], ",".join(list(set(last_library_preps[v['sample']].values()))), v['sample']))
                    continue
        else:
            pass
        if re.search("Unexpected", k):
            continue
        barcode_seq = s_con.get_entry(k, "sequence")
        # Exclude sample id?
        if _exclude_sample_id(exclude_sample_ids, v['sample'], barcode_seq):
            samples_excluded.append(v['sample'])
            continue
        # Get the project sample name from the sample run and set table values
        project_sample = sample_dict[v['sample']]
        vals = _set_sample_table_values(v['sample'], project_sample, barcode_seq, ordered_million_reads, param)
        sample_table.append([vals[k] for k in table_keys])
    # Loop through samples in sample_dict for which there is no sample run information
    samples_in_table_or_excluded = list(set([x[0] for x in sample_table])) + samples_excluded
    samples_not_in_table = list(set(sample_dict.keys()) - set(samples_in_table_or_excluded))
    for sample in samples_not_in_table:
        if re.search("Unexpected", sample):
            continue
        project_sample = sample_dict[sample]
        # Set project_sample_d: a dictionary mapping from sample run metrics name to sample run metrics database id
        project_sample_d = _set_project_sample_dict(project_sample, source)
        if project_sample_d:
            # BUGFIX: .iteritems() is Python-2-only; .items() works on both.
            for k, v in project_sample_d.items():
                barcode_seq = s_con.get_entry(k, "sequence")
                vals = _set_sample_table_values(sample, project_sample, barcode_seq, ordered_million_reads, param)
                sample_table.append([vals[k] for k in table_keys])
        else:
            barcode_seq = None
            vals = _set_sample_table_values(sample, project_sample, barcode_seq, ordered_million_reads, param)
            sample_table.append([vals[k] for k in table_keys])
    if all_samples_sequenced:
        param["finished"] = 'All samples for this project have been sequenced.'
    sample_table.sort()
    # Deduplicate adjacent identical rows (the table is sorted).
    sample_table = list(sample_table for sample_table, _ in itertools.groupby(sample_table))
    sample_table.insert(0, ['ScilifeID', 'SubmittedID', 'BarcodeSeq', 'MSequenced', 'MOrdered'])
    return output_data, sample_table, param
def project_status_note(project_name=None,
                        username=None,
                        password=None,
                        url=None,
                        use_ps_map=True,
                        use_bc_map=False,
                        check_consistency=False,
                        ordered_million_reads=None,
                        uppnex_id=None,
                        customer_reference=None,
                        exclude_sample_ids=None,
                        project_alias=None,
                        sample_aliases=None,
                        projectdb="projects",
                        samplesdb="samples",
                        flowcelldb="flowcells",
                        include_all_samples=False,
                        **kw):
    """Make a project status note. Used keywords:

    :param project_name: project name
    :param user: db user name
    :param password: db password
    :param url: db url
    :param use_ps_map: use project summary mapping
    :param use_bc_map: use project to barcode name mapping
    :param check_consistency: check consistency between mappings
    :param ordered_million_reads: number of ordered reads in millions
    :param uppnex_id: the uppnex id
    :param customer_reference: customer project name
    :param exclude_sample_ids: exclude some sample ids from project note
    :param project_alias: project alias name
    :param sample_aliases: sample alias names
    :param projectdb: project db name
    :param samplesdb: samples db name
    :param flowcelldb: flowcells db name
    :param include_all_samples: include all samples in report
    """
    # BUGFIX: '={}' defaults were shared between calls; use per-call dicts.
    if exclude_sample_ids is None:
        exclude_sample_ids = {}
    if sample_aliases is None:
        sample_aliases = {}
    # parameters
    parameters = {
        "project_name": project_name,
        "finished": "Not finished, or cannot yet assess if finished.",
    }
    # mapping project_summary to parameter keys
    ps_to_parameter = {
        "scilife_name": "scilife_name",
        "customer_name": "customer_name",
        "project_name": "project_name"
    }
    # mapping project sample to table
    table_keys = ['ScilifeID', 'CustomerID', 'BarcodeSeq', 'MSequenced', 'MOrdered', 'Status']
    output_data = {'stdout': StringIO(), 'stderr': StringIO(), 'debug': StringIO()}
    # Connect and run
    s_con = SampleRunMetricsConnection(dbname=samplesdb, username=username, password=password, url=url)
    fc_con = FlowcellRunMetricsConnection(dbname=flowcelldb, username=username, password=password, url=url)
    p_con = ProjectSummaryConnection(dbname=projectdb, username=username, password=password, url=url)
    # Set report paragraphs
    paragraphs = project_note_paragraphs()
    headers = project_note_headers()
    # Set local param variable
    param = parameters
    # Get project summary from project database
    sample_aliases = _literal_eval_option(sample_aliases, default={})
    prj_summary = p_con.get_entry(project_name)
    if not prj_summary:
        LOG.warn("No such project '{}'".format(project_name))
        return
    LOG.debug("Working on project '{}'.".format(project_name))
    # Get sample run list and loop samples to make mapping sample -> {sampleruns}
    sample_run_list = _set_sample_run_list(project_name, flowcell=None, project_alias=project_alias, s_con=s_con)
    samples = {}
    for s in sample_run_list:
        prj_sample = p_con.get_project_sample(project_name, s.get("project_sample_name", None))
        if prj_sample:
            sample_name = prj_sample['project_sample'].get("scilife_name", None)
            s_d = {s["name"]: {'sample': sample_name, 'id': s["_id"]}}
            samples.update(s_d)
        else:
            if s["barcode_name"] in sample_aliases:
                s_d = {sample_aliases[s["barcode_name"]]: {'sample': sample_aliases[s["barcode_name"]], 'id': s["_id"]}}
                samples.update(s_d)
            else:
                s_d = {s["name"]: {'sample': s["name"], 'id': s["_id"], 'barcode_name': s["barcode_name"]}}
                LOG.warn("No mapping found for sample run:\n '{}'".format(s_d))
    # Convert to mapping from desired sample name to list of aliases
    # Less important for the moment; one solution is to update the
    # Google docs summary table to use the P names
    sample_dict = prj_summary['samples']
    param.update({key: prj_summary.get(ps_to_parameter[key], None) for key in ps_to_parameter.keys()})
    param["ordered_amount"] = param.get("ordered_amount", p_con.get_ordered_amount(project_name))
    param['customer_reference'] = param.get('customer_reference', prj_summary.get('customer_reference'))
    param['uppnex_project_id'] = param.get('uppnex_project_id', prj_summary.get('uppnex_id'))
    # Override database values if options passed at command line
    if uppnex_id:
        param["uppnex_project_id"] = uppnex_id
    if customer_reference:
        param["customer_reference"] = customer_reference
    # Process options
    ordered_million_reads = _literal_eval_option(ordered_million_reads)
    exclude_sample_ids = _literal_eval_option(exclude_sample_ids, default={})
    ## Start collecting the data
    sample_table = []
    samples_excluded = []
    all_passed = True
    last_library_preps = p_con.get_latest_library_prep(project_name)
    last_library_preps_srm = [x for l in last_library_preps.values() for x in l]
    LOG.debug("Looping through sample map that maps project sample names to sample run metrics ids")
    for k, v in samples.items():
        LOG.debug("project sample '{}' maps to '{}'".format(k, v))
        if not include_all_samples:
            if v['sample'] not in last_library_preps.keys():
                LOG.info("No library prep information for sample {}; keeping in report".format(v['sample']))
            else:
                if k not in last_library_preps_srm:
                    # BUGFIX: dict.values() is not indexable on Python 3;
                    # materialize before taking the first element.
                    LOG.info("Sample run {} ('{}') is not latest library prep ({}) for project sample {}: excluding from report".format(k, v["id"], list(last_library_preps[v['sample']].values())[0], v['sample']))
                    continue
        else:
            pass
        if re.search("Unexpected", k):
            continue
        barcode_seq = s_con.get_entry(k, "sequence")
        # Exclude sample id?
        if _exclude_sample_id(exclude_sample_ids, v['sample'], barcode_seq):
            samples_excluded.append(v['sample'])
            continue
        # Get the project sample name from the sample run and set table values
        project_sample = sample_dict[v['sample']]
        vals = _set_sample_table_values(v['sample'], project_sample, barcode_seq, ordered_million_reads, param)
        if vals['Status'] == "N/A" or vals['Status'] == "NP":
            all_passed = False
        sample_table.append([vals[k] for k in table_keys])
    # Loop through samples in sample_dict for which there is no sample run information
    samples_in_table_or_excluded = list(set([x[0] for x in sample_table])) + samples_excluded
    samples_not_in_table = list(set(sample_dict.keys()) - set(samples_in_table_or_excluded))
    for sample in samples_not_in_table:
        if re.search("Unexpected", sample):
            continue
        project_sample = sample_dict[sample]
        # Set project_sample_d: a dictionary mapping from sample run metrics name to sample run metrics database id
        project_sample_d = _set_project_sample_dict(project_sample)
        if project_sample_d:
            # BUGFIX: .iteritems() is Python-2-only; .items() works on both.
            for k, v in project_sample_d.items():
                barcode_seq = s_con.get_entry(k, "sequence")
                vals = _set_sample_table_values(sample, project_sample, barcode_seq, ordered_million_reads, param)
                if vals['Status'] == "N/A" or vals['Status'] == "NP":
                    all_passed = False
                sample_table.append([vals[k] for k in table_keys])
        else:
            barcode_seq = None
            vals = _set_sample_table_values(sample, project_sample, barcode_seq, ordered_million_reads, param)
            if vals['Status'] == "N/A" or vals['Status'] == "NP":
                all_passed = False
            sample_table.append([vals[k] for k in table_keys])
    if all_passed:
        param["finished"] = 'Project finished.'
    sample_table.sort()
    # Deduplicate adjacent identical rows (the table is sorted).
    sample_table = list(sample_table for sample_table, _ in itertools.groupby(sample_table))
    sample_table.insert(0, ['ScilifeID', 'CustomerID', 'BarcodeSeq', 'MSequenced', 'MOrdered', 'Status'])
    paragraphs["Samples"]["tpl"] = make_sample_table(sample_table)
    make_note("{}_project_summary.pdf".format(project_name), headers, paragraphs, **param)
    make_rest_note("{}_project_summary.rst".format(project_name), sample_table=sample_table, report="project_report", **param)
    param.update({k: "N/A" for k in param.keys() if param[k] is None or param[k] == ""})
    output_data["debug"].write(json.dumps({'param': param, 'table': sample_table}))
    return output_data
def project_status_note(project_name=None, username=None, password=None, url=None,
                        use_ps_map=True, use_bc_map=False, check_consistency=False,
                        ordered_million_reads=None, uppnex_id=None, customer_reference=None,
                        exclude_sample_ids={}, project_alias=None, sample_aliases={},
                        projectdb="projects", samplesdb="samples", flowcelldb="flowcells",
                        include_all_samples=False, **kw):
    """Make a project status note. Used keywords:

    :param project_name: project name
    :param user: db user name
    :param password: db password
    :param url: db url
    :param use_ps_map: use project summary mapping
    :param use_bc_map: use project to barcode name mapping
    :param check_consistency: check consistency between mappings
    :param ordered_million_reads: number of ordered reads in millions
    :param uppnex_id: the uppnex id
    :param customer_reference: customer project name
    :param exclude_sample_ids: exclude some sample ids from project note
    :param project_alias: project alias name
    :param sample_aliases: sample alias names
    :param projectdb: project db name
    :param samplesdb: samples db name
    :param flowcelldb: flowcells db name
    :param include_all_samples: include all samples in report

    :returns: dict with 'stdout', 'stderr' and 'debug' StringIO buffers
        (the debug buffer holds a JSON dump of the parameters and the
        sample table), or ``None`` if the project cannot be found.
    """
    # NOTE(review): ``exclude_sample_ids={}`` and ``sample_aliases={}`` are
    # mutable default arguments. Harmless here only because both are
    # reassigned via _literal_eval_option() before use and never mutated;
    # a ``None`` default would be the safer convention.
    # NOTE(review): use_ps_map, use_bc_map and check_consistency are
    # accepted (and documented) but never referenced in this function body.
    # parameters
    parameters = {
        "project_name" : project_name,
        "finished" : "Not finished, or cannot yet assess if finished.",
    }
    # mapping project_summary to parameter keys
    ps_to_parameter = {"scilife_name":"scilife_name", "customer_name":"customer_name", "project_name":"project_name"}
    # mapping project sample to table
    table_keys = ['ScilifeID', 'CustomerID', 'BarcodeSeq', 'MSequenced', 'MOrdered', 'Status']
    output_data = {'stdout':StringIO(), 'stderr':StringIO(), 'debug':StringIO()}
    # Connect and run
    s_con = SampleRunMetricsConnection(dbname=samplesdb, username=username, password=password, url=url)
    # NOTE(review): fc_con is created but not used anywhere in this body.
    fc_con = FlowcellRunMetricsConnection(dbname=flowcelldb, username=username, password=password, url=url)
    p_con = ProjectSummaryConnection(dbname=projectdb, username=username, password=password, url=url)
    # Set report paragraphs
    paragraphs = project_note_paragraphs()
    headers = project_note_headers()
    # Set local param variable
    # NOTE(review): this is an alias, not a copy — every param.update()
    # below also mutates ``parameters``.
    param = parameters
    # Get project summary from project database
    sample_aliases = _literal_eval_option(sample_aliases, default={})
    prj_summary = p_con.get_entry(project_name)
    if not prj_summary:
        LOG.warn("No such project '{}'".format(project_name))
        return
    LOG.debug("Working on project '{}'.".format(project_name))
    # Get sample run list and loop samples to make mapping sample -> {sampleruns}
    sample_run_list = _set_sample_run_list(project_name, flowcell=None, project_alias=project_alias, s_con=s_con)
    samples = {}
    for s in sample_run_list:
        prj_sample = p_con.get_project_sample(project_name, s.get("project_sample_name", None))
        if prj_sample:
            sample_name = prj_sample['project_sample'].get("scilife_name", None)
            s_d = {s["name"] : {'sample':sample_name, 'id':s["_id"]}}
            samples.update(s_d)
        else:
            if s["barcode_name"] in sample_aliases:
                s_d = {sample_aliases[s["barcode_name"]] : {'sample':sample_aliases[s["barcode_name"]], 'id':s["_id"]}}
                samples.update(s_d)
            else:
                # Unmapped sample run: logged but deliberately NOT added to
                # ``samples`` (s_d is built only for the log message).
                s_d = {s["name"]:{'sample':s["name"], 'id':s["_id"], 'barcode_name':s["barcode_name"]}}
                LOG.warn("No mapping found for sample run:\n '{}'".format(s_d))
    # Convert to mapping from desired sample name to list of aliases
    # Less important for the moment; one solution is to update the
    # Google docs summary table to use the P names
    sample_dict = prj_summary['samples']
    param.update({key:prj_summary.get(ps_to_parameter[key], None) for key in ps_to_parameter.keys()})
    param["ordered_amount"] = param.get("ordered_amount", p_con.get_ordered_amount(project_name))
    param['customer_reference'] = param.get('customer_reference', prj_summary.get('customer_reference'))
    param['uppnex_project_id'] = param.get('uppnex_project_id', prj_summary.get('uppnex_id'))
    # Override database values if options passed at command line
    if uppnex_id:
        param["uppnex_project_id"] = uppnex_id
    if customer_reference:
        param["customer_reference"] = customer_reference
    # Process options
    ordered_million_reads = _literal_eval_option(ordered_million_reads)
    exclude_sample_ids = _literal_eval_option(exclude_sample_ids, default={})
    ## Start collecting the data
    sample_table = []
    samples_excluded = []
    all_passed = True
    last_library_preps = p_con.get_latest_library_prep(project_name)
    last_library_preps_srm = [x for l in last_library_preps.values() for x in l]
    LOG.debug("Looping through sample map that maps project sample names to sample run metrics ids")
    for k,v in samples.items():
        LOG.debug("project sample '{}' maps to '{}'".format(k, v))
        # Unless include_all_samples is set, keep only sample runs that
        # belong to the latest library prep for their project sample.
        if not include_all_samples:
            if v['sample'] not in last_library_preps.keys():
                LOG.info("No library prep information for sample {}; keeping in report".format(v['sample']))
            else:
                if k not in last_library_preps_srm:
                    LOG.info("Sample run {} ('{}') is not latest library prep ({}) for project sample {}: excluding from report".format(k, v["id"], last_library_preps[v['sample']].values()[0], v['sample']))
                    continue
        else:
            pass
        if re.search("Unexpected", k):
            continue
        barcode_seq = s_con.get_entry(k, "sequence")
        # Exclude sample id?
        if _exclude_sample_id(exclude_sample_ids, v['sample'], barcode_seq):
            samples_excluded.append(v['sample'])
            continue
        # Get the project sample name from the sample run and set table values
        project_sample = sample_dict[v['sample']]
        vals = _set_sample_table_values(v['sample'], project_sample, barcode_seq, ordered_million_reads, param)
        if vals['Status']=="N/A" or vals['Status']=="NP":
            all_passed = False
        # NOTE(review): under Python 2 this list comprehension rebinds the
        # outer loop variable ``k``; harmless only because it is the last
        # statement of the loop body.
        sample_table.append([vals[k] for k in table_keys])
    # Loop through samples in sample_dict for which there is no sample run information
    samples_in_table_or_excluded = list(set([x[0] for x in sample_table])) + samples_excluded
    samples_not_in_table = list(set(sample_dict.keys()) - set(samples_in_table_or_excluded))
    for sample in samples_not_in_table:
        if re.search("Unexpected", sample):
            continue
        project_sample = sample_dict[sample]
        # Set project_sample_d: a dictionary mapping from sample run metrics name to sample run metrics database id
        project_sample_d = _set_project_sample_dict(project_sample)
        if project_sample_d:
            for k,v in project_sample_d.iteritems():
                barcode_seq = s_con.get_entry(k, "sequence")
                vals = _set_sample_table_values(sample, project_sample, barcode_seq, ordered_million_reads, param)
                if vals['Status']=="N/A" or vals['Status']=="NP":
                    all_passed = False
                sample_table.append([vals[k] for k in table_keys])
        else:
            # No sample run metrics at all for this project sample: emit a
            # row without a barcode sequence.
            barcode_seq = None
            vals = _set_sample_table_values(sample, project_sample, barcode_seq, ordered_million_reads, param)
            if vals['Status']=="N/A" or vals['Status']=="NP":
                all_passed = False
            sample_table.append([vals[k] for k in table_keys])
    if all_passed:
        param["finished"] = 'Project finished.'
    # Sort rows, then drop consecutive duplicates (itertools.groupby only
    # groups adjacent equal rows, so the sort is required).
    sample_table.sort()
    sample_table = list(sample_table for sample_table,_ in itertools.groupby(sample_table))
    sample_table.insert(0, ['ScilifeID', 'CustomerID', 'BarcodeSeq', 'MSequenced', 'MOrdered', 'Status'])
    paragraphs["Samples"]["tpl"] = make_sample_table(sample_table)
    make_note("{}_project_summary.pdf".format(project_name), headers, paragraphs, **param)
    make_rest_note("{}_project_summary.rst".format(project_name), sample_table=sample_table, report="project_report", **param)
    # Replace empty/None parameter values with "N/A" before the debug dump.
    param.update({k:"N/A" for k in param.keys() if param[k] is None or param[k] == ""})
    output_data["debug"].write(json.dumps({'param':param, 'table':sample_table}))
    return output_data
class TestQCUpload(PmFullTest):
    """Integration tests for the ``qc upload-qc`` / ``qc update`` commands.

    NOTE(review): several classes named ``TestQCUpload`` appear in this
    source; if they live in the same module, later definitions shadow
    earlier ones at import time — confirm intent.
    """

    def setUp(self):
        """FIXME: All other tests depend on data being uploaded, so these
        are not real unit tests. The setup to TestQCUpload has to be run
        prior to other tests, else unexpected failures will occur."""
        # Upload QC data for both test flowcells.
        # ``--mtime 10000`` presumably widens the modification-time filter
        # so all fixture files are accepted — TODO confirm against the qc
        # controller.
        self.app = self.make_app(
            argv=['qc', 'upload-qc', flowcells[0], '--mtime', '10000'],
            extensions=[
                'scilifelab.pm.ext.ext_qc', 'scilifelab.pm.ext.ext_couchdb'
            ])
        self._run_app()
        self.app = self.make_app(
            argv=['qc', 'upload-qc', flowcells[1], '--mtime', '10000'],
            extensions=[
                'scilifelab.pm.ext.ext_qc', 'scilifelab.pm.ext.ext_couchdb'
            ])
        self._run_app()
        # Connections to the test databases used by the assertions below.
        self.s_con = SampleRunMetricsConnection(dbname="samples-test", username="******", password="******")
        self.p_con = ProjectSummaryConnection(dbname="projects-test", username="******", password="******")
        self.fc_con = FlowcellRunMetricsConnection(dbname="flowcells-test", username="******", password="******")

    def test_samplesheet(self):
        """Test samplesheet upload"""
        # Spot-check a few fields of the samplesheet stored on the
        # flowcell document by the setUp uploads.
        fc = self.fc_con.get_entry("120924_AC003CCCXX")
        self.assertEqual(fc["samplesheet_csv"][0]["Index"], "TGACCA")
        self.assertEqual(fc["samplesheet_csv"][0]["Description"], "J__Doe_00_01")
        self.assertEqual(fc["samplesheet_csv"][0]["FCID"], "C003CCCXX")
        self.assertEqual(fc["samplesheet_csv"][1]["SampleRef"], "hg19")
        self.assertEqual(fc["samplesheet_csv"][2]["SampleID"], "P002_101_index3")

    def test_qc_upload(self):
        """Test running qc upload to server.

        Slightly circular testing here - I setup the module with qc
        update so by definition the test must 'work'"""
        # Re-run the upload with a narrower --mtime window, then check the
        # resulting sample run document.
        self.app = self.make_app(
            argv=['qc', 'upload-qc', flowcells[1], '--mtime', '100'],
            extensions=[
                'scilifelab.pm.ext.ext_qc', 'scilifelab.pm.ext.ext_couchdb'
            ])
        self._run_app()
        s = self.s_con.get_entry("4_120924_AC003CCCXX_CGTTAA")
        self.assertIsNone(s["project_sample_name"])
        self.assertEqual(s["project_id"], "P003")

    def test_qc_update(self):
        """Test running qc update of a project id"""
        # Blank out the project id in the database ...
        s = self.s_con.get_entry("4_120924_AC003CCCXX_CGTTAA")
        s["project_id"] = None
        self.assertIsNone(s["project_id"])
        self.s_con.save(s)
        # ... then verify the update command restores it.
        self.app = self.make_app(argv=[
            'qc', 'update', '--sample_prj', projects[2], '--project_id',
            'P003', '--debug', '--force'
        ],
                                 extensions=[
                                     'scilifelab.pm.ext.ext_qc',
                                     'scilifelab.pm.ext.ext_couchdb'
                                 ])
        self._run_app()
        s = self.s_con.get_entry("4_120924_AC003CCCXX_CGTTAA")
        self.assertEqual(s["project_id"], "P003")

    def test_qc_update_sample_names(self):
        """Test running qc update of project sample names"""
        # Blank out the project sample names on two sample runs ...
        s1 = self.s_con.get_entry("1_120924_AC003CCCXX_TGACCA")
        s2 = self.s_con.get_entry("2_120924_AC003CCCXX_ACAGTG")
        s1["project_sample_name"] = None
        s2["project_sample_name"] = None
        self.assertIsNone(s1["project_sample_name"])
        self.assertIsNone(s2["project_sample_name"])
        self.s_con.save(s1)
        self.s_con.save(s2)
        # ... then run the update command with an explicit barcode-name ->
        # project-sample-name map and verify both names were restored.
        sample_map = {
            'P001_101_index3': 'P001_101_index3',
            'P001_102_index6': 'P001_102'
        }
        self.app = self.make_app(argv=[
            'qc', 'update', '--sample_prj', projects[0], '--names',
            "{}".format(sample_map), '--debug', '--force'
        ],
                                 extensions=[
                                     'scilifelab.pm.ext.ext_qc',
                                     'scilifelab.pm.ext.ext_couchdb'
                                 ])
        self._run_app()
        s1 = self.s_con.get_entry("1_120924_AC003CCCXX_TGACCA")
        s2 = self.s_con.get_entry("2_120924_AC003CCCXX_ACAGTG")
        self.assertEqual(s1["project_sample_name"], "P001_101_index3")
        self.assertEqual(s2["project_sample_name"], "P001_102")
class TestQCUpload(PmFullTest):
    """Integration tests for the ``qc upload-qc`` / ``qc update`` commands.

    NOTE(review): several classes named ``TestQCUpload`` appear in this
    source; if they live in the same module, later definitions shadow
    earlier ones at import time — confirm intent. This variant uploads
    only ``flowcells[0]`` in setUp and expects SampleID
    "P001_101_index3" in test_samplesheet.
    """

    def setUp(self):
        """Upload QC data for the first test flowcell and open db connections."""
        self.app = self.make_app(
            argv=["qc", "upload-qc", flowcells[0], "--mtime", "10000"],
            extensions=["scilifelab.pm.ext.ext_qc", "scilifelab.pm.ext.ext_couchdb"],
        )
        self._run_app()
        # Connections to the test databases used by the assertions below.
        self.s_con = SampleRunMetricsConnection(dbname="samples-test", username="******", password="******")
        self.p_con = ProjectSummaryConnection(dbname="projects-test", username="******", password="******")
        self.fc_con = FlowcellRunMetricsConnection(dbname="flowcells-test", username="******", password="******")

    def test_samplesheet(self):
        """Test samplesheet upload"""
        # Spot-check a few fields of the uploaded samplesheet.
        fc = self.fc_con.get_entry("120924_AC003CCCXX")
        self.assertEqual(fc["samplesheet_csv"][0]["Index"], "TGACCA")
        self.assertEqual(fc["samplesheet_csv"][0]["Description"], "J__Doe_00_01")
        self.assertEqual(fc["samplesheet_csv"][0]["FCID"], "C003CCCXX")
        self.assertEqual(fc["samplesheet_csv"][1]["SampleRef"], "hg19")
        self.assertEqual(fc["samplesheet_csv"][2]["SampleID"], "P001_101_index3")

    def test_qc_upload(self):
        """Test running qc upload to server"""
        # Upload the second flowcell with a narrower --mtime window, then
        # check the resulting sample run document.
        self.app = self.make_app(
            argv=["qc", "upload-qc", flowcells[1], "--mtime", "100"],
            extensions=["scilifelab.pm.ext.ext_qc", "scilifelab.pm.ext.ext_couchdb"],
        )
        self._run_app()
        s = self.s_con.get_entry("4_120924_AC003CCCXX_CGTTAA")
        self.assertIsNone(s["project_sample_name"])
        self.assertEqual(s["project_id"], "P003")

    def test_qc_update(self):
        """Test running qc update of a project id"""
        # Blank out the project id in the database ...
        s = self.s_con.get_entry("4_120924_AC003CCCXX_CGTTAA")
        s["project_id"] = None
        self.assertIsNone(s["project_id"])
        self.s_con.save(s)
        # ... then verify the update command restores it.
        self.app = self.make_app(
            argv=["qc", "update", "--sample_prj", projects[2], "--project_id", "P003", "--debug", "--force"],
            extensions=["scilifelab.pm.ext.ext_qc", "scilifelab.pm.ext.ext_couchdb"],
        )
        self._run_app()
        s = self.s_con.get_entry("4_120924_AC003CCCXX_CGTTAA")
        self.assertEqual(s["project_id"], "P003")

    def test_qc_update_sample_names(self):
        """Test running qc update of project sample names"""
        # Blank out the project sample names on two sample runs ...
        s1 = self.s_con.get_entry("1_120924_AC003CCCXX_TGACCA")
        s2 = self.s_con.get_entry("2_120924_AC003CCCXX_ACAGTG")
        s1["project_sample_name"] = None
        s2["project_sample_name"] = None
        self.assertIsNone(s1["project_sample_name"])
        self.assertIsNone(s2["project_sample_name"])
        self.s_con.save(s1)
        self.s_con.save(s2)
        # ... then run the update command with an explicit barcode-name ->
        # project-sample-name map and verify both names were restored.
        sample_map = {"P001_101_index3": "P001_101_index3", "P001_102_index6": "P001_102"}
        self.app = self.make_app(
            argv=[
                "qc",
                "update",
                "--sample_prj",
                projects[0],
                "--names",
                "{}".format(sample_map),
                "--debug",
                "--force",
            ],
            extensions=["scilifelab.pm.ext.ext_qc", "scilifelab.pm.ext.ext_couchdb"],
        )
        self._run_app()
        s1 = self.s_con.get_entry("1_120924_AC003CCCXX_TGACCA")
        s2 = self.s_con.get_entry("2_120924_AC003CCCXX_ACAGTG")
        self.assertEqual(s1["project_sample_name"], "P001_101_index3")
        self.assertEqual(s2["project_sample_name"], "P001_102")
def test_2_make_project_note(self): """Make a project note subset by flowcell and project""" s_con = SampleRunMetricsConnection(username=self.user, password=self.pw, url=self.url) fc_con = FlowcellRunMetricsConnection(username=self.user, password=self.pw, url=self.url) p_con = ProjectSummaryConnection(username=self.user, password=self.pw, url=self.url) paragraphs = project_note_paragraphs() headers = project_note_headers() param = parameters project = p_con.get_entry(self.examples["project"]) if not project: print "No project named {}".format(self.examples["project"]) return if project: ordered_amount = p_con.get_ordered_amount(self.examples["project"]) else: return ordered_amount = self.pargs.ordered_million_reads ## Start collecting the data sample_table = [] sample_list = project['samples'] param.update({ key: project.get(ps_to_parameter[key], None) for key in ps_to_parameter.keys() }) samples = p_con.map_name_to_srm(self.examples["project"], check_consistency=True, use_bc_map=True) all_passed = True for k, v in samples.items(): if k == "Unexpected": continue project_sample = sample_list[k] vals = { x: project_sample.get(prjs_to_table[x], None) for x in prjs_to_table.keys() } vals['MOrdered'] = ordered_amount vals['BarcodeSeq'] = s_con.get_entry(v.keys()[0], "sequence") ## Set status vals['Status'] = set_status( vals) if vals['Status'] is None else vals['Status'] vals.update({k: "N/A" for k in vals.keys() if vals[k] is None}) if vals['Status'] == "N/A" or vals['Status'] == "NP": all_passed = False sample_table.append([vals[k] for k in table_keys]) if all_passed: param["finished"] = 'Project finished.' sample_table.sort() sample_table = list( sample_table for sample_table, _ in itertools.groupby(sample_table)) sample_table.insert(0, [ 'ScilifeID', 'CustomerID', 'BarcodeSeq', 'MSequenced', 'MOrdered', 'Status' ]) paragraphs["Samples"]["tpl"] = make_sample_table(sample_table) make_note("{}.pdf".format(self.examples["project"]), headers, paragraphs, **param)
class TestQCUpload(PmFullTest):
    """Integration tests for the ``qc upload-qc`` / ``qc update`` commands.

    NOTE(review): several classes named ``TestQCUpload`` appear in this
    source; if they live in the same module, later definitions shadow
    earlier ones at import time — confirm intent.
    """

    def setUp(self):
        """FIXME: All other tests depend on data being uploaded, so these
        are not real unit tests. The setup to TestQCUpload has to be run
        prior to other tests, else unexpected failures will occur."""
        # Upload QC data for both test flowcells; ``--mtime 10000`` is
        # presumably a wide modification-time window so all fixture files
        # are accepted — TODO confirm against the qc controller.
        self.app = self.make_app(argv=['qc', 'upload-qc', flowcells[0], '--mtime', '10000'],
                                 extensions=['scilifelab.pm.ext.ext_qc', 'scilifelab.pm.ext.ext_couchdb'])
        self._run_app()
        self.app = self.make_app(argv=['qc', 'upload-qc', flowcells[1], '--mtime', '10000'],
                                 extensions=['scilifelab.pm.ext.ext_qc', 'scilifelab.pm.ext.ext_couchdb'])
        self._run_app()
        # Connections to the test databases used by the assertions below.
        self.s_con = SampleRunMetricsConnection(dbname="samples-test", username="******", password="******")
        self.p_con = ProjectSummaryConnection(dbname="projects-test", username="******", password="******")
        self.fc_con = FlowcellRunMetricsConnection(dbname="flowcells-test", username="******", password="******")

    def test_samplesheet(self):
        """Test samplesheet upload"""
        # Spot-check a few fields of the uploaded samplesheet.
        fc = self.fc_con.get_entry("120924_AC003CCCXX")
        self.assertEqual(fc["samplesheet_csv"][0]["Index"], "TGACCA")
        self.assertEqual(fc["samplesheet_csv"][0]["Description"], "J__Doe_00_01")
        self.assertEqual(fc["samplesheet_csv"][0]["FCID"], "C003CCCXX")
        self.assertEqual(fc["samplesheet_csv"][1]["SampleRef"], "hg19")
        self.assertEqual(fc["samplesheet_csv"][2]["SampleID"], "P002_101_index3")

    def test_qc_upload(self):
        """Test running qc upload to server.

        Slightly circular testing here - I setup the module with qc
        update so by definition the test must 'work'"""
        # Re-run the upload with a narrower --mtime window, then check the
        # resulting sample run document.
        self.app = self.make_app(argv=['qc', 'upload-qc', flowcells[1], '--mtime', '100'],
                                 extensions=['scilifelab.pm.ext.ext_qc', 'scilifelab.pm.ext.ext_couchdb'])
        self._run_app()
        s = self.s_con.get_entry("4_120924_AC003CCCXX_CGTTAA")
        self.assertIsNone(s["project_sample_name"])
        self.assertEqual(s["project_id"], "P003")

    def test_qc_update(self):
        """Test running qc update of a project id"""
        # Blank out the project id in the database ...
        s = self.s_con.get_entry("4_120924_AC003CCCXX_CGTTAA")
        s["project_id"] = None
        self.assertIsNone(s["project_id"])
        self.s_con.save(s)
        # ... then verify the update command restores it.
        self.app = self.make_app(argv=['qc', 'update', '--sample_prj', projects[2], '--project_id', 'P003', '--debug', '--force'],
                                 extensions=['scilifelab.pm.ext.ext_qc', 'scilifelab.pm.ext.ext_couchdb'])
        self._run_app()
        s = self.s_con.get_entry("4_120924_AC003CCCXX_CGTTAA")
        self.assertEqual(s["project_id"], "P003")

    def test_qc_update_sample_names(self):
        """Test running qc update of project sample names"""
        # Blank out the project sample names on two sample runs ...
        s1 = self.s_con.get_entry("1_120924_AC003CCCXX_TGACCA")
        s2 = self.s_con.get_entry("2_120924_AC003CCCXX_ACAGTG")
        s1["project_sample_name"] = None
        s2["project_sample_name"] = None
        self.assertIsNone(s1["project_sample_name"])
        self.assertIsNone(s2["project_sample_name"])
        self.s_con.save(s1)
        self.s_con.save(s2)
        # ... then run the update command with an explicit barcode-name ->
        # project-sample-name map and verify both names were restored.
        sample_map = {'P001_101_index3': 'P001_101_index3', 'P001_102_index6':'P001_102'}
        self.app = self.make_app(argv=['qc', 'update', '--sample_prj', projects[0], '--names', "{}".format(sample_map), '--debug', '--force'],
                                 extensions=['scilifelab.pm.ext.ext_qc', 'scilifelab.pm.ext.ext_couchdb'])
        self._run_app()
        s1 = self.s_con.get_entry("1_120924_AC003CCCXX_TGACCA")
        s2 = self.s_con.get_entry("2_120924_AC003CCCXX_ACAGTG")
        self.assertEqual(s1["project_sample_name"], "P001_101_index3")
        self.assertEqual(s2["project_sample_name"], "P001_102")