def _make_seq_run():
    """
    Make a sequencing run pointed at real data for the tests
    :return: SequencingRun object
    """
    files_1 = model.SequenceFile([
        path.join(path_to_module, "fake_ngs_data", "Data", "Intensities",
                  "BaseCalls", "01-1111_S1_L001_R1_001.fastq.gz"),
        path.join(path_to_module, "fake_ngs_data", "Data", "Intensities",
                  "BaseCalls", "01-1111_S1_L001_R2_001.fastq.gz"),
    ])
    files_2 = model.SequenceFile([
        path.join(path_to_module, "fake_ngs_data", "Data", "Intensities",
                  "BaseCalls", "02-2222_S1_L001_R1_001.fastq.gz"),
        path.join(path_to_module, "fake_ngs_data", "Data", "Intensities",
                  "BaseCalls", "02-2222_S1_L001_R2_001.fastq.gz"),
    ])
    files_3 = model.SequenceFile([
        path.join(path_to_module, "fake_ngs_data", "Data", "Intensities",
                  "BaseCalls", "03-3333_S1_L001_R1_001.fastq.gz"),
        path.join(path_to_module, "fake_ngs_data", "Data", "Intensities",
                  "BaseCalls", "03-3333_S1_L001_R2_001.fastq.gz"),
    ])

    sample_1 = model.Sample("test_sample", "description", 1)
    sample_1.sequence_file = files_1
    sample_2 = model.Sample("test_sample", "description", 1)
    sample_2.sequence_file = files_2
    sample_3 = model.Sample("test_sample", "description", 1)
    sample_3.sequence_file = files_3

    project = model.Project("test_project", [sample_1, sample_2, sample_3],
                            "description")
    sequencing_run = model.SequencingRun({"layoutType": "PAIRED_END"},
                                         [project], "miseq")
    return sequencing_run
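# Usage sketch (an assumption, not part of the original module): a fixture
# like _make_seq_run() would typically be consumed inside a
# unittest.TestCase (unittest assumed imported) by code that expects a
# fully populated SequencingRun. The attributes asserted here (metadata,
# project_list, sample_list, sequence_file.file_list) all appear in the
# tests below.
class ExampleSeqRunTest(unittest.TestCase):
    def test_fixture_shape(self):
        run = _make_seq_run()
        self.assertEqual(run.metadata, {"layoutType": "PAIRED_END"})
        self.assertEqual(len(run.project_list), 1)
        self.assertEqual(len(run.project_list[0].sample_list), 3)
        # each sample carries a paired-end SequenceFile with two reads
        self.assertEqual(
            len(run.project_list[0].sample_list[0].sequence_file.file_list), 2)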
def test_parse_samples_valid(self):
    """
    Verify samples created from parser match expected samples
    :return:
    """
    sheet_file = path.join(path_to_module, "fake_dir_data", "SampleList.csv")

    sample1 = model.Sample("my-sample-1", "")
    sample2 = model.Sample("my-sample-2", "")
    sample3 = model.Sample("my-sample-3", "")

    res = Parser().get_sequencing_run(sheet_file)

    self.assertEqual(res.metadata, {'layoutType': 'PAIRED_END'})
    self.assertEqual(res.project_list[0].id, "75")
    self.assertEqual(res.project_list[1].id, "76")
    self.assertEqual(res.project_list[0].sample_list[0].sample_name,
                     sample1.sample_name)
    self.assertEqual(res.project_list[0].sample_list[1].sample_name,
                     sample2.sample_name)
    self.assertEqual(res.project_list[1].sample_list[0].sample_name,
                     sample3.sample_name)
def test_parse_samples_valid(self):
    """
    Ensure samples parsed from a valid directory match the expected samples
    :return:
    """
    sheet_file = path.join(path_to_module, "fake_ngs_data", "SampleSheet.csv")

    sample1 = model.Sample(
        "01-1111",
        "",
        1,
        {
            "index": "AAAAAAAA",
            "I7_Index_ID": "N01",
            "sample_project": "6",
            "sequencer_sample_ID": "01-1111-4004",
            "I5_Index_ID": "S01",
            "index2": "TTTTTTTT"
        }
    )
    sample2 = model.Sample(
        "02-2222",
        "",
        2,
        {
            "index": "GGGGGGGG",
            "I7_Index_ID": "N02",
            "sample_project": "6",
            "sequencer_sample_ID": "02-2222-4004",
            "I5_Index_ID": "S02",
            "index2": "CCCCCCCC"
        }
    )
    sample3 = model.Sample(
        "03-3333",
        "",
        3,
        {
            "index": "CCCCCCCC",
            "I7_Index_ID": "N03",
            "sample_project": "6",
            "sequencer_sample_ID": "03-3333-4004",
            "I5_Index_ID": "S03",
            "index2": "GGGGGGGG"
        }
    )
    correct_samples = [sample1, sample2, sample3]

    res = sample_parser._parse_samples(sheet_file)
    for r_sample, c_sample in zip(res, correct_samples):
        self.assertEqual(r_sample.get_uploadable_dict(),
                         c_sample.get_uploadable_dict())
def test_parse_samples_valid(self):
    """
    Ensure samples parsed from a valid directory match the expected samples
    :return:
    """
    sheet_file = path.join(path_to_module, "fake_ngs_data", "UploadList.csv")

    sample1 = model.Sample(
        "01A100001",
        "",
        1,
        {
            "Index": "AAAAAAAA",
            "sample_project": "6",
            "Index2": "TTTTTTTT",
            "description": ''
        }
    )
    sample2 = model.Sample(
        "01A100002",
        "",
        2,
        {
            "Index": "GGGGGGGG",
            "sample_project": "6",
            "Index2": "CCCCCCCC",
            "description": ''
        }
    )
    sample3 = model.Sample(
        "01A100003",
        "",
        3,
        {
            "Index": "GGGGGGGG",
            "sample_project": "6",
            "Index2": "CCCCCCCC",
            "description": ''
        }
    )
    correct_samples = [sample1, sample2, sample3]

    res = sample_parser._parse_samples(sheet_file)
    for r_sample, c_sample in zip(res, correct_samples):
        self.assertEqual(r_sample.get_uploadable_dict(),
                         c_sample.get_uploadable_dict())
def test_parse_out_sequence_file(self):
    """
    Test that _parse_out_sequence_file correctly filters sample-related data
    out of the extra params dict, and that the uploadable dict still includes
    all the needed data after removal
    :return:
    """
    sample = model.Sample(
        "01A100003",
        "",
        3,
        {
            "Index": "GGGGGGGG",
            "sampleProject": "6",
            "Index2": "CCCCCCCC",
            "description": ''
        }
    )
    uploadable_dict = {
        'Index': 'GGGGGGGG',
        'sampleName': '01A100003',
        'sampleProject': '6',
        'Index2': 'CCCCCCCC',
        'description': ''
    }
    sequence_file_dict = {
        'Index': 'GGGGGGGG',
        'sampleProject': '6',
        'Index2': 'CCCCCCCC',
        'description': ''
    }

    res = sample_parser._parse_out_sequence_file(sample)

    self.assertEqual(sample.get_uploadable_dict(), uploadable_dict)
    self.assertEqual(res, sequence_file_dict)
def test_send_and_get_sample(self):
    """
    Tests sending and receiving sample data
    :return:
    """
    # set up a project to upload samples to
    project_name = "test_project_2"
    project_description = "test_project_description"
    project = model.Project(name=project_name,
                            description=project_description)
    proj_json_res = self.test_api.send_project(project)
    project_identifier = proj_json_res['resource']['identifier']

    # upload a sample
    sample_name = "test_sample"
    sample_desc = "test_sample_desc"
    sample = model.Sample(sample_name, sample_desc)
    sample_json_res = self.test_api.send_sample(sample, project_identifier)

    # make sure the returned values match what we tried to upload
    self.assertEqual(sample_json_res['resource']['sampleName'], sample_name)
    self.assertEqual(sample_json_res['resource']['description'], sample_desc)

    # get a list of samples on our project and make sure they match what we uploaded
    sample_list = self.test_api.get_samples(project_identifier)
    self.assertEqual(len(sample_list), 1)
    self.assertEqual(type(sample_list[0]), model.Sample)
    self.assertEqual(sample_list[0].sample_name, sample_name)
    self.assertEqual(sample_list[0].description, sample_desc)
def test_parse_samples_valid(self):
    """
    Ensure samples parsed from a valid directory match the expected samples
    :return:
    """
    sheet_file = path.join(path_to_module, "fake_nextseq_run",
                           "SampleSheet.csv")

    sample1 = model.Sample(
        "SA20121712",
        "Code Blue",
        1,
        {
            "Sample_Well": "A01",
            "index": "TAAGGCGA",
            "Sample_Plate": "NGS-001 Plate FNC-7",
            "I7_Index_ID": "N701",
            "sample_project": "67",
            "sequencer_sample_name": "SA20121712",
            "I5_Index_ID": "S502",
            "index2": "ATAGAGAG",
        }
    )
    sample2 = model.Sample(
        "SA20121716",
        "Code Blue",
        2,
        {
            "Sample_Well": "A02",
            "index": "CGTACTAG",
            "Sample_Plate": "NGS-001 Plate FNC-7",
            "I7_Index_ID": "N702",
            "sample_project": "68",
            "sequencer_sample_name": "SA20121716",
            "I5_Index_ID": "S502",
            "index2": "ATAGAGAG",
        }
    )
    correct_samples = [sample1, sample2]

    res = sample_parser._parse_samples(sheet_file)
    for r_sample, c_sample in zip(res, correct_samples):
        self.assertEqual(r_sample.get_uploadable_dict(),
                         c_sample.get_uploadable_dict())
def test_valid(self):
    """
    Ensure a parsed valid directory matches the expected sample list
    :return:
    """
    directory = path.join(path_to_module, "fake_ngs_data")
    sheet_file = path.join(directory, "SampleSheet.csv")
    data_dir = path.join(
        directory, parsers.miseq.Parser.get_relative_data_directory())
    file_list = parsers.common.get_file_list(data_dir)

    sample = model.Sample(
        "01-1111",
        "Super bug",
        1,
        {
            "Sample_Well": "01",
            "index": "AAAAAAAA",
            "Sample_Plate": "1",
            "I7_Index_ID": "N01",
            "sample_project": "6",
            "sequencer_sample_name": "01-1111",
            "I5_Index_ID": "S01",
            "index2": "TTTTTTTT",
        })

    sequence_file_properties = {
        'Sample_Plate': '1',
        'Sample_Well': '01',
        'I7_Index_ID': 'N01',
        'index': 'AAAAAAAA',
        'I5_Index_ID': 'S01',
        'index2': 'TTTTTTTT'
    }

    file_path_1 = path.join(path_to_module, "fake_ngs_data", "Data",
                            "Intensities", "BaseCalls",
                            "01-1111_S1_L001_R1_001.fastq.gz")
    file_path_2 = path.join(path_to_module, "fake_ngs_data", "Data",
                            "Intensities", "BaseCalls",
                            "01-1111_S1_L001_R2_001.fastq.gz")
    raw_file_list = [file_path_1, file_path_2]

    res = sample_parser.parse_sample_list(
        sample_sheet_file=sheet_file,
        run_data_directory=data_dir,
        run_data_directory_file_list=file_list)

    # Check sample is the same
    self.assertEqual(res[0].get_uploadable_dict(),
                     sample.get_uploadable_dict())
    # Check sequencing file is correct
    self.assertEqual(res[0].sequence_file.properties_dict,
                     sequence_file_properties)
    # compare the file lists irrespective of ordering
    self.assertEqual(sorted(res[0].sequence_file.file_list),
                     sorted(raw_file_list))
def test_valid(self):
    """
    Ensure a parsed valid directory matches the expected sample list
    :return:
    """
    directory = path.join(path_to_module, "fake_ngs_data")
    sheet_file = path.join(directory, "SampleSheet.csv")
    data_dir = path.join(
        directory, parsers.miniseq.Parser.get_relative_data_directory())
    data_dir = data_dir.replace("*", "some_dir")
    file_list = parsers.common.get_file_list(data_dir)

    sample = model.Sample(
        "01-1111",
        "",
        1,
        {
            "index": "AAAAAAAA",
            "I7_Index_ID": "N01",
            "sample_project": "6",
            "sequencer_sample_ID": "01-1111-4004",
            "I5_Index_ID": "S01",
            "index2": "TTTTTTTT"
        })

    sequence_file_properties = {
        "sequencer_sample_ID": "01-1111-4004",
        "index": "AAAAAAAA",
        "I7_Index_ID": "N01",
        "I5_Index_ID": "S01",
        "index2": "TTTTTTTT",
        "description": ""
    }

    file_path_1 = path.join(path_to_module, "fake_ngs_data", "Alignment_1",
                            "some_dir", "Fastq",
                            "01-1111_S1_L001_R1_001.fastq.gz")
    file_path_2 = path.join(path_to_module, "fake_ngs_data", "Alignment_1",
                            "some_dir", "Fastq",
                            "01-1111_S1_L001_R2_001.fastq.gz")
    raw_file_list = [file_path_1, file_path_2]

    res = sample_parser.parse_sample_list(
        sample_sheet_file=sheet_file,
        run_data_directory=data_dir,
        run_data_directory_file_list=file_list)

    # Check sample is the same
    self.assertEqual(res[0].get_uploadable_dict(),
                     sample.get_uploadable_dict())
    # Check sequencing file is correct
    self.assertEqual(res[0].sequence_file.properties_dict,
                     sequence_file_properties)
    # compare the file lists irrespective of ordering
    self.assertEqual(sorted(res[0].sequence_file.file_list),
                     sorted(raw_file_list))
def get_samples(self, project_id):
    """
    API call to api/projects/project_id/samples

    arguments:
        project_id -- project identifier from irida

    returns list of samples for the given project.
    each sample is a Sample object.
    """
    logging.info("Getting samples from project '{}'".format(project_id))

    if project_id not in self.cached_samples:
        try:
            project_url = self._get_link(self.base_url, "projects")
            url = self._get_link(project_url, "project/samples",
                                 target_dict={
                                     "key": "identifier",
                                     "value": project_id
                                 })
        except StopIteration:
            logging.error(
                "The given project ID doesn't exist: {}".format(project_id))
            raise exceptions.IridaResourceError(
                "The given project ID doesn't exist", project_id)

        response = self._session.get(url)
        result = response.json()["resource"]["resources"]

        sample_list = []
        for sample_dict in result:
            # use name and description from dictionary as base parameters when creating sample
            sample_name = sample_dict['sampleName']
            sample_desc = sample_dict['description']
            sample_id = int(sample_dict['identifier'])
            # remove them from the dict so we don't have useless duplicate data
            del sample_dict['sampleName']
            del sample_dict['description']
            del sample_dict['identifier']
            sample_list.append(
                model.Sample(sample_name=sample_name,
                             description=sample_desc,
                             samp_dict=sample_dict,
                             sample_id=sample_id))
        self.cached_samples[project_id] = sample_list

    return self.cached_samples[project_id]
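# Usage sketch (hypothetical; `api` stands for an already-initialised
# instance of this API class, like the `test_api` used in the tests above):
samples = api.get_samples("75")
for s in samples:
    print(s.sample_name, s.description)
# a repeat call for the same project id is answered from cached_samples
# and returns the very same list object
assert api.get_samples("75") is samples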
def test_valid(self):
    """
    Ensure a parsed valid directory matches the expected sample list
    :return:
    """
    sheet_file = path.join(path_to_module, "fake_nextseq_run",
                           "SampleSheet.csv")

    sample = model.Sample(
        "SA20121712",
        "Code Blue",
        1,
        {
            "Sample_Well": "A01",
            "index": "TAAGGCGA",
            "Sample_Plate": "NGS-001 Plate FNC-7",
            "I7_Index_ID": "N701",
            "sample_project": "67",
            "sequencer_sample_name": "SA20121712",
            "I5_Index_ID": "S502",
            "index2": "ATAGAGAG",
        }
    )

    sequence_file_properties = {
        'Sample_Plate': "NGS-001 Plate FNC-7",
        'Sample_Well': 'A01',
        'I7_Index_ID': 'N701',
        'index': 'TAAGGCGA',
        'I5_Index_ID': 'S502',
        'index2': 'ATAGAGAG'
    }

    file_path_1 = path.join(path_to_module, "fake_nextseq_run", "Data",
                            "Intensities", "BaseCalls", "67",
                            "SA20121712_S2_R1_001.fastq.gz")
    file_path_2 = path.join(path_to_module, "fake_nextseq_run", "Data",
                            "Intensities", "BaseCalls", "67",
                            "SA20121712_S2_R2_001.fastq.gz")
    file_list = [file_path_1, file_path_2]

    res = sample_parser._parse_sample_list(sheet_file)

    # Check sample is the same
    self.assertEqual(res[0].get_uploadable_dict(),
                     sample.get_uploadable_dict())
    # Check sequencing file is correct
    self.assertEqual(res[0].sequence_file.properties_dict,
                     sequence_file_properties)
    # compare the file lists irrespective of ordering
    self.assertEqual(sorted(res[0].sequence_file.file_list),
                     sorted(file_list))
def test_parse_out_sequence_file(self):
    """
    Test that _parse_out_sequence_file correctly filters sample-related data
    out of the extra params dict, and that the uploadable dict still includes
    all the needed data after removal
    :return:
    """
    sample = model.Sample(
        "03-3333",
        "Deadly bug",
        None,
        {
            "Sample_Well": "03",
            "index": "CCCCCCCC",
            "Sample_Plate": "3",
            "I7_Index_ID": "N03",
            "sampleName": "03-3333",
            "sampleProject": "6",
            "sequencerSampleId": "03-3333",
            "I5_Index_ID": "S03",
            "index2": "GGGGGGGG",
            "description": "Deadly bug"
        })

    uploadable_dict = {
        'Sample_Well': '03',
        'index': 'CCCCCCCC',
        'Sample_Plate': '3',
        'I7_Index_ID': 'N03',
        'sampleName': '03-3333',
        'sampleProject': '6',
        'sequencerSampleId': '03-3333',
        'I5_Index_ID': 'S03',
        'index2': 'GGGGGGGG',
        'description': 'Deadly bug'
    }

    sequence_file_dict = {
        'Sample_Well': '03',
        'index': 'CCCCCCCC',
        'Sample_Plate': '3',
        'I7_Index_ID': 'N03',
        'sampleProject': '6',
        'sequencerSampleId': '03-3333',
        'I5_Index_ID': 'S03',
        'index2': 'GGGGGGGG'
    }

    res = sample_parser._parse_out_sequence_file(sample)

    self.assertEqual(sample.get_uploadable_dict(), uploadable_dict)
    self.assertEqual(res, sequence_file_dict)
def test_send_and_get_sequence_files(self):
    """
    Tests sending and receiving sequence files
    :return:
    """
    # upload a project
    project_name = "test_project_2"
    project_description = "test_project_description"
    project = model.Project(name=project_name,
                            description=project_description)
    proj_json_res = self.test_api.send_project(project)
    project_identifier = proj_json_res['resource']['identifier']

    # upload a sample
    sample_name = "test_sample"
    sample_desc = "test_sample_desc"
    sample = model.Sample(sample_name, sample_desc)
    self.test_api.send_sample(sample, project_identifier)

    # upload sequence files
    sequence_file_list = [
        path.join(path_to_module, "fake_dir_data", "file_1.fastq.gz"),
        path.join(path_to_module, "fake_dir_data", "file_2.fastq.gz")
    ]
    sequence_file = model.SequenceFile(sequence_file_list)
    upload_id = self.test_api.create_seq_run({'layoutType': 'PAIRED_END'},
                                             'miseq')
    self.test_api.send_sequence_files(sequence_file, sample_name,
                                      project_identifier, upload_id)

    # verify sequence files match what we sent to IRIDA
    returned_sequence_files = self.test_api.get_sequence_files(
        project_identifier, sample_name)
    self.assertEqual(returned_sequence_files[0]['fileName'],
                     'file_1.fastq.gz')
    self.assertEqual(returned_sequence_files[1]['fileName'],
                     'file_2.fastq.gz')
def test_sample_exists(self):
    """
    Upload a sample and make sure it can be found with the sample_exists method
    :return:
    """
    # create a project to upload samples to
    project_name = "test_project_exists"
    project_description = "test_project_exists_description"
    project = model.Project(name=project_name,
                            description=project_description)
    json_res = self.test_api.send_project(project)
    project_id = json_res['resource']['identifier']

    # create and upload a sample, and verify it exists
    sample_name = "test_sample_exists"
    sample_desc = "test_sample_exists_desc"
    sample = model.Sample(sample_name, sample_desc)
    self.test_api.send_sample(sample, project_id)

    self.assertTrue(self.test_api.sample_exists(sample_name, project_id))
def test_valid(self):
    """
    Ensure a parsed valid directory matches the expected sample list
    :return:
    """
    directory = path.join(path_to_module, "fake_ngs_data")
    sheet_file = path.join(directory, "UploadList.csv")
    data_dir = path.join(
        directory, parsers.nextseq2k_nml.Parser.get_relative_data_directory())
    file_list = parsers.common.get_file_list(data_dir)

    sample = model.Sample(
        "01A100001",
        "",
        1,
        {
            "Index": "AAAAAAAA",
            "sample_project": "6",
            "Index2": "TTTTTTTT",
        }
    )

    sequence_file_properties = {
        'Index': 'AAAAAAAA',
        'Index2': 'TTTTTTTT',
        'description': "",
    }

    file_path_1 = path.join(path_to_module, "fake_ngs_data", "Analysis", "1",
                            "Data", "fast1",
                            "01A100001_S1_L001_R1_001.fastq.gz")
    file_path_2 = path.join(path_to_module, "fake_ngs_data", "Analysis", "1",
                            "Data", "fast1",
                            "01A100001_S1_L001_R2_001.fastq.gz")
    raw_file_list = [file_path_1, file_path_2]

    res = sample_parser.parse_sample_list(
        sample_sheet_file=sheet_file,
        run_data_directory=data_dir,
        run_data_directory_file_list=file_list)

    # Check sample is the same
    self.assertEqual(res[0].get_uploadable_dict(),
                     sample.get_uploadable_dict())
    # Check sequencing file is correct
    self.assertEqual(res[0].sequence_file.properties_dict,
                     sequence_file_properties)
    # compare the file lists irrespective of ordering
    self.assertEqual(sorted(res[0].sequence_file.file_list),
                     sorted(raw_file_list))
def test_valid_full_file_path(self, mock_parse_samples):
    """
    Given a valid sample sheet with full file paths, parse correctly
    :return:
    """
    sheet_file = path.join(path_to_module, "fake_dir_data",
                           "SampleList_simple.csv")
    file_path_1 = path.join(path_to_module, "fake_dir_data",
                            "file_1.fastq.gz")
    file_path_2 = path.join(path_to_module, "fake_dir_data",
                            "file_2.fastq.gz")

    sample_list = [
        model.Sample(
            sample_name='my-sample-1',
            description="",
            sample_number=0,
            samp_dict={
                'sample_project': '75',
                'File_Forward': path.abspath(file_path_1),
                'File_Reverse': path.abspath(file_path_2)
            })
    ]
    mock_parse_samples.return_value = sample_list

    res = sample_parser.build_sample_list_from_sample_sheet_with_abs_path(
        sheet_file)
    mock_parse_samples.assert_called_with(sheet_file)

    # Check we have 1 sample
    self.assertEqual(len(res), 1)
    # Check if data is correct
    self.assertEqual(res[0].sample_name, "my-sample-1")
    self.assertEqual(res[0].get_uploadable_dict()["sample_project"], "75")
    self.assertEqual(res[0].get_uploadable_dict()["File_Forward"],
                     path.abspath(file_path_1))
    self.assertEqual(res[0].get_uploadable_dict()["File_Reverse"],
                     path.abspath(file_path_2))
    self.assertEqual(res[0].sequence_file.file_list[0], file_path_1)
    self.assertEqual(res[0].sequence_file.file_list[1], file_path_2)
def send_metadata(api_instance, metadata_csv):
    '''
    PURPOSE:
        Send metadata from qc.csv to IRIDA for each sample

    INPUTS:
        - API_INSTANCE --> IRIDA API instance from generate_api_instance
        - METADATA_CSV --> CSV file that contains all of the metadata, along
          with the sample name and project id in the first and second
          columns respectively
    '''
    with open(metadata_csv, 'r') as input_handle:
        reader = csv.reader(input_handle)

        for index, row in enumerate(reader):
            if index == 0:
                header = row
                continue
            else:
                # Build a dictionary from each row for upload to IRIDA
                metadata = {}
                passing = True
                for i in range(len(row)):
                    if i == 0 and re.search('sample', header[0]):
                        # Get sample name from the first column
                        sample_name = row[i]
                    # Get the project ID from the matching header no matter where it is
                    elif re.search('project_id', header[i]):
                        if row[i] == 'Unknown' or row[i] == 'NA':
                            passing = False
                            break
                        # Get project id from the project_id column
                        project_id = row[i]
                    else:
                        # Put metadata into metadata dictionary for upload
                        metadata[header[i]] = row[i]

                if passing:
                    # Check that the sample exists and create it if not
                    if not api_instance.sample_exists(sample_name=sample_name,
                                                      project_id=project_id):
                        irida_sample = model.Sample(sample_name=sample_name)
                        api_instance.send_sample(sample=irida_sample,
                                                 project_id=project_id)

                    upload_metadata = model.Metadata(metadata=metadata,
                                                     project_id=project_id,
                                                     sample_name=sample_name)
                    status = api_instance.send_metadata(
                        upload_metadata, upload_metadata.project_id,
                        upload_metadata.sample_name)
                    print(status, '\n')
                else:
                    print('Unknown sample data for {}, moving to next sample'
                          .format(sample_name))
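# Usage sketch (hypothetical; `generate_api_instance` is referenced by the
# docstring above but not defined here, and the qc.csv layout below is
# inferred from it: sample name first, project id second, metadata after):
#
#   sample,project_id,coverage,num_reads
#   01-1111,75,42.1,100000
#
api_instance = generate_api_instance()
send_metadata(api_instance, "qc.csv")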
def _parse_samples(sample_sheet_file):
    """
    Parse all the lines under "[BCLConvert_Data]" in the .csv file

    Keys in sample_key_translation_dict have their values changed for
    uploading to the REST API. All other keys keep the same name that
    they have in the .csv file.

    arguments:
        sample_sheet_file -- path to UploadList.csv

    returns a list containing Sample objects that have been created from
    the key: value pairs parsed out of the .csv file
    """
    logging.info("Reading data from sample sheet {}".format(sample_sheet_file))

    csv_reader = common.get_csv_reader(sample_sheet_file)
    # start with an ordered dictionary so that keys are ordered in the same
    # way that they are inserted.
    sample_dict = OrderedDict()
    sample_list = []

    sample_key_translation_dict = {
        'Sample_ID': 'sampleName',
        'Sample_Project': 'sample_project'
    }

    _parse_samples.sample_key_translation_dict = sample_key_translation_dict

    # initialize dictionary keys from first line (data headers/attributes)
    set_attributes = False
    for line in csv_reader:
        if set_attributes:
            for item in line:
                if item in sample_key_translation_dict:
                    key_name = sample_key_translation_dict[item]
                else:
                    key_name = item
                sample_dict[key_name] = ""
            break
        if "[BCLConvert_Data]" in line:
            set_attributes = True

    # fill in values for keys. line is currently below the [BCLConvert_Data] headers
    for sample_number, line in enumerate(csv_reader):
        if len(sample_dict.keys()) != len(line):
            """
            if there is one more data header than there are data values,
            add an empty string to the end of the data values, i.e. the
            Description will be the empty string.
            assumes the last data header is going to be the Description.
            this handles the case where the last trailing comma is trimmed.
            Shaun said this issue may come up when a user edits the
            SampleSheet from within the MiSeq software
            """
            if len(sample_dict.keys()) - len(line) == 1:
                line.append("")
            else:
                raise exceptions.SampleSheetError(
                    ("Your sample sheet is malformed. Expected to find {} "
                     "columns in the [BCLConvert_Data] section, but only "
                     "found {} columns for line {}.".format(
                         len(sample_dict.keys()), len(line), line)),
                    sample_sheet_file
                )

        for index, key in enumerate(sample_dict.keys()):
            sample_dict[key] = line[index].strip()  # assumes values are never empty

        new_sample_dict = deepcopy(sample_dict)
        new_sample_name = new_sample_dict['sampleName']
        del new_sample_dict['sampleName']

        sample = model.Sample(
            sample_name=new_sample_name,
            description="",
            sample_number=sample_number + 1,
            samp_dict=new_sample_dict)
        sample_list.append(sample)

    return sample_list
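# Usage sketch (hypothetical path; the expected UploadList.csv shape is
# inferred from the parser above): the [BCLConvert_Data] header row drives
# which keys end up in each Sample, with Sample_ID becoming the sample name
# and Sample_Project renamed to sample_project.
#
#   [BCLConvert_Data]
#   Sample_ID,Sample_Project,Index,Index2
#   01A100001,6,AAAAAAAA,TTTTTTTT
#
samples = _parse_samples("/path/to/run/UploadList.csv")
for s in samples:
    print(s.sample_name, s.get_uploadable_dict()["sample_project"])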
def _parse_samples(sample_sheet_file):
    """
    Parse all the lines under "[Data]" in the .csv file

    arguments:
        sample_sheet_file -- path to SampleSheet.csv

    returns a list containing Sample objects that have been created from
    the key: value pairs parsed out of the .csv file
    """
    logging.info("Reading data from sample sheet {}".format(sample_sheet_file))

    csv_reader = common.get_csv_reader(sample_sheet_file)
    # start with an ordered dictionary so that keys are ordered in the same
    # way that they are inserted.
    sample_dict = OrderedDict()
    sample_list = []

    sample_key_list = [
        'Sample_Name', 'Project_ID', 'File_Forward', 'File_Reverse'
    ]

    # initialize dictionary keys from first line (data headers/attributes)
    set_attributes = False
    for line in csv_reader:
        if set_attributes:
            for item in line:
                if item in sample_key_list:
                    key_name = item
                    sample_dict[key_name] = ""
            break
        if "[Data]" in line:
            set_attributes = True

    # fill in values for keys. line is currently below the [Data] headers
    for sample_number, line in enumerate(csv_reader):
        # if the line is empty (like a blank line at the end of the file) continue
        if not line:
            continue

        if len(sample_dict.keys()) != len(line):
            """
            if there is one more data header than there are data values,
            add an empty string to the end of the data values, i.e. the
            File_Reverse will be the empty string.
            assumes the last data header is going to be File_Reverse.
            this handles the case where the last trailing comma is trimmed
            when doing a single end run
            """
            if len(sample_dict.keys()) - len(line) == 1:
                line.append("")
            else:
                raise exceptions.SampleSheetError(
                    ("Your sample sheet is malformed. Expected to find {} "
                     "columns in the [Data] section, but only found {} "
                     "columns for line {}.".format(
                         len(sample_dict.keys()), len(line), line)),
                    sample_sheet_file)

        for index, key in enumerate(sample_dict.keys()):
            value = line[index].strip()

            # Keys other than 'File_Reverse' cannot be empty
            if len(value) == 0:  # no value
                if key != 'File_Reverse':
                    raise exceptions.SampleSheetError(
                        ("Your sample sheet is malformed. {} in the [Data] "
                         "section cannot be empty.".format(key)),
                        sample_sheet_file)

            sample_dict[key] = value

        new_sample_dict = deepcopy(sample_dict)
        new_sample_name = new_sample_dict['Sample_Name']
        new_sample_project = new_sample_dict['Project_ID']
        new_sample_dict['sample_project'] = new_sample_project
        del new_sample_dict['Sample_Name']
        del new_sample_dict['Project_ID']

        sample = model.Sample(
            sample_name=new_sample_name,
            description="",
            sample_number=sample_number + 1,
            samp_dict=new_sample_dict)
        sample_list.append(sample)

    return sample_list
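# Usage sketch (hypothetical path; the expected SampleSheet.csv shape is
# inferred from sample_key_list above): Sample_Name becomes the sample name,
# Project_ID is renamed sample_project, and the file columns pass through.
#
#   [Data]
#   Sample_Name,Project_ID,File_Forward,File_Reverse
#   my-sample-1,75,file_1.fastq.gz,file_2.fastq.gz
#
samples = _parse_samples("/path/to/run/SampleSheet.csv")
print(samples[0].sample_name)                              # "my-sample-1"
print(samples[0].get_uploadable_dict()["sample_project"])  # "75"
print(samples[0].get_uploadable_dict()["File_Forward"])    # "file_1.fastq.gz"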