def test_process_project_data_and_account(self):
    """Register new project data and verify the resulting database records.

    Runs ``Find_and_register_new_project_data`` on ``data/check_project_data``
    with Slack logging, iRODS setup, user notification and the HPC user check
    all switched off, then checks that the expected project, user, sample and
    project-user records are present in the database.
    """
    fa = Find_and_register_new_project_data(
        projet_info_path=os.path.join('.', 'data/check_project_data'),
        dbconfig=self.dbconfig,
        user_account_template='template/email_notification/send_new_account_info.txt',
        log_slack=False,
        setup_irods=False,
        notify_user=False,
        check_hpc_user=False,
    )
    fa.process_project_data_and_account()

    with open(self.dbconfig, 'r') as json_data:
        dbparam = json.load(json_data)

    base = BaseAdaptor(**dbparam)
    base.start_session()
    try:
        # Close the session even when an assertion below fails, so a failing
        # test does not leave an open connection behind for the next one.
        pa = ProjectAdaptor(**{'session': base.session})
        project_exists = pa.check_project_records_igf_id(
            project_igf_id='IGFP0002_test_23-5-2017_rna')
        self.assertTrue(project_exists)

        ua = UserAdaptor(**{'session': base.session})
        user_exists = ua.check_user_records_email_id(
            email_id='*****@*****.**')
        self.assertTrue(user_exists)
        user1 = ua.fetch_user_records_email_id(
            user_email_id='*****@*****.**')
        self.assertEqual(user1.name, 'User2')

        sa = SampleAdaptor(**{'session': base.session})
        sample_exists = sa.check_sample_records_igf_id(
            sample_igf_id='IGF00006')
        self.assertTrue(sample_exists)

        project_user_exists = pa.check_existing_project_user(
            project_igf_id='IGFP0002_test_23-5-2017_rna',
            email_id='*****@*****.**')
        self.assertTrue(project_user_exists)
        project_user_exists = pa.check_existing_project_user(
            project_igf_id='IGFP0002_test_23-5-2017_rna',
            email_id='*****@*****.**')
        self.assertTrue(project_user_exists)
    finally:
        base.close_session()
def test_check_project_and_sample(self):
    """Check ``SampleAdaptor.check_project_and_sample`` for a hit and a miss.

    Stores four samples under project ``IGFP0001_test_22-8-2017_rna`` and
    expects ``True`` for a stored sample id and ``False`` for an unknown one.
    """
    sa = SampleAdaptor(**{'session_class': self.session_class})
    sample_data = [
        {
            'sample_igf_id': sample_id,
            'library_id': sample_id,
            'project_igf_id': 'IGFP0001_test_22-8-2017_rna',
        }
        for sample_id in ('IGFS001', 'IGFS002', 'IGFS003', 'IGFS004')
    ]
    sa.start_session()
    try:
        # Close the session even if storing data or an assertion fails.
        sa.store_sample_and_attribute_data(data=sample_data)
        sa1 = sa.check_project_and_sample(
            project_igf_id='IGFP0001_test_22-8-2017_rna',
            sample_igf_id='IGFS001')
        self.assertEqual(sa1, True)
        sa2 = sa.check_project_and_sample(
            project_igf_id='IGFP0001_test_22-8-2017_rna',
            sample_igf_id='IGFS0011')
        self.assertEqual(sa2, False)
    finally:
        sa.close_session()
def setUp(self):
    """Build the test database and write the new-project CSV fixture.

    Creates the schema, stores two users, one project (with a data-authority
    user), five samples, and finally writes two new project records to
    ``self.new_project_data`` as a CSV file.
    """
    self.dbconfig = 'data/dbconfig.json'
    self.new_project_data = 'data/check_project_data/new_project_data.csv'

    with open(self.dbconfig, 'r') as config_handle:
        db_params = json.load(config_handle)

    base = BaseAdaptor(**db_params)
    self.engine = base.engine
    self.dbname = db_params['dbname']
    Base.metadata.create_all(self.engine)
    self.session_class = base.session_class
    base.start_session()

    # users
    user_adaptor = UserAdaptor(session=base.session)
    user_adaptor.store_user_data(data=[
        {'name': 'user1', 'email_id': '*****@*****.**', 'username': '******'},
        {'name': 'igf', 'email_id': '*****@*****.**', 'username': '******'},
    ])

    # project and its data authority
    project_adaptor = ProjectAdaptor(session=base.session)
    project_adaptor.store_project_and_attribute_data(data=[{
        'project_igf_id': 'IGFP0001_test_22-8-2017_rna',
        'project_name': 'test_22-8-2017_rna',
        'description': 'Its project 1',
        'project_deadline': 'Before August 2017',
        'comments': 'Some samples are treated with drug X',
    }])
    project_adaptor.assign_user_to_project(data=[{
        'project_igf_id': 'IGFP0001_test_22-8-2017_rna',
        'email_id': '*****@*****.**',
        'data_authority': True,
    }])

    # samples, all belonging to the same project
    existing_sample_ids = (
        'IGF00001', 'IGF00002', 'IGF00003', 'IGF00004', 'IGF00005')
    sample_records = [
        {'sample_igf_id': sample_id,
         'project_igf_id': 'IGFP0001_test_22-8-2017_rna'}
        for sample_id in existing_sample_ids
    ]
    sample_adaptor = SampleAdaptor(session=base.session)
    sample_adaptor.store_sample_and_attribute_data(data=sample_records)
    base.close_session()

    # records for projects that are not yet in the database
    incoming_records = [
        {
            'project_igf_id': 'IGFP0002_test_23-5-2017_rna',
            'name': 'user2',
            'email_id': '*****@*****.**',
            'sample_igf_id': 'IGF00006',
        },
        {
            'project_igf_id': 'IGFP0003_test_24-8-2017_rna',
            'name': 'user2',
            'email_id': '*****@*****.**',
            'sample_igf_id': 'IGF00007',
            'barcode_check': 'OFF',
        },
    ]
    csv_path = os.path.join('.', self.new_project_data)
    pd.DataFrame(incoming_records).to_csv(csv_path)
def setUp(self):
    """Recreate the test database and load one record of each basic type.

    Drops and recreates the schema (removing the db file if it exists), then
    stores a platform with its flowcell barcode rule, a project, a sample,
    a seqrun and an experiment before committing and closing the session.
    """
    self.dbconfig = 'data/dbconfig.json'
    db_params = read_dbconf_json(self.dbconfig)
    base = BaseAdaptor(**db_params)
    self.engine = base.engine
    self.dbname = db_params['dbname']

    # start from a clean database
    Base.metadata.drop_all(self.engine)
    if os.path.exists(self.dbname):
        os.remove(self.dbname)
    Base.metadata.create_all(self.engine)

    self.session_class = base.get_session_class()
    base = BaseAdaptor(session_class=self.session_class)
    base.start_session()

    # platform and flowcell rules
    platform_adaptor = PlatformAdaptor(session=base.session)
    platform_adaptor.store_platform_data(data=[{
        "platform_igf_id": "M001",
        "model_name": "MISEQ",
        "vendor_name": "ILLUMINA",
        "software_name": "RTA",
        "software_version": "RTA1.18.54",
    }])
    platform_adaptor.store_flowcell_barcode_rule(data=[{
        "platform_igf_id": "M001",
        "flowcell_type": "MISEQ",
        "index_1": "NO_CHANGE",
        "index_2": "NO_CHANGE",
    }])

    # project
    project_adaptor = ProjectAdaptor(session=base.session)
    project_adaptor.store_project_and_attribute_data(
        data=[{'project_igf_id': 'ProjectA'}])

    # sample
    sample_adaptor = SampleAdaptor(session=base.session)
    sample_adaptor.store_sample_and_attribute_data(data=[{
        'sample_igf_id': 'SampleA',
        'project_igf_id': 'ProjectA',
    }])

    # seqrun
    seqrun_adaptor = SeqrunAdaptor(session=base.session)
    seqrun_adaptor.store_seqrun_and_attribute_data(data=[{
        'seqrun_igf_id': 'SeqrunA',
        'flowcell_id': '000000000-D0YLK',
        'platform_igf_id': 'M001',
        'flowcell': 'MISEQ',
    }])

    # experiment
    experiment_adaptor = ExperimentAdaptor(session=base.session)
    experiment_adaptor.store_project_and_attribute_data(data=[{
        'experiment_igf_id': 'ExperimentA',
        'sample_igf_id': 'SampleA',
        'library_name': 'SampleA',
        'platform_name': 'MISEQ',
        'project_igf_id': 'ProjectA',
    }])

    base.commit_session()
    base.close_session()
def setUp(self):
    """Recreate the test database and attach two files to an experiment.

    Drops and recreates the schema, stores a project, a sample and an
    experiment, writes two small files into a fresh temp dir and loads them
    as an ``AnalysisA_html`` collection for ``ExperimentA``.
    """
    self.dbconfig = 'data/dbconfig.json'
    db_params = read_dbconf_json(self.dbconfig)
    base = BaseAdaptor(**db_params)
    self.engine = base.engine
    self.dbname = db_params['dbname']

    # start from a clean database
    Base.metadata.drop_all(self.engine)
    if os.path.exists(self.dbname):
        os.remove(self.dbname)
    Base.metadata.create_all(self.engine)

    self.session_class = base.get_session_class()
    base.start_session()

    project_adaptor = ProjectAdaptor(session=base.session)
    project_adaptor.store_project_and_attribute_data(
        data=[{'project_igf_id': 'ProjectA'}])

    sample_adaptor = SampleAdaptor(session=base.session)
    sample_adaptor.store_sample_and_attribute_data(data=[{
        'sample_igf_id': 'SampleA',
        'project_igf_id': 'ProjectA',
    }])

    experiment_adaptor = ExperimentAdaptor(session=base.session)
    experiment_adaptor.store_project_and_attribute_data(data=[{
        'experiment_igf_id': 'ExperimentA',
        'sample_igf_id': 'SampleA',
        'library_name': 'SampleA',
        'platform_name': 'MISEQ',
        'project_igf_id': 'ProjectA',
    }])

    # create the files that make up the collection
    self.temp_dir = get_temp_dir()
    file_names = ['a.csv', 'b.csv']
    for file_name in file_names:
        with open(os.path.join(self.temp_dir, file_name), 'w') as handle:
            handle.write('A')

    collection_records = []
    for file_name in file_names:
        collection_records.append({
            'name': 'ExperimentA',
            'type': 'AnalysisA_html',
            'table': 'experiment',
            'file_path': os.path.join(self.temp_dir, file_name),
        })

    collection_adaptor = CollectionAdaptor(session=base.session)
    collection_adaptor.load_file_and_create_collection(
        data=collection_records,
        calculate_file_size_and_md5=False)
    base.close_session()
def setUp(self):
    """Create the md5 output dir and load user, project, sample, pipeline,
    platform and seqrun records into the test database.

    Pipeline, platform and seqrun records are read from JSON fixture files
    under ``data/``.
    """
    def read_json(json_path):
        # Load one JSON fixture file and return its parsed content.
        with open(json_path, 'r') as handle:
            return json.load(handle)

    self.path = 'data/seqrun_dir'
    self.dbconfig = 'data/dbconfig.json'
    self.md5_out_path = 'data/md5_dir'
    self.pipeline_name = 'demultiplexing_fastq'
    seqrun_json = 'data/seqrun_db_data.json'
    platform_json = 'data/platform_db_data.json'
    pipeline_json = 'data/pipeline_data.json'
    os.mkdir(self.md5_out_path)

    db_params = read_json(self.dbconfig)
    base = BaseAdaptor(**db_params)
    self.engine = base.engine
    self.dbname = db_params['dbname']
    # NOTE(review): this overwrites the 'demultiplexing_fastq' value assigned
    # above, so the tests see an empty pipeline name - confirm this is intended.
    self.pipeline_name = ''
    Base.metadata.create_all(self.engine)
    base.start_session()

    # user
    user_adaptor = UserAdaptor(session=base.session)
    user_adaptor.store_user_data(data=[
        {
            'name': 'user1',
            'email_id': '*****@*****.**',
            'username': '******',
        },
    ])

    # project and its data authority
    project_adaptor = ProjectAdaptor(session=base.session)
    project_adaptor.store_project_and_attribute_data(data=[{
        'project_igf_id': 'project_1',
        'project_name': 'test_22-8-2017_rna',
        'description': 'Its project 1',
        'project_deadline': 'Before August 2017',
        'comments': 'Some samples are treated with drug X',
    }])
    project_adaptor.assign_user_to_project(data=[{
        'project_igf_id': 'project_1',
        'email_id': '*****@*****.**',
        'data_authority': True,
    }])

    # samples
    sample_records = [
        {'sample_igf_id': sample_id, 'project_igf_id': 'project_1'}
        for sample_id in ('IGF0001', 'IGF0002', 'IGF0003')
    ]
    sample_adaptor = SampleAdaptor(session=base.session)
    sample_adaptor.store_sample_and_attribute_data(data=sample_records)
    base.commit_session()

    # store pipeline data to db
    pipeline_records = read_json(pipeline_json)
    pipeline_adaptor = PipelineAdaptor(session=base.session)
    pipeline_adaptor.store_pipeline_data(data=pipeline_records)

    # store platform data to db
    platform_records = read_json(platform_json)
    platform_adaptor = PlatformAdaptor(session=base.session)
    platform_adaptor.store_platform_data(data=platform_records)

    # store seqrun data to db
    seqrun_records = read_json(seqrun_json)
    seqrun_adaptor = SeqrunAdaptor(session=base.session)
    seqrun_adaptor.store_seqrun_and_attribute_data(data=seqrun_records)

    base.close_session()
def load_file_to_disk_and_db(self, input_file_list, withdraw_exisitng_collection=True,
                             autosave_db=True, file_suffix=None, force=True,
                             remove_file=False):
    '''
    A method for loading analysis results to disk and database. File will be moved
    to a new path if base_path is present. Directory structure of the final path is
    based on the collection_table information.

    Following will be the final directory structure if base_path is present

    project - base_path/project_igf_id/analysis_name
    sample - base_path/project_igf_id/sample_igf_id/analysis_name
    experiment - base_path/project_igf_id/sample_igf_id/experiment_igf_id/analysis_name
    run - base_path/project_igf_id/sample_igf_id/experiment_igf_id/run_igf_id/analysis_name

    :param input_file_list: A list of input file to load, all using the same
                            collection info
    :param withdraw_exisitng_collection: Remove existing collection group, DO NOT
                                         use this while loading a list of files
    :param autosave_db: Commit changes to database after each file, default True.
                        A final commit is always issued before the session is closed.
    :param file_suffix: Use a specific file suffix, use None if it should be same as
                        original file e.g. input.vcf.gz to output.vcf.gz
    :param force: Toggle for overwriting an existing file at the destination,
                  passed to move_file, default True
    :param remove_file: A toggle for removing existing file from disk, default False
    :returns: A list of final filepath
    :raises ValueError: If the collection information is incomplete, if base_path is
                        set and collection_table is not one of project, sample,
                        experiment or run, if the collection entity is not found in
                        the database, if any id needed for the final path is missing,
                        or if files should be renamed without an analysis name
    '''
    supported_tables = ('project', 'sample', 'experiment', 'run')
    dbconnected = False
    try:
        project_igf_id = None
        sample_igf_id = None
        experiment_igf_id = None
        run_igf_id = None
        output_path_list = []                                   # define empty output list

        if self.collection_name is None or \
           self.collection_type is None or \
           self.collection_table is None:
            raise ValueError('File collection information is incomplete')  # check for collection information

        if self.base_path is not None and \
           self.collection_table not in supported_tables:
            # Without this check the target directory would stay empty and the
            # file would be moved to a bare relative file name.
            raise ValueError(
                'Collection table {0} is not supported for moving files, expected one of {1}'.
                format(self.collection_table, ','.join(supported_tables)))

        base = BaseAdaptor(**{'session_class': self.dbsession_class})
        base.start_session()                                    # connect to db
        dbconnected = True

        if self.base_path is not None:
            if self.collection_table == 'sample':
                sa = SampleAdaptor(**{'session': base.session})
                sample_igf_id = self.collection_name
                sample_exists = \
                    sa.check_sample_records_igf_id(
                        sample_igf_id=sample_igf_id)
                if not sample_exists:
                    raise ValueError('Sample {0} not found in db'.
                                     format(sample_igf_id))
                project_igf_id = \
                    sa.fetch_sample_project(
                        sample_igf_id=sample_igf_id)            # fetch project id for sample
            elif self.collection_table == 'experiment':
                ea = ExperimentAdaptor(**{'session': base.session})
                experiment_igf_id = self.collection_name
                experiment_exists = \
                    ea.check_experiment_records_id(
                        experiment_igf_id=experiment_igf_id)
                if not experiment_exists:
                    raise ValueError('Experiment {0} not present in database'.
                                     format(experiment_igf_id))
                (project_igf_id, sample_igf_id) = \
                    ea.fetch_project_and_sample_for_experiment(
                        experiment_igf_id=experiment_igf_id)    # fetch project and sample id for experiment
            elif self.collection_table == 'run':
                ra = RunAdaptor(**{'session': base.session})
                run_igf_id = self.collection_name
                run_exists = \
                    ra.check_run_records_igf_id(
                        run_igf_id=run_igf_id)
                if not run_exists:
                    raise ValueError('Run {0} not found in database'.
                                     format(run_igf_id))
                (project_igf_id, sample_igf_id, experiment_igf_id) = \
                    ra.fetch_project_sample_and_experiment_for_run(
                        run_igf_id=run_igf_id)                  # fetch project, sample and experiment id for run
            elif self.collection_table == 'project':
                pa = ProjectAdaptor(**{'session': base.session})
                project_igf_id = self.collection_name
                project_exists = \
                    pa.check_project_records_igf_id(
                        project_igf_id=project_igf_id)
                if not project_exists:
                    raise ValueError('Project {0} not found in database'.
                                     format(project_igf_id))

        if self.rename_file and self.analysis_name is None:
            raise ValueError('Analysis name is required for renaming file')  # check analysis name

        for input_file in input_file_list:
            if self.base_path is None:                          # do not move file if base_path is absent
                final_path = os.path.dirname(input_file)
            elif self.collection_table == 'project':
                if project_igf_id is None:
                    raise ValueError('Missing project id for collection {0}'.
                                     format(self.collection_name))
                final_path = \
                    os.path.join(
                        self.base_path,
                        project_igf_id,
                        self.analysis_name)                     # final path for project
            elif self.collection_table == 'sample':
                if project_igf_id is None or \
                   sample_igf_id is None:
                    raise ValueError('Missing project and sample id for collection {0}'.
                                     format(self.collection_name))
                final_path = \
                    os.path.join(
                        self.base_path,
                        project_igf_id,
                        sample_igf_id,
                        self.analysis_name)                     # final path for sample
            elif self.collection_table == 'experiment':
                if project_igf_id is None or \
                   sample_igf_id is None or \
                   experiment_igf_id is None:
                    raise ValueError('Missing project,sample and experiment id for collection {0}'.
                                     format(self.collection_name))
                final_path = \
                    os.path.join(
                        self.base_path,
                        project_igf_id,
                        sample_igf_id,
                        experiment_igf_id,
                        self.analysis_name)                     # final path for experiment
            else:                                               # collection_table is 'run', validated above
                if project_igf_id is None or \
                   sample_igf_id is None or \
                   experiment_igf_id is None or \
                   run_igf_id is None:
                    raise ValueError('Missing project,sample,experiment and run id for collection {0}'.
                                     format(self.collection_name))
                final_path = \
                    os.path.join(
                        self.base_path,
                        project_igf_id,
                        sample_igf_id,
                        experiment_igf_id,
                        run_igf_id,
                        self.analysis_name)                     # final path for run

            if self.rename_file:
                new_filename = \
                    self.get_new_file_name(
                        input_file=input_file,
                        file_suffix=file_suffix)
                final_path = \
                    os.path.join(
                        final_path,
                        new_filename)                           # get new filepath
            else:
                final_path = \
                    os.path.join(
                        final_path,
                        os.path.basename(input_file))

            if final_path != input_file:                        # move file if its required
                final_path = \
                    preprocess_path_name(
                        input_path=final_path)                  # remove unexpected characters from file path
                move_file(
                    source_path=input_file,
                    destinationa_path=final_path,
                    force=force)                                # move or overwrite file to destination dir

            output_path_list.append(final_path)                 # add final path to the output list
            self.create_or_update_analysis_collection(
                file_path=final_path,
                dbsession=base.session,
                withdraw_exisitng_collection=withdraw_exisitng_collection,
                remove_file=remove_file,
                autosave_db=autosave_db)                        # load new file collection in db
            if autosave_db:
                base.commit_session()                           # save changes to db for each file

        base.commit_session()                                   # save changes to db
        return output_path_list
    except BaseException:
        # Explicit equivalent of the former bare except: undo pending changes
        # for any failure (including interrupts) and re-raise unchanged.
        if dbconnected:
            base.rollback_session()
        raise
    finally:
        if dbconnected:
            base.close_session()                                # always close db connection
def setUp(self):
    """Load platforms, seqruns, a project, samples, experiments, runs, fastq
    file records and their run-level collections into the test database.
    """
    self.dbconfig = 'data/dbconfig.json'
    db_params = read_dbconf_json(self.dbconfig)
    base = BaseAdaptor(**db_params)
    self.engine = base.engine
    self.dbname = db_params['dbname']
    Base.metadata.create_all(self.engine)
    self.session_class = base.get_session_class()
    base.start_session()

    # platforms and flowcell barcode rules
    platform_adaptor = PlatformAdaptor(session=base.session)
    platform_adaptor.store_platform_data(data=[
        {
            "platform_igf_id": "M03291",
            "model_name": "MISEQ",
            "vendor_name": "ILLUMINA",
            "software_name": "RTA",
            "software_version": "RTA1.18.54",
        },
        {
            "platform_igf_id": "NB501820",
            "model_name": "NEXTSEQ",
            "vendor_name": "ILLUMINA",
            "software_name": "RTA",
            "software_version": "RTA2",
        },
        {
            "platform_igf_id": "K00345",
            "model_name": "HISEQ4000",
            "vendor_name": "ILLUMINA",
            "software_name": "RTA",
            "software_version": "RTA2",
        },
    ])
    platform_adaptor.store_flowcell_barcode_rule(data=[
        {
            "platform_igf_id": "K00345",
            "flowcell_type": "HiSeq 3000/4000 SR",
            "index_1": "NO_CHANGE",
            "index_2": "NO_CHANGE",
        },
        {
            "platform_igf_id": "K00345",
            "flowcell_type": "HiSeq 3000/4000 PE",
            "index_1": "NO_CHANGE",
            "index_2": "REVCOMP",
        },
        {
            "platform_igf_id": "NB501820",
            "flowcell_type": "NEXTSEQ",
            "index_1": "NO_CHANGE",
            "index_2": "REVCOMP",
        },
        {
            "platform_igf_id": "M03291",
            "flowcell_type": "MISEQ",
            "index_1": "NO_CHANGE",
            "index_2": "NO_CHANGE",
        },
    ])

    # seqruns
    seqrun_adaptor = SeqrunAdaptor(session=base.session)
    seqrun_adaptor.store_seqrun_and_attribute_data(data=[
        {
            'seqrun_igf_id': '180416_M03291_0139_000000000-BRN47',
            'flowcell_id': '000000000-BRN47',
            'platform_igf_id': 'M03291',
            'flowcell': 'MISEQ',
        },
        {
            'seqrun_igf_id': '180416_NB03291_013_000000001-BRN47',
            'flowcell_id': '000000001-BRN47',
            'platform_igf_id': 'NB501820',
            'flowcell': 'NEXTSEQ',
        },
    ])

    # project
    project_adaptor = ProjectAdaptor(session=base.session)
    project_adaptor.store_project_and_attribute_data(
        data=[{'project_igf_id': 'projectA'}])

    # samples
    sample_adaptor = SampleAdaptor(session=base.session)
    sample_adaptor.store_sample_and_attribute_data(data=[
        {
            'sample_igf_id': 'sampleA',
            'project_igf_id': 'projectA',
            'species_name': 'HG38',
        },
        {
            'sample_igf_id': 'sampleB',
            'project_igf_id': 'projectA',
            'species_name': 'UNKNOWN',
        },
    ])

    # experiments
    experiment_adaptor = ExperimentAdaptor(session=base.session)
    experiment_adaptor.store_project_and_attribute_data(data=[
        {
            'project_igf_id': 'projectA',
            'sample_igf_id': 'sampleA',
            'experiment_igf_id': 'sampleA_MISEQ',
            'library_name': 'sampleA',
            'library_source': 'TRANSCRIPTOMIC_SINGLE_CELL',
            'library_strategy': 'RNA-SEQ',
            'experiment_type': 'TENX-TRANSCRIPTOME-3P',
            'library_layout': 'PAIRED',
            'platform_name': 'MISEQ',
        },
        {
            'project_igf_id': 'projectA',
            'sample_igf_id': 'sampleA',
            'experiment_igf_id': 'sampleA_NEXTSEQ',
            'library_name': 'sampleA',
            'library_source': 'UNKNOWN',
            'library_strategy': 'RNA-SEQ',
            'experiment_type': 'TENX-TRANSCRIPTOME-3P',
            'library_layout': 'PAIRED',
            'platform_name': 'NEXTSEQ',
        },
        {
            'project_igf_id': 'projectA',
            'sample_igf_id': 'sampleB',
            'experiment_igf_id': 'sampleB_MISEQ',
            'library_name': 'sampleB',
            'library_source': 'TRANSCRIPTOMIC_SINGLE_CELL',
            'library_strategy': 'RNA-SEQ',
            'experiment_type': 'TENX-TRANSCRIPTOME-3P',
            'library_layout': 'PAIRED',
            'platform_name': 'MISEQ',
        },
    ])

    # runs
    run_adaptor = RunAdaptor(session=base.session)
    run_adaptor.store_run_and_attribute_data(data=[
        {
            'experiment_igf_id': 'sampleA_MISEQ',
            'seqrun_igf_id': '180416_M03291_0139_000000000-BRN47',
            'run_igf_id': 'sampleA_MISEQ_000000000-BRN47_1',
            'lane_number': '1',
        },
        {
            'experiment_igf_id': 'sampleA_NEXTSEQ',
            'seqrun_igf_id': '180416_NB03291_013_000000001-BRN47',
            'run_igf_id': 'sampleA_NEXTSEQ_000000001-BRN47_2',
            'lane_number': '2',
        },
        {
            'experiment_igf_id': 'sampleB_MISEQ',
            'seqrun_igf_id': '180416_M03291_0139_000000000-BRN47',
            'run_igf_id': 'sampleB_MISEQ_HVWN7BBXX_1',
            'lane_number': '1',
        },
    ])

    # one fastq file per run; collection names are the run ids
    run_ids = (
        'sampleA_MISEQ_000000000-BRN47_1',
        'sampleA_NEXTSEQ_000000001-BRN47_2',
        'sampleB_MISEQ_HVWN7BBXX_1',
    )
    fastq_paths = (
        '/path/sampleA_MISEQ_000000000-BRN47_1_R1.fastq.gz',
        '/path/sampleA_NEXTSEQ_000000001-BRN47_2_R1.fastq.gz',
        '/path/sampleB_MISEQ_HVWN7BBXX_1_R1.fastq.gz',
    )

    file_records = [
        {
            'file_path': fastq_path,
            'location': 'HPC_PROJECT',
            'md5': 'fd5a95c18ebb7145645e95ce08d729e4',
            'size': '1528121404',
        }
        for fastq_path in fastq_paths
    ]
    file_adaptor = FileAdaptor(session=base.session)
    file_adaptor.store_file_and_attribute_data(data=file_records)

    collection_records = [
        {'name': run_id, 'type': 'demultiplexed_fastq', 'table': 'run'}
        for run_id in run_ids
    ]
    collection_file_records = [
        {'name': run_id, 'type': 'demultiplexed_fastq', 'file_path': fastq_path}
        for run_id, fastq_path in zip(run_ids, fastq_paths)
    ]
    collection_adaptor = CollectionAdaptor(session=base.session)
    collection_adaptor.store_collection_and_attribute_data(
        data=collection_records)
    collection_adaptor.create_collection_group(data=collection_file_records)

    base.close_session()
def setUp(self):
    """Set the fastq collection test attributes and load platforms, flowcell
    rules, one project, two samples and one seqrun into the test database.
    """
    # fixture paths and identifiers used by the tests
    self.dbconfig = 'data/dbconfig.json'
    self.fastq_dir = 'data/collect_fastq_dir/sc_1_8'
    self.model_name = 'NEXTSEQ'
    self.flowcell_id = 'TESTABC'
    self.seqrun_igf_id = '171003_NB500000_0089_TESTABC'
    self.file_location = 'HPC_PROJECT'
    self.samplesheet_file = 'data/collect_fastq_dir/sc_1_8/SampleSheet.csv'
    self.samplesheet_filename = 'SampleSheet.csv'
    self.manifest_name = 'file_manifest.csv'

    db_params = read_dbconf_json(self.dbconfig)
    base = BaseAdaptor(**db_params)
    self.engine = base.engine
    self.dbname = db_params['dbname']
    Base.metadata.create_all(self.engine)
    self.session_class = base.session_class
    base.start_session()

    # platforms and flowcell barcode rules
    platform_adaptor = PlatformAdaptor(session=base.session)
    platform_adaptor.store_platform_data(data=[
        {
            "platform_igf_id": "M00001",
            "model_name": "MISEQ",
            "vendor_name": "ILLUMINA",
            "software_name": "RTA",
            "software_version": "RTA1.18.54",
        },
        {
            "platform_igf_id": "NB500000",
            "model_name": "NEXTSEQ",
            "vendor_name": "ILLUMINA",
            "software_name": "RTA",
            "software_version": "RTA2",
        },
        {
            "platform_igf_id": "K00000",
            "model_name": "HISEQ4000",
            "vendor_name": "ILLUMINA",
            "software_name": "RTA",
            "software_version": "RTA2",
        },
    ])
    platform_adaptor.store_flowcell_barcode_rule(data=[
        {
            "platform_igf_id": "K00000",
            "flowcell_type": "HiSeq 3000/4000 SR",
            "index_1": "NO_CHANGE",
            "index_2": "NO_CHANGE",
        },
        {
            "platform_igf_id": "K00000",
            "flowcell_type": "HiSeq 3000/4000 PE",
            "index_1": "NO_CHANGE",
            "index_2": "REVCOMP",
        },
        {
            "platform_igf_id": "NB500000",
            "flowcell_type": "NEXTSEQ",
            "index_1": "NO_CHANGE",
            "index_2": "REVCOMP",
        },
        {
            "platform_igf_id": "M00001",
            "flowcell_type": "MISEQ",
            "index_1": "NO_CHANGE",
            "index_2": "NO_CHANGE",
        },
    ])

    # project
    project_adaptor = ProjectAdaptor(session=base.session)
    project_adaptor.store_project_and_attribute_data(data=[{
        'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc',
        'project_name': 'test_22-8-2017_rna',
        'description': 'Its project 1',
        'project_deadline': 'Before August 2017',
        'comments': 'Some samples are treated with drug X',
    }])

    # samples
    sample_records = [
        {'sample_igf_id': sample_id,
         'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc'}
        for sample_id in ('IGF00001', 'IGF00002')
    ]
    sample_adaptor = SampleAdaptor(session=base.session)
    sample_adaptor.store_sample_and_attribute_data(data=sample_records)

    # seqrun
    seqrun_adaptor = SeqrunAdaptor(session=base.session)
    seqrun_adaptor.store_seqrun_and_attribute_data(data=[{
        'seqrun_igf_id': '171003_NB500000_0089_TESTABC',
        'flowcell_id': 'TESTABC',
        'platform_igf_id': 'NB500000',
        'flowcell': 'NEXTSEQ',
    }])

    base.close_session()
def setUp(self):
    """Load one project, three samples and three experiments into the test
    database.

    The samples differ in which of the library source, library strategy and
    experiment type fields they carry.
    """
    self.dbconfig = 'data/dbconfig.json'
    db_params = read_dbconf_json(self.dbconfig)
    base = BaseAdaptor(**db_params)
    self.engine = base.engine
    self.dbname = db_params['dbname']
    Base.metadata.create_all(self.engine)
    base.start_session()
    self.session_class = base.get_session_class()

    project_id = 'IGFP0001_test_22-8-2017_rna_sc'

    # project
    project_adaptor = ProjectAdaptor(session=base.session)
    project_adaptor.store_project_and_attribute_data(data=[{
        'project_igf_id': project_id,
        'project_name': 'test_22-8-2017_rna',
        'description': 'Its project 1',
        'project_deadline': 'Before August 2017',
        'comments': 'Some samples are treated with drug X',
    }])

    # samples
    sample_adaptor = SampleAdaptor(session=base.session)
    sample_adaptor.store_sample_and_attribute_data(data=[
        {
            'sample_igf_id': 'IGF00001',
            'project_igf_id': project_id,
            'library_source': 'TRANSCRIPTOMIC_SINGLE_CELL',
            'library_strategy': 'RNA-SEQ',
            'experiment_type': 'POLYA-RNA',
        },
        {
            'sample_igf_id': 'IGF00003',
            'project_igf_id': project_id,
            'library_source': 'TRANSCRIPTOMIC_SINGLE_CELL',
            'experiment_type': 'POLYA-RNA',
        },
        {
            'sample_igf_id': 'IGF00002',
            'project_igf_id': project_id,
        },
    ])

    # experiments
    experiment_adaptor = ExperimentAdaptor(session=base.session)
    experiment_adaptor.store_project_and_attribute_data(data=[
        {
            'project_igf_id': project_id,
            'sample_igf_id': 'IGF00001',
            'experiment_igf_id': 'IGF00001_HISEQ4000',
            'library_name': 'IGF00001',
        },
        {
            'project_igf_id': project_id,
            'sample_igf_id': 'IGF00003',
            'experiment_igf_id': 'IGF00003_HISEQ4000',
            'library_name': 'IGF00001',
        },
        {
            'project_igf_id': project_id,
            'sample_igf_id': 'IGF00002',
            'experiment_igf_id': 'IGF00002_HISEQ4000',
            'library_name': 'IGF00002',
        },
    ])

    base.close_session()
def setUp(self):
    """Recreate the test database, create input files in a temp work dir and
    load one platform, project, sample, seqrun, experiment and run record.
    """
    self.dbconfig = 'data/dbconfig.json'
    db_params = read_dbconf_json(self.dbconfig)
    base = BaseAdaptor(**db_params)
    self.engine = base.engine
    self.dbname = db_params['dbname']

    # start from a clean database
    Base.metadata.drop_all(self.engine)
    if os.path.exists(self.dbname):
        os.remove(self.dbname)
    Base.metadata.create_all(self.engine)
    self.session_class = base.get_session_class()

    # create input files
    self.temp_work_dir = get_temp_dir()
    self.temp_base_dir = get_temp_dir()
    self.input_list = ['a.cram', 'a.vcf.gz', 'b.tar.gz']
    for input_name in self.input_list:
        input_path = os.path.join(self.temp_work_dir, input_name)
        with open(input_path, 'w') as handle:
            handle.write('AAAA')

    base = BaseAdaptor(session_class=self.session_class)
    base.start_session()

    # platform and flowcell rules
    platform_adaptor = PlatformAdaptor(session=base.session)
    platform_adaptor.store_platform_data(data=[{
        "platform_igf_id": "M001",
        "model_name": "MISEQ",
        "vendor_name": "ILLUMINA",
        "software_name": "RTA",
        "software_version": "RTA1.18.54",
    }])
    platform_adaptor.store_flowcell_barcode_rule(data=[{
        "platform_igf_id": "M001",
        "flowcell_type": "MISEQ",
        "index_1": "NO_CHANGE",
        "index_2": "NO_CHANGE",
    }])

    # project
    project_adaptor = ProjectAdaptor(session=base.session)
    project_adaptor.store_project_and_attribute_data(
        data=[{'project_igf_id': 'ProjectA'}])

    # sample
    sample_adaptor = SampleAdaptor(session=base.session)
    sample_adaptor.store_sample_and_attribute_data(data=[{
        'sample_igf_id': 'SampleA',
        'project_igf_id': 'ProjectA',
    }])

    # seqrun
    seqrun_adaptor = SeqrunAdaptor(session=base.session)
    seqrun_adaptor.store_seqrun_and_attribute_data(data=[{
        'seqrun_igf_id': 'SeqrunA',
        'flowcell_id': '000000000-D0YLK',
        'platform_igf_id': 'M001',
        'flowcell': 'MISEQ',
    }])

    # experiment
    experiment_adaptor = ExperimentAdaptor(session=base.session)
    experiment_adaptor.store_project_and_attribute_data(data=[{
        'experiment_igf_id': 'ExperimentA',
        'sample_igf_id': 'SampleA',
        'library_name': 'SampleA',
        'platform_name': 'MISEQ',
        'project_igf_id': 'ProjectA',
    }])

    # run
    run_adaptor = RunAdaptor(session=base.session)
    run_adaptor.store_run_and_attribute_data(data=[{
        'run_igf_id': 'RunA',
        'experiment_igf_id': 'ExperimentA',
        'seqrun_igf_id': 'SeqrunA',
        'lane_number': '1',
    }])

    base.commit_session()
    base.close_session()
def setUp(self):
    """Create the test schema and seed one HISEQ4000 project.

    Loads, in order: three platforms with their flowcell barcode rules,
    one project, seven samples, one seqrun, six experiments and six runs
    (each run carrying an ``R1_READ_COUNT`` value). Sample
    ``IGF109797_1`` is registered without an experiment or run.

    Every adaptor gets its own session, opened from
    ``base.session_class`` and closed even if a load fails: unittest
    does not call ``tearDown`` when ``setUp`` raises.
    """
    import contextlib  # local import: only needed by this fixture

    self.dbconfig = 'data/dbconfig.json'
    dbparam = read_dbconf_json(self.dbconfig)
    base = BaseAdaptor(**dbparam)
    self.engine = base.engine
    self.dbname = dbparam['dbname']
    # Drop any tables left by an aborted earlier run so the inserts below
    # cannot collide with stale rows.
    Base.metadata.drop_all(self.engine)
    Base.metadata.create_all(self.engine)
    self.session_class = base.get_session_class()

    @contextlib.contextmanager
    def _open_adaptor(adaptor_class):
        """Yield a started adaptor and always close its session."""
        adaptor = adaptor_class(**{'session_class': base.session_class})
        adaptor.start_session()
        try:
            yield adaptor
        finally:
            adaptor.close_session()

    # ---- fixture records -------------------------------------------------
    platform_data = [
        {"platform_igf_id": "M03291",
         "model_name": "MISEQ",
         "vendor_name": "ILLUMINA",
         "software_name": "RTA",
         "software_version": "RTA1.18.54"},
        {"platform_igf_id": "NB501820",
         "model_name": "NEXTSEQ",
         "vendor_name": "ILLUMINA",
         "software_name": "RTA",
         "software_version": "RTA2"},
        {"platform_igf_id": "K00345",
         "model_name": "HISEQ4000",
         "vendor_name": "ILLUMINA",
         "software_name": "RTA",
         "software_version": "RTA2"},
    ]
    flowcell_rule_data = [
        {"platform_igf_id": "K00345",
         "flowcell_type": "HiSeq 3000/4000 SR",
         "index_1": "NO_CHANGE",
         "index_2": "NO_CHANGE"},
        {"platform_igf_id": "K00345",
         "flowcell_type": "HiSeq 3000/4000 PE",
         "index_1": "NO_CHANGE",
         "index_2": "REVCOMP"},
        {"platform_igf_id": "NB501820",
         "flowcell_type": "NEXTSEQ",
         "index_1": "NO_CHANGE",
         "index_2": "REVCOMP"},
        {"platform_igf_id": "M03291",
         "flowcell_type": "MISEQ",
         "index_1": "NO_CHANGE",
         "index_2": "NO_CHANGE"},
    ]

    project_igf_id = 'IGFQ000472_avik_28-3-2018_RNA'
    seqrun_igf_id = '180518_K00345_0047_BHV2GJBBXX'
    project_data = [{'project_igf_id': project_igf_id}]
    seqrun_data = [{
        'flowcell_id': 'HV2GJBBXX',
        'platform_igf_id': 'K00345',
        'seqrun_igf_id': seqrun_igf_id
    }]

    # (sample_igf_id, R1 read count) for each sample that has an
    # experiment and a run.
    sequenced_samples = [
        ('IGF109792', 288046541),
        ('IGF109793', 14666330),
        ('IGF109794', 5009143),
        ('IGF109795', 1391747),
        ('IGF109796', 1318008),
        ('IGF109797', 1216324),
    ]
    sample_ids = [sample_id for sample_id, _ in sequenced_samples]
    sample_ids.append('IGF109797_1')  # sample only; no experiment or run

    sample_data = [
        {'project_igf_id': project_igf_id,
         'sample_igf_id': sample_id,
         'expected_read': 40000000}
        for sample_id in sample_ids
    ]
    experiment_data = [
        {'experiment_igf_id': '{0}_HISEQ4000'.format(sample_id),
         'library_name': sample_id,
         'platform_name': 'HISEQ4000',
         'project_igf_id': project_igf_id,
         'sample_igf_id': sample_id}
        for sample_id, _ in sequenced_samples
    ]
    # Generating the run IDs from one template also removes the stray
    # leading space the IGF109796 run ID used to carry.
    run_data = [
        {'experiment_igf_id': '{0}_HISEQ4000'.format(sample_id),
         'lane_number': '7',
         'run_igf_id': '{0}_HISEQ4000_H2N3MBBXY_7'.format(sample_id),
         'seqrun_igf_id': seqrun_igf_id,
         'R1_READ_COUNT': read_count}
        for sample_id, read_count in sequenced_samples
    ]

    # ---- load, one session per adaptor -----------------------------------
    with _open_adaptor(PlatformAdaptor) as pl:
        pl.store_platform_data(data=platform_data)
        pl.store_flowcell_barcode_rule(data=flowcell_rule_data)

    with _open_adaptor(ProjectAdaptor) as pa:
        pa.store_project_and_attribute_data(data=project_data)

    with _open_adaptor(SampleAdaptor) as sa:
        sa.store_sample_and_attribute_data(data=sample_data)

    with _open_adaptor(SeqrunAdaptor) as sra:
        sra.store_seqrun_and_attribute_data(data=seqrun_data)

    # ExperimentAdaptor exposes its loader under this name.
    with _open_adaptor(ExperimentAdaptor) as ea:
        ea.store_project_and_attribute_data(data=experiment_data)

    with _open_adaptor(RunAdaptor) as ra:
        ra.store_run_and_attribute_data(data=run_data)
'seqrun_igf_id': '180610_K00345_0063_AHWL7CBBXX', 'lane_number': '1' }, { 'run_igf_id': 'RunC', 'experiment_igf_id': 'ExperimentA', 'seqrun_igf_id': '180410_K00345_0063_AHWL7CBBXX', 'lane_number': '1' }] # run data base.start_session() pl = PlatformAdaptor(**{'session': base.session}) pl.store_platform_data(data=platform_data) # loading platform data pl.store_flowcell_barcode_rule( data=flowcell_rule_data) # loading flowcell rules data pa = ProjectAdaptor(**{'session': base.session}) pa.store_project_and_attribute_data(data=project_data) # load project data sa = SampleAdaptor(**{'session': base.session}) sa.store_sample_and_attribute_data(data=sample_data) # store sample data sra = SeqrunAdaptor(**{'session': base.session}) sra.store_seqrun_and_attribute_data(data=seqrun_data) # load seqrun data ea = ExperimentAdaptor(**{'session': base.session}) ea.store_project_and_attribute_data( data=experiment_data) # load experiment data ra = RunAdaptor(**{'session': base.session}) ra.store_run_and_attribute_data(data=run_data) # load run data pipeline_data = [{ "pipeline_name": "DemultiplexIlluminaFastq", "pipeline_db": "sqlite:////bcl2fastq.db", }] pipeline_seed_data = [ {
def setUp(self):
    """Build a clean test database with MISEQ records and pipeline seeds.

    Drops and recreates the tables declared on ``Base.metadata`` (also
    deleting the file named by ``dbname`` if present), then loads through
    one shared session: one platform and flowcell rule, two seqruns, one
    project, two samples, two experiments, one run, two pipelines and
    four pipeline seeds. Finally the seeds with ``seed_id`` 2 are updated
    to status ``FINISHED`` for both pipelines.

    The session is closed in a ``finally`` clause: unittest does not call
    ``tearDown`` when ``setUp`` raises, so a failing load would otherwise
    leave the session open.
    """
    self.dbconfig = 'data/dbconfig.json'
    dbparam = read_dbconf_json(self.dbconfig)
    base = BaseAdaptor(**dbparam)
    self.engine = base.engine
    self.dbname = dbparam['dbname']

    # Start from an empty schema, whatever an earlier run left behind.
    Base.metadata.drop_all(self.engine)
    if os.path.exists(self.dbname):
        os.remove(self.dbname)
    Base.metadata.create_all(self.engine)
    self.session_class = base.get_session_class()

    # ---- fixture records -------------------------------------------------
    platform_data = [{
        "platform_igf_id": "M03291",
        "model_name": "MISEQ",
        "vendor_name": "ILLUMINA",
        "software_name": "RTA",
        "software_version": "RTA1.18.54"
    }]
    flowcell_rule_data = [{
        "platform_igf_id": "M03291",
        "flowcell_type": "MISEQ",
        "index_1": "NO_CHANGE",
        "index_2": "NO_CHANGE"
    }]
    seqrun_data = [{
        'seqrun_igf_id': '180416_M03291_0139_000000000-TEST',
        'flowcell_id': '000000000-TEST',
        'platform_igf_id': 'M03291',
        'flowcell': 'MISEQ',
    }, {
        'seqrun_igf_id': '180416_M03291_0140_000000000-TEST',
        'flowcell_id': '000000000-TEST',
        'platform_igf_id': 'M03291',
        'flowcell': 'MISEQ',
    }]
    project_data = [{'project_igf_id': 'IGFQ000123_test_10-4-2018_Miseq'}]
    sample_data = [{
        'sample_igf_id': 'IGF00123',
        'project_igf_id': 'IGFQ000123_test_10-4-2018_Miseq'
    }, {
        'sample_igf_id': 'IGF00124',
        'project_igf_id': 'IGFQ000123_test_10-4-2018_Miseq'
    }]
    experiment_data = [{
        'project_igf_id': 'IGFQ000123_test_10-4-2018_Miseq',
        'sample_igf_id': 'IGF00123',
        'experiment_igf_id': 'IGF00123_MISEQ',
        'library_name': 'IGF00123',
        'library_source': 'TRANSCRIPTOMIC_SINGLE_CELL',
        'library_strategy': 'RNA-SEQ',
        'experiment_type': 'POLYA-RNA',
        'library_layout': 'PAIRED',
        'platform_name': 'MISEQ',
        'singlecell_chemistry': 'TENX'
    }, {
        'project_igf_id': 'IGFQ000123_test_10-4-2018_Miseq',
        'sample_igf_id': 'IGF00124',
        'experiment_igf_id': 'IGF00124_MISEQ',
        'library_name': 'IGF00124',
        'library_source': 'TRANSCRIPTOMIC_SINGLE_CELL',
        'library_strategy': 'RNA-SEQ',
        'experiment_type': 'POLYA-RNA',
        'library_layout': 'PAIRED',
        'platform_name': 'MISEQ',
        'singlecell_chemistry': 'TENX'
    }]
    run_data = [{
        'experiment_igf_id': 'IGF00123_MISEQ',
        'seqrun_igf_id': '180416_M03291_0139_000000000-TEST',
        'run_igf_id': 'IGF00123_MISEQ_000000000-TEST_1',
        'lane_number': '1'
    }]
    pipeline_data = [{
        "pipeline_name": "PrimaryAnalysis",
        "pipeline_db": "sqlite:////aln.db",
    }, {
        "pipeline_name": "DemultiplexingFastq",
        "pipeline_db": "sqlite:////fastq.db",
    }]
    pipeline_seed_data = [
        {
            'pipeline_name': 'PrimaryAnalysis',
            'seed_id': 1,
            'seed_table': 'experiment'
        },
        {
            'pipeline_name': 'PrimaryAnalysis',
            'seed_id': 2,
            'seed_table': 'experiment'
        },
        {
            'pipeline_name': 'DemultiplexingFastq',
            'seed_id': 1,
            'seed_table': 'seqrun'
        },
        {
            'pipeline_name': 'DemultiplexingFastq',
            'seed_id': 2,
            'seed_table': 'seqrun'
        },
    ]
    update_data = [{
        'pipeline_name': 'PrimaryAnalysis',
        'seed_id': 2,
        'seed_table': 'experiment',
        'status': 'FINISHED'
    }, {
        'pipeline_name': 'DemultiplexingFastq',
        'seed_id': 2,
        'seed_table': 'seqrun',
        'status': 'FINISHED'
    }]

    # ---- load through one shared session; always release it --------------
    base.start_session()
    try:
        # PLATFORM
        pl = PlatformAdaptor(**{'session': base.session})
        pl.store_platform_data(data=platform_data)
        pl.store_flowcell_barcode_rule(data=flowcell_rule_data)

        # SEQRUN
        sra = SeqrunAdaptor(**{'session': base.session})
        sra.store_seqrun_and_attribute_data(data=seqrun_data)

        # PROJECT
        pa = ProjectAdaptor(**{'session': base.session})
        pa.store_project_and_attribute_data(data=project_data)

        # SAMPLE
        sa = SampleAdaptor(**{'session': base.session})
        sa.store_sample_and_attribute_data(data=sample_data)

        # EXPERIMENT (ExperimentAdaptor exposes its loader under this name)
        ea = ExperimentAdaptor(**{'session': base.session})
        ea.store_project_and_attribute_data(data=experiment_data)

        # RUN
        ra = RunAdaptor(**{'session': base.session})
        ra.store_run_and_attribute_data(data=run_data)

        # PIPELINE: register pipelines, seed them, then mark seed 2 done.
        pla = PipelineAdaptor(**{'session': base.session})
        pla.store_pipeline_data(data=pipeline_data)
        pla.create_pipeline_seed(data=pipeline_seed_data)
        pla.update_pipeline_seed(update_data)
    finally:
        base.close_session()