def setUp(self):
    """Create the test schema and load platform, seqrun and pipeline fixtures,
    then seed the demultiplexing pipeline with seqrun seed_id 1."""
    # JSON fixture files used to populate the test database.
    self.dbconfig = 'data/dbconfig.json'
    self.platform_json = 'data/platform_db_data.json'
    self.seqrun_json = 'data/seqrun_db_data.json'
    self.pipeline_json = 'data/pipeline_data.json'
    dbparam = read_dbconf_json(self.dbconfig)
    base = BaseAdaptor(**dbparam)
    self.engine = base.engine
    self.dbname = dbparam['dbname']
    Base.metadata.create_all(self.engine)  # build all tables on the test engine
    self.session_class = base.get_session_class()
    base.start_session()
    # load platform data
    pl = PlatformAdaptor(**{'session': base.session})
    pl.store_platform_data(data=read_json_data(self.platform_json))
    # load seqrun data
    sra = SeqrunAdaptor(**{'session': base.session})
    sra.store_seqrun_and_attribute_data(
        data=read_json_data(self.seqrun_json))
    # load pipeline data (original comment said "platform" — copy/paste slip)
    pla = PipelineAdaptor(**{'session': base.session})
    pla.store_pipeline_data(data=read_json_data(self.pipeline_json))
    # seed the demultiplexing pipeline for the first seqrun record
    pipeline_seed_data = [
        {
            'pipeline_name': 'demultiplexing_fastq',
            'seed_id': '1',
            'seed_table': 'seqrun'
        },
    ]
    pla.create_pipeline_seed(data=pipeline_seed_data)
    base.close_session()
def setUp(self):
    """Initialise a BaseAdaptor from the test config and build the schema."""
    self.dbconfig = 'data/dbconfig.json'
    db_params = read_dbconf_json(self.dbconfig)
    self.base = BaseAdaptor(**db_params)
    self.dbname = db_params['dbname']
    self.engine = self.base.engine
    Base.metadata.create_all(self.engine)
def setUp(self):
    """Build the test schema and cache a configured session class."""
    self.dbconfig = 'data/dbconfig.json'
    conf = read_dbconf_json(self.dbconfig)
    adaptor = BaseAdaptor(**conf)
    self.dbname = conf['dbname']
    self.engine = adaptor.engine
    Base.metadata.create_all(self.engine)
    self.session_class = adaptor.get_session_class()
def __init__(
      self, igf_id_list, table_name, pipeline_name, dbconfig_file,
      log_slack=True, log_asana=True, slack_config=None,
      asana_project_id=None, asana_config=None, clean_up=True):
  '''
  :param igf_id_list: A list of igf ids to uniquely identify the entity
  :param table_name: A database table name to look for the igf id,
                     available options are 'project', 'sample', 'experiment',
                     'run', 'file', 'seqrun', 'collection'
  :param pipeline_name: A pipeline name to change the status of the seed
  :param dbconfig_file: A file containing the database configuration
  :param log_slack: A boolean flag for toggling Slack messages, default True
  :param log_asana: A boolean flag for toggling Asana messages, default True
  :param slack_config: A file containing Slack tokens, default None
  :param asana_config: A file containing Asana tokens, default None
  :param asana_project_id: A numeric Asana project id, default is None
  :param clean_up: Clean up input file once its processed, default True
  :raises ValueError: If table_name is unsupported, or a toggled logger is
                      missing its configuration
  '''
  # NOTE: the original wrapped this body in a no-op ``try/except: raise``;
  # removed, since a bare re-raise changes nothing.
  self.igf_id_list = igf_id_list
  # only these tables support pipeline seeding
  if table_name not in ('project', 'sample', 'experiment', 'run',
                        'file', 'seqrun', 'collection'):
    raise ValueError(
      'Table {0} not supported for pipeline seed'.format(table_name))
  self.table_name = table_name
  self.pipeline_name = pipeline_name
  self.clean_up = clean_up
  dbparams = read_dbconf_json(dbconfig_file)
  self.base_adaptor = BaseAdaptor(**dbparams)
  self.log_slack = log_slack
  self.log_asana = log_asana
  if log_slack and slack_config is None:
    raise ValueError('Missing slack config file')
  elif log_slack and slack_config:
    self.igf_slack = IGF_slack(slack_config)  # add slack object
  if log_asana and \
     (asana_config is None or asana_project_id is None):
    raise ValueError('Missing asana config file or asana project id')
  elif log_asana and asana_config and asana_project_id:
    self.igf_asana = IGF_asana(
      asana_config, asana_project_id)  # add asana object
def __init__(self, projet_info_path, dbconfig, user_account_template,
             log_slack=True, slack_config=None, check_hpc_user=False,
             hpc_user=None, hpc_address=None, ldap_server=None,
             setup_irods=True, notify_user=True,
             default_user_email='*****@*****.**',
             project_lookup_column='project_igf_id',
             user_lookup_column='email_id',
             data_authority_column='data_authority',
             sample_lookup_column='sample_igf_id',
             barcode_check_keyword='barcode_check',
             metadata_sheet_name='Project metadata',
             sendmail_exe='/usr/sbin/sendmail'):
  '''
  :param projet_info_path: Path to the project info input
                           (parameter name keeps the historical
                           "projet" spelling — callers rely on it)
  :param dbconfig: A database configuration file
  :param user_account_template: Template file for user account notification
  :param log_slack: A boolean flag for toggling Slack messages, default True
  :param slack_config: A file containing Slack tokens, default None
  :param check_hpc_user: Toggle for checking user accounts on HPC, default False
  :param hpc_user: HPC user name, required if check_hpc_user is True
  :param hpc_address: HPC address, required if check_hpc_user is True
  :param ldap_server: LDAP server, required if check_hpc_user is True
  :param setup_irods: Toggle for iRODS setup, default True
  :param notify_user: Toggle for user notification, default True
  :param default_user_email: Fallback email id
  :param project_lookup_column: Metadata column for project id lookup
  :param user_lookup_column: Metadata column for user lookup
  :param data_authority_column: Metadata column for data authority flag
  :param sample_lookup_column: Metadata column for sample id lookup
  :param barcode_check_keyword: Project attribute keyword for barcode checks
  :param metadata_sheet_name: Name of the metadata worksheet
  :param sendmail_exe: Path to the sendmail executable
  :raises ValueError: If a toggled feature is missing its configuration
  '''
  # NOTE: removed the original no-op ``try/except: raise`` wrapper.
  self.projet_info_path = projet_info_path
  self.user_account_template = user_account_template
  self.project_lookup_column = project_lookup_column
  self.user_lookup_column = user_lookup_column
  self.sample_lookup_column = sample_lookup_column
  self.data_authority_column = data_authority_column
  self.log_slack = log_slack
  dbparams = read_dbconf_json(dbconfig)
  base = BaseAdaptor(**dbparams)
  self.session_class = base.get_session_class()
  self.setup_irods = setup_irods
  self.notify_user = notify_user
  self.default_user_email = default_user_email
  self.barcode_check_keyword = barcode_check_keyword
  self.check_hpc_user = check_hpc_user
  self.hpc_user = hpc_user
  self.hpc_address = hpc_address
  self.ldap_server = ldap_server
  self.metadata_sheet_name = metadata_sheet_name
  self.sendmail_exe = sendmail_exe
  if log_slack and slack_config is None:
    raise ValueError('Missing slack config file')
  elif log_slack and slack_config:
    self.igf_slack = IGF_slack(slack_config=slack_config)
  # hpc checks need all three of user, address and ldap server
  if check_hpc_user and (hpc_user is None or \
                         hpc_address is None or \
                         ldap_server is None):
    raise ValueError('Hpc user {0} address {1}, and ldap server {2} are required for check_hpc_user'.\
                     format(hpc_user,hpc_address,ldap_server))
def load_new_pipeline_data(data_file, dbconfig):
    '''
    A method for loading new data for pipeline table

    :param data_file: A JSON file containing pipeline entries
    :param dbconfig: A database configuration file path
    '''
    formatted_data = read_json_data(data_file)
    dbparam = read_dbconf_json(dbconfig)
    pp = PipelineAdaptor(**dbparam)
    pp.start_session()
    try:
        pp.store_pipeline_data(data=formatted_data)
    finally:
        # original leaked the session when store failed; always close it
        pp.close_session()
def load_new_platform_data(data_file, dbconfig):
    '''
    A method for loading new data for platform table

    :param data_file: A JSON file containing platform entries
    :param dbconfig: A database configuration file path
    '''
    formatted_data = read_json_data(data_file)
    dbparam = read_dbconf_json(dbconfig)
    pl = PlatformAdaptor(**dbparam)
    pl.start_session()
    try:
        pl.store_platform_data(data=formatted_data)
    finally:
        # original leaked the session when store failed; always close it
        pl.close_session()
def load_new_seqrun_data(data_file, dbconfig):
    '''
    A method for loading new data for seqrun table

    :param data_file: A JSON file containing seqrun entries
    :param dbconfig: A database configuration file path
    '''
    formatted_data = read_json_data(data_file)
    dbparam = read_dbconf_json(dbconfig)
    sr = SeqrunAdaptor(**dbparam)
    sr.start_session()
    try:
        sr.store_seqrun_and_attribute_data(data=formatted_data)
    finally:
        # original leaked the session when store failed; always close it
        sr.close_session()
def load_new_flowcell_data(data_file, dbconfig):
    '''
    A method for loading new data to flowcell table

    :param data_file: A JSON file containing flowcell barcode rules
    :param dbconfig: A database configuration file path
    '''
    flowcell_rule_data = read_json_data(data_file)
    dbparam = read_dbconf_json(dbconfig)
    pl = PlatformAdaptor(**dbparam)
    pl.start_session()
    try:
        pl.store_flowcell_barcode_rule(data=flowcell_rule_data)
    finally:
        # original leaked the session when store failed; always close it
        pl.close_session()
def __init__(self, seqrun_path, seqrun_igf_list, dbconfig_file,
             clean_up=True, json_collection_type='ILLUMINA_BCL_MD5',
             log_slack=True, log_asana=True, slack_config=None,
             asana_project_id=None, asana_config=None,
             samplesheet_name='SampleSheet.csv'):
  '''
  :param seqrun_path: A directory path for sequencing run home
  :param seqrun_igf_list: A file path listing sequencing runs to reset
  :param dbconfig_file: A file containing the database configuration
  :param clean_up: Clean up input file once its processed, default True
  :param json_collection_type: A collection type for md5 json file lookup,
                               default ILLUMINA_BCL_MD5
  :param log_slack: A boolean flag for toggling Slack messages, default True
  :param log_asana: A boolean flag for toggling Asana messages, default True
  :param slack_config: A file containing Slack tokens, default None
  :param asana_config: A file containing Asana tokens, default None
  :param asana_project_id: A numeric Asana project id, default is None
  :param samplesheet_name: Name of the samplesheet file, default SampleSheet.csv
  :raises ValueError: If a toggled logger is missing its configuration
  '''
  # NOTE: removed the original no-op ``try/except: raise`` wrapper.
  self.seqrun_path = seqrun_path
  self.seqrun_igf_list = seqrun_igf_list
  self.json_collection_type = json_collection_type
  self.log_slack = log_slack
  self.log_asana = log_asana
  self.clean_up = clean_up
  self.samplesheet_name = samplesheet_name
  dbparams = read_dbconf_json(dbconfig_file)
  self.base_adaptor = BaseAdaptor(**dbparams)
  if log_slack and slack_config is None:
    raise ValueError('Missing slack config file')
  elif log_slack and slack_config:
    self.igf_slack = IGF_slack(slack_config)  # add slack object
  if log_asana and \
     (asana_config is None or asana_project_id is None):
    raise ValueError('Missing asana config file or asana project id')
  elif log_asana and asana_config and asana_project_id:
    self.igf_asana = IGF_asana(
      asana_config, asana_project_id)  # add asana object
def setUp(self):
    """Rebuild the test DB from scratch and load a minimal platform/project/
    sample/seqrun/experiment fixture chain in a single committed session."""
    self.dbconfig = 'data/dbconfig.json'
    dbparam=read_dbconf_json(self.dbconfig)
    base = BaseAdaptor(**dbparam)
    self.engine = base.engine
    self.dbname=dbparam['dbname']
    # drop any leftover schema and db file from a previous run before rebuilding
    Base.metadata.drop_all(self.engine)
    if os.path.exists(self.dbname):
        os.remove(self.dbname)
    Base.metadata.create_all(self.engine)
    self.session_class=base.get_session_class()
    # re-create the adaptor from the session class so all loads share one session
    base = BaseAdaptor(**{'session_class':self.session_class})
    base.start_session()
    platform_data=[{
        "platform_igf_id" : "M001",
        "model_name" : "MISEQ" ,
        "vendor_name" : "ILLUMINA" ,
        "software_name" : "RTA",
        "software_version" : "RTA1.18.54"}]  # platform data
    flowcell_rule_data=[{"platform_igf_id":"M001",
                         "flowcell_type":"MISEQ",
                         "index_1":"NO_CHANGE",
                         "index_2":"NO_CHANGE"}]  # flowcell rule data
    pl=PlatformAdaptor(**{'session':base.session})
    pl.store_platform_data(data=platform_data)  # loading platform data
    pl.store_flowcell_barcode_rule(data=flowcell_rule_data)  # loading flowcell rules data
    project_data=[{'project_igf_id':'ProjectA'}]  # project data
    pa=ProjectAdaptor(**{'session':base.session})
    pa.store_project_and_attribute_data(data=project_data)  # load project data
    sample_data=[{'sample_igf_id':'SampleA',
                  'project_igf_id':'ProjectA'}]  # sample data
    sa=SampleAdaptor(**{'session':base.session})
    sa.store_sample_and_attribute_data(data=sample_data)  # store sample data
    seqrun_data=[{'seqrun_igf_id':'SeqrunA',
                  'flowcell_id':'000000000-D0YLK',
                  'platform_igf_id':'M001',
                  'flowcell':'MISEQ'}]  # seqrun data
    sra=SeqrunAdaptor(**{'session':base.session})
    sra.store_seqrun_and_attribute_data(data=seqrun_data)  # load seqrun data
    experiment_data=[{'experiment_igf_id':'ExperimentA',
                      'sample_igf_id':'SampleA',
                      'library_name':'SampleA',
                      'platform_name':'MISEQ',
                      'project_igf_id':'ProjectA'}]  # experiment data
    ea=ExperimentAdaptor(**{'session':base.session})
    ea.store_project_and_attribute_data(data=experiment_data)  # load experiment data
    base.commit_session()
    base.close_session()
def __init__(self, dbconfig_file, log_slack=True, slack_config=None):
    '''
    :param dbconfig_file: A database configuration file path
    :param log_slack: A boolean flag for toggling Slack messages, default True
    :param slack_config: A file containing Slack tokens, default None
    :raises ValueError: If Slack logging is enabled without a config file
    '''
    # NOTE: removed the original no-op ``try/except: raise`` wrapper.
    dbparams = read_dbconf_json(dbconfig_file)
    self.base_adaptor = BaseAdaptor(**dbparams)
    self.log_slack = log_slack
    if log_slack and slack_config is None:
        raise ValueError('Missing slack config file')
    elif log_slack and slack_config:
        self.igf_slack = IGF_slack(slack_config)  # add slack object
def setUp(self):
    """Build the test schema and load two projects plus four samples,
    the last sample flagged with status FAILED."""
    self.dbconfig = 'data/dbconfig.json'
    dbparam = read_dbconf_json(self.dbconfig)
    base = BaseAdaptor(**dbparam)
    self.engine = base.engine
    self.dbname = dbparam['dbname']
    Base.metadata.create_all(self.engine)
    self.session_class = base.get_session_class()
    # two project records sharing the same deadline and comments
    project_data = [{
        'project_igf_id': 'IGFP0001_test_22-8-2017_rna',
        'project_name': 'test_22-8-2017_rna',
        'description': 'Its project 1',
        'project_deadline': 'Before August 2017',
        'comments': 'Some samples are treated with drug X',
    }, {
        'project_igf_id': 'IGFP0002_test_22-8-2017_rna',
        'project_name': 'test_23-8-2017_rna',
        'description': 'Its project 2',
        'project_deadline': 'Before August 2017',
        'comments': 'Some samples are treated with drug X',
    }]
    base.start_session()
    pa = ProjectAdaptor(**{'session': base.session})
    pa.store_project_and_attribute_data(data=project_data)
    sa = SampleAdaptor(**{'session': base.session})
    # four samples for project 1; IGFS004 carries an explicit FAILED status
    sample_data = [
        {
            'sample_igf_id': 'IGFS001',
            'project_igf_id': 'IGFP0001_test_22-8-2017_rna',
        },
        {
            'sample_igf_id': 'IGFS002',
            'project_igf_id': 'IGFP0001_test_22-8-2017_rna',
        },
        {
            'sample_igf_id': 'IGFS003',
            'project_igf_id': 'IGFP0001_test_22-8-2017_rna',
        },
        {
            'sample_igf_id': 'IGFS004',
            'project_igf_id': 'IGFP0001_test_22-8-2017_rna',
            'status': 'FAILED',
        },
    ]
    sa.store_sample_and_attribute_data(data=sample_data)
    base.close_session()
def setUp(self):
    """Stage a three-project DataFrame fixture and create an empty schema."""
    self.dbconfig = 'data/dbconfig.json'
    db_params = read_dbconf_json(self.dbconfig)
    # one project without a barcode flag, one ON, one OFF
    project_rows = [
        {'project_igf_id': 'IGFP001_test1_24-1-18'},
        {'project_igf_id': 'IGFP002_test1_24-1-18',
         'barcode_check': 'ON'},
        {'project_igf_id': 'IGFP003_test1_24-1-18',
         'barcode_check': 'OFF'},
    ]
    self.data = pd.DataFrame(project_rows)
    adaptor = BaseAdaptor(**db_params)
    self.engine = adaptor.engine
    self.dbname = db_params['dbname']
    Base.metadata.create_all(self.engine)
    self.session_class = adaptor.get_session_class()
def setUp(self):
    """Create the test schema and stage platform and flowcell-rule fixtures
    as instance attributes for later loading."""
    self.dbconfig = 'data/dbconfig.json'
    conf = read_dbconf_json(self.dbconfig)
    adaptor = BaseAdaptor(**conf)
    self.engine = adaptor.engine
    self.dbname = conf['dbname']
    Base.metadata.create_all(self.engine)
    self.session_class = adaptor.get_session_class()
    # three sequencers: MiSeq, NextSeq and HiSeq4000
    self.platform_data = [
        {"platform_igf_id": "M03291",
         "model_name": "MISEQ",
         "vendor_name": "ILLUMINA",
         "software_name": "RTA",
         "software_version": "RTA1.18.54"},
        {"platform_igf_id": "NB501820",
         "model_name": "NEXTSEQ",
         "vendor_name": "ILLUMINA",
         "software_name": "RTA",
         "software_version": "RTA2"},
        {"platform_igf_id": "K00345",
         "model_name": "HISEQ4000",
         "vendor_name": "ILLUMINA",
         "software_name": "RTA",
         "software_version": "RTA2"},
    ]
    # index-handling rules keyed by platform and flowcell type
    self.flowcell_rule_data = [
        {"platform_igf_id": "K00345",
         "flowcell_type": "HiSeq 3000/4000 SR",
         "index_1": "NO_CHANGE",
         "index_2": "NO_CHANGE"},
        {"platform_igf_id": "K00345",
         "flowcell_type": "HiSeq 3000/4000 PE",
         "index_1": "NO_CHANGE",
         "index_2": "REVCOMP"},
        {"platform_igf_id": "NB501820",
         "flowcell_type": "NEXTSEQ",
         "index_1": "NO_CHANGE",
         "index_2": "REVCOMP"},
        {"platform_igf_id": "M03291",
         "flowcell_type": "MISEQ",
         "index_1": "NO_CHANGE",
         "index_2": "NO_CHANGE"},
    ]
def find_new_analysis_seeds(dbconfig_path, pipeline_name, project_name_file,
                            species_name_list, fastq_type,
                            library_source_list):
    '''
    A utils method for finding and seeding new experiments for analysis

    :param dbconfig_path: A database configuration file
    :param pipeline_name: Pipeline name
    :param project_name_file: A file containing the list of projects for
                              seeding pipeline; an empty file means no
                              project-level filtering
    :param species_name_list: A list of species to consider for seeding analysis
    :param fastq_type: Fastq collection type
    :param library_source_list: A list of library source info to consider
                                for seeding analysis
    :returns: List of available experiments or None and a list of seeded
              experiments or None
    :raises IOError: If project_name_file does not exist
    '''
    # NOTE: removed the original no-op ``try/except: raise`` wrapper and a
    # stale docstring entry for a nonexistent ``slack_config`` parameter.
    available_exps = None
    seeded_exps = None
    if not os.path.exists(project_name_file):
        raise IOError('File {0} not found'.format(project_name_file))
    with open(project_name_file, 'r') as fp:
        project_list = fp.readlines()  # read list of projects from file
    project_list = [i.strip() for i in project_list]
    if len(project_list) == 0:
        project_list = None  # no filter when the file is empty
    dbparam = read_dbconf_json(dbconfig_path)
    pl = PipelineAdaptor(**dbparam)
    pl.start_session()
    try:
        available_exps, seeded_exps = \
            pl.seed_new_experiments(
                pipeline_name=pipeline_name,
                species_name_list=species_name_list,
                fastq_type=fastq_type,
                project_list=project_list,
                library_source_list=library_source_list)
    finally:
        # original leaked the session when seeding failed; always close it
        pl.close_session()
    return available_exps, seeded_exps
def setUp(self):
    """Build the test schema, load two projects and one user, and assign the
    user to both projects (data authority only on the first)."""
    self.dbconfig = 'data/dbconfig.json'
    dbparam = read_dbconf_json(self.dbconfig)
    base = BaseAdaptor(**dbparam)
    self.engine = base.engine
    self.dbname = dbparam['dbname']
    Base.metadata.create_all(self.engine)
    self.session_class = base.get_session_class()
    project_data = [{
        'project_igf_id': 'IGFP0001_test_22-8-2017_rna',
        'project_name': 'test_22-8-2017_rna',
        'description': 'Its project 1',
        'project_deadline': 'Before August 2017',
        'comments': 'Some samples are treated with drug X',
    }, {
        'project_igf_id': 'IGFP0002_test_22-8-2017_rna',
        'project_name': 'test_23-8-2017_rna',
        'description': 'Its project 2',
        'project_deadline': 'Before August 2017',
        'comments': 'Some samples are treated with drug X'
    }]
    user_data = [{
        'name': 'UserA',
        'email_id': '*****@*****.**',
        'username': '******'
    }]
    # UserA is data authority for project 1 only
    project_user_data = [{
        'project_igf_id': 'IGFP0001_test_22-8-2017_rna',
        'email_id': '*****@*****.**',
        'data_authority': True
    }, {
        'project_igf_id': 'IGFP0002_test_22-8-2017_rna',
        'email_id': '*****@*****.**'
    }]
    base.start_session()
    ua = UserAdaptor(**{'session': base.session})
    ua.store_user_data(data=user_data)
    pa = ProjectAdaptor(**{'session': base.session})
    pa.store_project_and_attribute_data(data=project_data)
    pa.assign_user_to_project(data=project_user_data)
    base.close_session()
def setUp(self):
    """Create the test schema and load platform data, flowcell barcode rules
    and seqrun data from JSON fixtures in one session."""
    # JSON fixture files used to populate the test database.
    self.dbconfig='data/dbconfig.json'
    self.platform_json='data/platform_db_data.json'
    self.seqrun_json='data/seqrun_db_data.json'
    self.pipeline_json='data/pipeline_data.json'
    self.flowcell_rules_json='data/flowcell_rules.json'
    dbparam=read_dbconf_json(self.dbconfig)
    base=BaseAdaptor(**dbparam)
    self.engine=base.engine
    self.dbname=dbparam['dbname']
    Base.metadata.create_all(self.engine)
    self.session_class=base.get_session_class()
    base.start_session()
    # load platform data
    pl=PlatformAdaptor(**{'session':base.session})
    pl.store_platform_data(data=read_json_data(self.platform_json))
    pl.store_flowcell_barcode_rule(data=read_json_data(self.flowcell_rules_json))
    # load seqrun data
    sra=SeqrunAdaptor(**{'session':base.session})
    sra.store_seqrun_and_attribute_data(data=read_json_data(self.seqrun_json))
    base.close_session()
def setUp(self):
    """Rebuild the test DB, load project/sample/experiment records and
    register two temp CSV files as analysis-HTML collections."""
    self.dbconfig = 'data/dbconfig.json'
    dbparam=read_dbconf_json(self.dbconfig)
    base = BaseAdaptor(**dbparam)
    self.engine = base.engine
    self.dbname=dbparam['dbname']
    # drop any leftover schema and db file from a previous run
    Base.metadata.drop_all(self.engine)
    if os.path.exists(self.dbname):
        os.remove(self.dbname)
    Base.metadata.create_all(self.engine)
    self.session_class=base.get_session_class()
    base.start_session()
    project_data=[{'project_igf_id':'ProjectA'}]
    pa=ProjectAdaptor(**{'session':base.session})
    pa.store_project_and_attribute_data(data=project_data)  # load project data
    sample_data=[{'sample_igf_id':'SampleA',
                  'project_igf_id':'ProjectA'}]  # sample data
    sa=SampleAdaptor(**{'session':base.session})
    sa.store_sample_and_attribute_data(data=sample_data)  # store sample data
    experiment_data=[{'experiment_igf_id':'ExperimentA',
                      'sample_igf_id':'SampleA',
                      'library_name':'SampleA',
                      'platform_name':'MISEQ',
                      'project_igf_id':'ProjectA'}]  # experiment data
    ea=ExperimentAdaptor(**{'session':base.session})
    ea.store_project_and_attribute_data(data=experiment_data)
    # write two tiny placeholder files to collect
    self.temp_dir=get_temp_dir()
    temp_files=['a.csv','b.csv']
    for temp_file in temp_files:
        with open(os.path.join(self.temp_dir,temp_file),'w') as fp:
            fp.write('A')
    # one collection row per temp file, all under the same collection name/type
    collection_data=[{'name':'ExperimentA',
                      'type':'AnalysisA_html',
                      'table':'experiment',
                      'file_path':os.path.join(self.temp_dir,temp_file)}
                     for temp_file in temp_files]
    ca=CollectionAdaptor(**{'session':base.session})
    ca.load_file_and_create_collection(data=collection_data,
                                       calculate_file_size_and_md5=False)
    # NOTE(review): unlike sibling fixtures, no base.commit_session() before
    # close — presumably the adaptors autosave; confirm data persists.
    base.close_session()
def _fetch_project_info_from_db(self):
    '''
    An internal method for fetching data from db

    :returns: A dataframe containing following columns
              project_igf_id, sample_igf_id, expected_read, total_read
    '''
    try:
        check_file_path(self.dbconfig_file)
        dbconf = read_dbconf_json(self.dbconfig_file)
        sa = SampleAdaptor(**dbconf)
        sa.start_session()
        # Per sample: max of the expected-read sample attribute and sum of the
        # R1 read counts across all runs, keeping samples with no runs via the
        # outer joins (platform / run-attribute filters also admit NULLs).
        query = sa.session.\
            query(Project.project_igf_id,
                  Sample.sample_igf_id,
                  func.max(Sample_attribute.attribute_value).label(self.expected_read_tag),
                  func.sum(Run_attribute.attribute_value).label(self.total_read_tag)
                  ).\
            outerjoin(Sample,Project.project_id==Sample.project_id).\
            outerjoin(Sample_attribute, Sample.sample_id==Sample_attribute.sample_id).\
            outerjoin(Experiment, Sample.sample_id==Experiment.sample_id).\
            outerjoin(Run,Experiment.experiment_id==Run.experiment_id).\
            outerjoin(Run_attribute,Run.run_id==Run_attribute.run_id).\
            filter((Experiment.platform_name.in_(self.platform_list))|(Experiment.platform_name.is_(None))).\
            filter(Sample_attribute.attribute_name==self.expected_read_tag).\
            filter((Run_attribute.attribute_name==self.r1_read_tag)|(Run_attribute.attribute_name.is_(None))).\
            group_by(Sample.sample_igf_id)
        records = sa.fetch_records(query=query, output_mode='dataframe')
        sa.close_session()
        # samples without any run rows get total_read 0 instead of NaN
        records[self.total_read_tag] = records[self.total_read_tag].fillna(0).astype(int)
        return records
    except:
        raise
def fetch_input(self):
    '''
    Fetch input method for base runnable

    Reads the required ``dbconfig`` param, builds a session class and
    publishes it as ``igf_session_class``; optionally builds an IGF_slack
    object from ``slack_config`` when ``log_slack`` is set.
    '''
    # NOTE: removed the original no-op ``try/except: raise`` wrapper.
    dbconfig = self.param_required('dbconfig')
    dbparams = read_dbconf_json(dbconfig)
    base = BaseAdaptor(**dbparams)
    session_class = base.get_session_class()
    self.param('igf_session_class', session_class)  # set session class for pipeline
    if self.param('log_slack'):
        slack_config = self.param_required('slack_config')
        igf_slack = IGF_slack(slack_config=slack_config)
        self.param('igf_slack', igf_slack)
def setUp(self):
    """Create the schema, write a seqrun md5 JSON fixture, register it as an
    ILLUMINA_BCL_MD5 collection and prepare the seqrun input list file."""
    self.dbconfig = 'data/dbconfig.json'
    dbparam = read_dbconf_json(self.dbconfig)
    base = BaseAdaptor(**dbparam)
    self.engine = base.engine
    self.dbname = dbparam['dbname']
    Base.metadata.create_all(self.engine)
    self.session_class = base.get_session_class()
    self.json_file_path = 'data/reset_samplesheet_md5/seqrun1_file_md5.json'
    json_data = pd.DataFrame([{
        'file_md5': '1e7531158974b5a5b7cbb7dde09ac779',
        'seqrun_file_name': 'SampleSheet.csv'
    }, {
        'file_md5': '2b22f945bc9e7e390af5432425783a03',
        'seqrun_file_name': 'RTAConfiguration.xml'
    }])
    with open(self.json_file_path, 'w') as jp:
        # 'records' is the documented orient value; the old abbreviation
        # 'record' is rejected by modern pandas
        json.dump(json_data.to_dict(orient='records'), jp, indent=4)
    self.initial_json_md5 = calculate_file_checksum(
        filepath=self.json_file_path)
    self.correct_samplesheet_md5 = '259ed03f2e8c45980de121f7c3a70565'
    self.json_collection_name = 'seqrun1'
    self.json_collection_type = 'ILLUMINA_BCL_MD5'
    self.seqrun_path = 'data/reset_samplesheet_md5'
    self.seqrun_input_list = 'data/reset_samplesheet_md5/seqrun_input_list.txt'
    ca = CollectionAdaptor(**{'session_class': self.session_class})
    ca.start_session()
    try:
        data = pd.DataFrame([{
            'name': self.json_collection_name,
            'type': self.json_collection_type,
            'table': 'seqrun',
            'file_path': self.json_file_path,
        }])
        ca.load_file_and_create_collection(data, autosave=True, hasher='md5')
    finally:
        # always release the session, even if collection loading fails
        ca.close_session()
    with open(self.seqrun_input_list, 'w') as fp:
        fp.write(self.json_collection_name)
def setUp(self):
    """Build the test schema and load a full fixture chain — platforms,
    flowcell rules, one project, seven samples, one seqrun, six experiments
    and six runs with R1 read counts — each via its own short-lived session."""
    self.dbconfig = 'data/dbconfig.json'
    dbparam = read_dbconf_json(self.dbconfig)
    base = BaseAdaptor(**dbparam)
    self.engine = base.engine
    self.dbname = dbparam['dbname']
    Base.metadata.create_all(self.engine)
    self.session_class = base.get_session_class()
    # load platform data
    platform_data=\
        [{"platform_igf_id" : "M03291" ,
          "model_name" : "MISEQ" ,
          "vendor_name" : "ILLUMINA" ,
          "software_name" : "RTA" ,
          "software_version" : "RTA1.18.54"
         },
         {"platform_igf_id" : "NB501820",
          "model_name" : "NEXTSEQ",
          "vendor_name" : "ILLUMINA",
          "software_name" : "RTA",
          "software_version" : "RTA2"
         },
         {"platform_igf_id" : "K00345",
          "model_name" : "HISEQ4000",
          "vendor_name" : "ILLUMINA",
          "software_name" : "RTA",
          "software_version" : "RTA2"
         }]
    flowcell_rule_data=\
        [{"platform_igf_id":"K00345",
          "flowcell_type":"HiSeq 3000/4000 SR",
          "index_1":"NO_CHANGE",
          "index_2":"NO_CHANGE"},
         {"platform_igf_id":"K00345",
          "flowcell_type":"HiSeq 3000/4000 PE",
          "index_1":"NO_CHANGE",
          "index_2":"REVCOMP"},
         {"platform_igf_id":"NB501820",
          "flowcell_type":"NEXTSEQ",
          "index_1":"NO_CHANGE",
          "index_2":"REVCOMP"},
         {"platform_igf_id":"M03291",
          "flowcell_type":"MISEQ",
          "index_1":"NO_CHANGE",
          "index_2":"NO_CHANGE"}]
    pl = PlatformAdaptor(**{'session_class': base.session_class})
    pl.start_session()
    pl.store_platform_data(data=platform_data)
    pl.store_flowcell_barcode_rule(data=flowcell_rule_data)
    pl.close_session()
    # load project data
    project_data = [{'project_igf_id': 'IGFQ000472_avik_28-3-2018_RNA'}]
    pa = ProjectAdaptor(**{'session_class': base.session_class})
    pa.start_session()
    pa.store_project_and_attribute_data(data=project_data)
    pa.close_session()
    # load samples — all share the same 40M expected-read attribute;
    # IGF109797_1 has no matching experiment/run below
    sample_data = [
        {
            'project_igf_id': 'IGFQ000472_avik_28-3-2018_RNA',
            'sample_igf_id': 'IGF109792',
            'expected_read': 40000000
        },
        {
            'project_igf_id': 'IGFQ000472_avik_28-3-2018_RNA',
            'sample_igf_id': 'IGF109793',
            'expected_read': 40000000
        },
        {
            'project_igf_id': 'IGFQ000472_avik_28-3-2018_RNA',
            'sample_igf_id': 'IGF109794',
            'expected_read': 40000000
        },
        {
            'project_igf_id': 'IGFQ000472_avik_28-3-2018_RNA',
            'sample_igf_id': 'IGF109795',
            'expected_read': 40000000
        },
        {
            'project_igf_id': 'IGFQ000472_avik_28-3-2018_RNA',
            'sample_igf_id': 'IGF109796',
            'expected_read': 40000000
        },
        {
            'project_igf_id': 'IGFQ000472_avik_28-3-2018_RNA',
            'sample_igf_id': 'IGF109797',
            'expected_read': 40000000
        },
        {
            'project_igf_id': 'IGFQ000472_avik_28-3-2018_RNA',
            'sample_igf_id': 'IGF109797_1',
            'expected_read': 40000000
        },
    ]
    sa = SampleAdaptor(**{'session_class': base.session_class})
    sa.start_session()
    sa.store_sample_and_attribute_data(data=sample_data)
    sa.close_session()
    # load seqrun data
    seqrun_data = [{
        'flowcell_id': 'HV2GJBBXX',
        'platform_igf_id': 'K00345',
        'seqrun_igf_id': '180518_K00345_0047_BHV2GJBBXX'
    }]
    sra = SeqrunAdaptor(**{'session_class': base.session_class})
    sra.start_session()
    sra.store_seqrun_and_attribute_data(data=seqrun_data)
    sra.close_session()
    # load experiment data — one HISEQ4000 experiment per sample (except IGF109797_1)
    experiment_data=\
        [{'experiment_igf_id': 'IGF109792_HISEQ4000',
          'library_name': 'IGF109792',
          'platform_name': 'HISEQ4000',
          'project_igf_id': 'IGFQ000472_avik_28-3-2018_RNA',
          'sample_igf_id': 'IGF109792',
         },
         {'experiment_igf_id': 'IGF109793_HISEQ4000',
          'library_name': 'IGF109793',
          'platform_name': 'HISEQ4000',
          'project_igf_id': 'IGFQ000472_avik_28-3-2018_RNA',
          'sample_igf_id': 'IGF109793',
         },
         {'experiment_igf_id': 'IGF109794_HISEQ4000',
          'library_name': 'IGF109794',
          'platform_name': 'HISEQ4000',
          'project_igf_id': 'IGFQ000472_avik_28-3-2018_RNA',
          'sample_igf_id': 'IGF109794',
         },
         {'experiment_igf_id': 'IGF109795_HISEQ4000',
          'library_name': 'IGF109795',
          'platform_name': 'HISEQ4000',
          'project_igf_id': 'IGFQ000472_avik_28-3-2018_RNA',
          'sample_igf_id': 'IGF109795',
         },
         {'experiment_igf_id': 'IGF109796_HISEQ4000',
          'library_name': 'IGF109796',
          'platform_name': 'HISEQ4000',
          'project_igf_id': 'IGFQ000472_avik_28-3-2018_RNA',
          'sample_igf_id': 'IGF109796',
         },
         {'experiment_igf_id': 'IGF109797_HISEQ4000',
          'library_name': 'IGF109797',
          'platform_name': 'HISEQ4000',
          'project_igf_id': 'IGFQ000472_avik_28-3-2018_RNA',
          'sample_igf_id': 'IGF109797',
         },
        ]
    ea = ExperimentAdaptor(**{'session_class': base.session_class})
    ea.start_session()
    ea.store_project_and_attribute_data(data=experiment_data)
    ea.close_session()
    # load run data — one lane-7 run per experiment with its R1 read count
    run_data=\
        [{'experiment_igf_id': 'IGF109792_HISEQ4000',
          'lane_number': '7',
          'run_igf_id': 'IGF109792_HISEQ4000_H2N3MBBXY_7',
          'seqrun_igf_id': '180518_K00345_0047_BHV2GJBBXX',
          'R1_READ_COUNT':288046541
         },
         {'experiment_igf_id': 'IGF109793_HISEQ4000',
          'lane_number': '7',
          'run_igf_id': 'IGF109793_HISEQ4000_H2N3MBBXY_7',
          'seqrun_igf_id': '180518_K00345_0047_BHV2GJBBXX',
          'R1_READ_COUNT':14666330
         },
         {'experiment_igf_id': 'IGF109794_HISEQ4000',
          'lane_number': '7',
          'run_igf_id': 'IGF109794_HISEQ4000_H2N3MBBXY_7',
          'seqrun_igf_id': '180518_K00345_0047_BHV2GJBBXX',
          'R1_READ_COUNT':5009143
         },
         {'experiment_igf_id': 'IGF109795_HISEQ4000',
          'lane_number': '7',
          'run_igf_id': 'IGF109795_HISEQ4000_H2N3MBBXY_7',
          'seqrun_igf_id': '180518_K00345_0047_BHV2GJBBXX',
          'R1_READ_COUNT':1391747
         },
         {'experiment_igf_id': 'IGF109796_HISEQ4000',
          'lane_number': '7',
          # NOTE(review): leading space in this run_igf_id looks like a typo
          # in the fixture — confirm before "fixing", tests may depend on it
          'run_igf_id': ' IGF109796_HISEQ4000_H2N3MBBXY_7',
          'seqrun_igf_id': '180518_K00345_0047_BHV2GJBBXX',
          'R1_READ_COUNT':1318008
         },
         {'experiment_igf_id': 'IGF109797_HISEQ4000',
          'lane_number': '7',
          'run_igf_id': 'IGF109797_HISEQ4000_H2N3MBBXY_7',
          'seqrun_igf_id': '180518_K00345_0047_BHV2GJBBXX',
          'R1_READ_COUNT':1216324
         },
        ]
    ra = RunAdaptor(**{'session_class': base.session_class})
    ra.start_session()
    ra.store_run_and_attribute_data(data=run_data)
    ra.close_session()
def setUp(self):
    """Rebuild the test DB, create three dummy analysis files in a temp dir
    and load platform/project/sample/seqrun/experiment/run fixtures in one
    committed session."""
    self.dbconfig = 'data/dbconfig.json'
    dbparam = read_dbconf_json(self.dbconfig)
    base = BaseAdaptor(**dbparam)
    self.engine = base.engine
    self.dbname = dbparam['dbname']
    # drop any leftover schema and db file from a previous run
    Base.metadata.drop_all(self.engine)
    if os.path.exists(self.dbname):
        os.remove(self.dbname)
    Base.metadata.create_all(self.engine)
    self.session_class = base.get_session_class()
    self.temp_work_dir = get_temp_dir()
    self.temp_base_dir = get_temp_dir()
    self.input_list = ['a.cram', 'a.vcf.gz', 'b.tar.gz']
    for file_name in self.input_list:
        file_path = os.path.join(self.temp_work_dir, file_name)
        with open(file_path, 'w') as fq:
            fq.write('AAAA')  # create input files
    # re-create the adaptor from the session class so all loads share one session
    base = BaseAdaptor(**{'session_class': self.session_class})
    base.start_session()
    platform_data = [{
        "platform_igf_id": "M001",
        "model_name": "MISEQ",
        "vendor_name": "ILLUMINA",
        "software_name": "RTA",
        "software_version": "RTA1.18.54"
    }]  # platform data
    flowcell_rule_data = [{
        "platform_igf_id": "M001",
        "flowcell_type": "MISEQ",
        "index_1": "NO_CHANGE",
        "index_2": "NO_CHANGE"
    }]  # flowcell rule data
    pl = PlatformAdaptor(**{'session': base.session})
    pl.store_platform_data(data=platform_data)  # loading platform data
    pl.store_flowcell_barcode_rule(
        data=flowcell_rule_data)  # loading flowcell rules data
    project_data = [{'project_igf_id': 'ProjectA'}]  # project data
    pa = ProjectAdaptor(**{'session': base.session})
    pa.store_project_and_attribute_data(
        data=project_data)  # load project data
    sample_data = [{
        'sample_igf_id': 'SampleA',
        'project_igf_id': 'ProjectA'
    }]  # sample data
    sa = SampleAdaptor(**{'session': base.session})
    sa.store_sample_and_attribute_data(
        data=sample_data)  # store sample data
    seqrun_data = [{
        'seqrun_igf_id': 'SeqrunA',
        'flowcell_id': '000000000-D0YLK',
        'platform_igf_id': 'M001',
        'flowcell': 'MISEQ'
    }]  # seqrun data
    sra = SeqrunAdaptor(**{'session': base.session})
    sra.store_seqrun_and_attribute_data(
        data=seqrun_data)  # load seqrun data
    experiment_data = [{
        'experiment_igf_id': 'ExperimentA',
        'sample_igf_id': 'SampleA',
        'library_name': 'SampleA',
        'platform_name': 'MISEQ',
        'project_igf_id': 'ProjectA'
    }]  # experiment data
    ea = ExperimentAdaptor(**{'session': base.session})
    ea.store_project_and_attribute_data(
        data=experiment_data)  # load experiment data
    run_data = [{
        'run_igf_id': 'RunA',
        'experiment_igf_id': 'ExperimentA',
        'seqrun_igf_id': 'SeqrunA',
        'lane_number': '1'
    }]  # run data
    ra = RunAdaptor(**{'session': base.session})
    ra.store_run_and_attribute_data(data=run_data)  # load run data
    base.commit_session()
    base.close_session()
def test_read_dbconf_json(self):
    # An invalid/malformed db config file must be rejected with ValueError.
    with self.assertRaises(ValueError):
        read_dbconf_json(dbconfig=self.dbconfig)
def setUp(self):
    """Build the test database and load the full fixture set used by the
    tests: platform + barcode rules, project, sample, seqrun, pipelines,
    fastq files, fastq collections, experiments and runs."""
    self.dbconfig = 'data/dbconfig.json'
    dbparam = read_dbconf_json(self.dbconfig)
    db_adaptor = BaseAdaptor(**dbparam)
    self.engine = db_adaptor.engine
    self.dbname = dbparam['dbname']
    Base.metadata.create_all(self.engine)
    self.session_class = db_adaptor.get_session_class()
    db_adaptor.start_session()
    platform_data = [
        {"platform_igf_id": "M03291",
         "model_name": "MISEQ",
         "vendor_name": "ILLUMINA",
         "software_name": "RTA",
         "software_version": "RTA1.18.54"}]
    flowcell_rule_data = [
        {"platform_igf_id": "M03291",
         "flowcell_type": "MISEQ",
         "index_1": "NO_CHANGE",
         "index_2": "NO_CHANGE"}]
    platform_adaptor = PlatformAdaptor(session=db_adaptor.session)
    platform_adaptor.store_platform_data(data=platform_data)
    platform_adaptor.store_flowcell_barcode_rule(data=flowcell_rule_data)
    project_data = [{'project_igf_id': 'IGFQ000123_avik_10-4-2018_Miseq'}]
    project_adaptor = ProjectAdaptor(session=db_adaptor.session)
    project_adaptor.store_project_and_attribute_data(data=project_data)
    sample_data = [
        {'sample_igf_id': 'IGF103923',
         'project_igf_id': 'IGFQ000123_avik_10-4-2018_Miseq',
         'species_name': 'HG38'}]
    sample_adaptor = SampleAdaptor(session=db_adaptor.session)
    sample_adaptor.store_sample_and_attribute_data(data=sample_data)
    seqrun_data = [
        {'seqrun_igf_id': '180416_M03291_0139_000000000-BRN47',
         'flowcell_id': '000000000-BRN47',
         'platform_igf_id': 'M03291',
         'flowcell': 'MISEQ'}]
    seqrun_adaptor = SeqrunAdaptor(session=db_adaptor.session)
    seqrun_adaptor.store_seqrun_and_attribute_data(data=seqrun_data)
    pipeline_data = [
        {"pipeline_name": "PrimaryAnalysis",
         "pipeline_db": "sqlite:////bcl2fastq.db"},
        {"pipeline_name": "DemultiplexIlluminaFastq",
         "pipeline_db": "sqlite:////bcl2fastq.db"}]
    pipeline_adaptor = PipelineAdaptor(session=db_adaptor.session)
    pipeline_adaptor.store_pipeline_data(data=pipeline_data)
    # fastq file records referenced by the collections below
    file_data = [
        {'file_path': '/path/S20180405S_S1_L001_R1_001.fastq.gz',
         'location': 'HPC_PROJECT',
         'md5': 'fd5a95c18ebb7145645e95ce08d729e4',
         'size': '1528121404'},
        {'file_path': '/path/S20180405S_S1_L001_R2_001.fastq.gz',
         'location': 'HPC_PROJECT',
         'md5': 'fd5a95c18ebb7145645e95ce08d729e4',
         'size': '1467047580'},
        {'file_path': '/path/S20180405S_S3_L001_R2_001.fastq.gz',
         'location': 'HPC_PROJECT',
         'md5': 'fd5a95c18ebb7145645e95ce08d729e4',
         'size': '1467047580'}]
    file_adaptor = FileAdaptor(session=db_adaptor.session)
    file_adaptor.store_file_and_attribute_data(data=file_data)
    collection_data = [
        {'name': 'IGF103923_MISEQ_000000000-BRN47_1',
         'type': 'demultiplexed_fastq',
         'table': 'run'},
        {'name': 'IGF103923_MISEQ1_000000000-BRN47_1',
         'type': 'demultiplexed_fastq',
         'table': 'run'}]
    collection_files_data = [
        {'name': 'IGF103923_MISEQ_000000000-BRN47_1',
         'type': 'demultiplexed_fastq',
         'file_path': '/path/S20180405S_S1_L001_R1_001.fastq.gz'},
        {'name': 'IGF103923_MISEQ_000000000-BRN47_1',
         'type': 'demultiplexed_fastq',
         'file_path': '/path/S20180405S_S1_L001_R2_001.fastq.gz'},
        {'name': 'IGF103923_MISEQ1_000000000-BRN47_1',
         'type': 'demultiplexed_fastq',
         'file_path': '/path/S20180405S_S3_L001_R2_001.fastq.gz'}]
    collection_adaptor = CollectionAdaptor(session=db_adaptor.session)
    collection_adaptor.store_collection_and_attribute_data(data=collection_data)
    collection_adaptor.create_collection_group(data=collection_files_data)
    experiment_data = [
        {'project_igf_id': 'IGFQ000123_avik_10-4-2018_Miseq',
         'sample_igf_id': 'IGF103923',
         'experiment_igf_id': 'IGF103923_MISEQ',
         'library_name': 'IGF103923',
         'library_source': 'TRANSCRIPTOMIC_SINGLE_CELL',
         'library_strategy': 'RNA-SEQ',
         'experiment_type': 'TENX-TRANSCRIPTOME-3P',
         'library_layout': 'PAIRED',
         'platform_name': 'MISEQ'},
        {'project_igf_id': 'IGFQ000123_avik_10-4-2018_Miseq',
         'sample_igf_id': 'IGF103923',
         'experiment_igf_id': 'IGF103923_MISEQ1',
         'library_name': 'IGF103923_1',
         'library_source': 'GENOMIC_SINGLE_CELL',
         'library_strategy': 'WGS',
         'experiment_type': 'UNKNOWN',
         'library_layout': 'PAIRED',
         'platform_name': 'MISEQ'}]
    experiment_adaptor = ExperimentAdaptor(session=db_adaptor.session)
    # NOTE(review): "project" in the method name, but it stores the
    # experiment rows -- presumably the adaptor's real API name
    experiment_adaptor.store_project_and_attribute_data(data=experiment_data)
    run_data = [
        {'experiment_igf_id': 'IGF103923_MISEQ',
         'seqrun_igf_id': '180416_M03291_0139_000000000-BRN47',
         'run_igf_id': 'IGF103923_MISEQ_000000000-BRN47_1',
         'lane_number': '1'},
        {'experiment_igf_id': 'IGF103923_MISEQ1',
         'seqrun_igf_id': '180416_M03291_0139_000000000-BRN47',
         'run_igf_id': 'IGF103923_MISEQ1_000000000-BRN47_1',
         'lane_number': '1'}]
    run_adaptor = RunAdaptor(session=db_adaptor.session)
    run_adaptor.store_run_and_attribute_data(data=run_data)
    db_adaptor.close_session()
def setUp(self):
    """Build the test database and load one project, three samples and
    three experiments used by the tests in this class."""
    self.dbconfig = 'data/dbconfig.json'
    dbparam = read_dbconf_json(self.dbconfig)
    db_adaptor = BaseAdaptor(**dbparam)
    self.engine = db_adaptor.engine
    self.dbname = dbparam['dbname']
    Base.metadata.create_all(self.engine)
    db_adaptor.start_session()
    self.session_class = db_adaptor.get_session_class()
    project_data = [
        {'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc',
         'project_name': 'test_22-8-2017_rna',
         'description': 'Its project 1',
         'project_deadline': 'Before August 2017',
         'comments': 'Some samples are treated with drug X'}]
    project_adaptor = ProjectAdaptor(session=db_adaptor.session)
    project_adaptor.store_project_and_attribute_data(data=project_data)
    # three samples with decreasing amounts of library metadata
    sample_data = [
        {'sample_igf_id': 'IGF00001',
         'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc',
         'library_source': 'TRANSCRIPTOMIC_SINGLE_CELL',
         'library_strategy': 'RNA-SEQ',
         'experiment_type': 'POLYA-RNA'},
        {'sample_igf_id': 'IGF00003',
         'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc',
         'library_source': 'TRANSCRIPTOMIC_SINGLE_CELL',
         'experiment_type': 'POLYA-RNA'},
        {'sample_igf_id': 'IGF00002',
         'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc'}]
    sample_adaptor = SampleAdaptor(session=db_adaptor.session)
    sample_adaptor.store_sample_and_attribute_data(data=sample_data)
    experiment_data = [
        {'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc',
         'sample_igf_id': 'IGF00001',
         'experiment_igf_id': 'IGF00001_HISEQ4000',
         'library_name': 'IGF00001'},
        {'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc',
         'sample_igf_id': 'IGF00003',
         # NOTE(review): library_name 'IGF00001' for sample IGF00003 --
         # looks like it could be a copy-paste; confirm it is intended
         'experiment_igf_id': 'IGF00003_HISEQ4000',
         'library_name': 'IGF00001'},
        {'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc',
         'sample_igf_id': 'IGF00002',
         'experiment_igf_id': 'IGF00002_HISEQ4000',
         'library_name': 'IGF00002'}]
    experiment_adaptor = ExperimentAdaptor(session=db_adaptor.session)
    experiment_adaptor.store_project_and_attribute_data(data=experiment_data)
    db_adaptor.close_session()
def setUp(self):
    """Build the test database and load project, sample and experiment
    fixtures plus an 'alignment' pipeline seeded on experiment id 1."""
    self.dbconfig = 'data/dbconfig.json'
    dbparam = read_dbconf_json(self.dbconfig)
    db_adaptor = BaseAdaptor(**dbparam)
    self.engine = db_adaptor.engine
    self.dbname = dbparam['dbname']
    Base.metadata.create_all(self.engine)
    self.session_class = db_adaptor.get_session_class()
    db_adaptor.start_session()
    project_data = [
        {'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc',
         'project_name': 'test_22-8-2017_rna',
         'description': 'Its project 1',
         'project_deadline': 'Before August 2017',
         'comments': 'Some samples are treated with drug X'}]
    project_adaptor = ProjectAdaptor(session=db_adaptor.session)
    project_adaptor.store_project_and_attribute_data(data=project_data)
    # three samples with decreasing amounts of library metadata
    sample_data = [
        {'sample_igf_id': 'IGF00001',
         'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc',
         'library_source': 'TRANSCRIPTOMIC_SINGLE_CELL',
         'library_strategy': 'RNA-SEQ',
         'experiment_type': 'POLYA-RNA'},
        {'sample_igf_id': 'IGF00003',
         'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc',
         'library_source': 'TRANSCRIPTOMIC_SINGLE_CELL',
         'experiment_type': 'POLYA-RNA'},
        {'sample_igf_id': 'IGF00002',
         'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc'}]
    sample_adaptor = SampleAdaptor(session=db_adaptor.session)
    sample_adaptor.store_sample_and_attribute_data(data=sample_data)
    experiment_data = [
        {'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc',
         'sample_igf_id': 'IGF00001',
         'experiment_igf_id': 'IGF00001_HISEQ4000',
         'library_name': 'IGF00001'},
        {'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc',
         'sample_igf_id': 'IGF00003',
         # NOTE(review): library_name 'IGF00001' for sample IGF00003 --
         # looks like it could be a copy-paste; confirm it is intended
         'experiment_igf_id': 'IGF00003_HISEQ4000',
         'library_name': 'IGF00001'},
        {'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc',
         'sample_igf_id': 'IGF00002',
         'experiment_igf_id': 'IGF00002_HISEQ4000',
         'library_name': 'IGF00002'}]
    experiment_adaptor = ExperimentAdaptor(session=db_adaptor.session)
    experiment_adaptor.store_project_and_attribute_data(data=experiment_data)
    pipeline_data = [
        {"pipeline_name": "alignment",
         "pipeline_db": "sqlite:////data/aln.db",
         "pipeline_init_conf": {"input_dir": "data/fastq_dir/",
                                "output_dir": "data"},
         "pipeline_run_conf": {"output_dir": "data"}}]
    pipeline_adaptor = PipelineAdaptor(session=db_adaptor.session)
    pipeline_adaptor.store_pipeline_data(data=pipeline_data)
    # seed the alignment pipeline with the first experiment row
    pipeline_seed_data = [
        {'pipeline_name': 'alignment',
         'seed_id': '1',
         'seed_table': 'experiment'}]
    pipeline_adaptor.create_pipeline_seed(data=pipeline_seed_data)
    db_adaptor.close_session()
def setUp(self):
    """Build the test database for the collect-fastq tests: record the
    fixture paths/ids on self, then load platforms + barcode rules, one
    project, two samples and one NEXTSEQ seqrun.

    Fix: every sibling setUp in this file obtains the session class via
    the ``get_session_class()`` accessor; this one read the
    ``session_class`` attribute directly — made consistent.
    """
    self.dbconfig = 'data/dbconfig.json'
    self.fastq_dir = 'data/collect_fastq_dir/sc_1_8'
    self.model_name = 'NEXTSEQ'
    self.flowcell_id = 'TESTABC'
    self.seqrun_igf_id = '171003_NB500000_0089_TESTABC'
    self.file_location = 'HPC_PROJECT'
    self.samplesheet_file = 'data/collect_fastq_dir/sc_1_8/SampleSheet.csv'
    self.samplesheet_filename = 'SampleSheet.csv'
    self.manifest_name = 'file_manifest.csv'
    dbparam = read_dbconf_json(self.dbconfig)
    base = BaseAdaptor(**dbparam)
    self.engine = base.engine
    self.dbname = dbparam['dbname']
    Base.metadata.create_all(self.engine)
    # use the accessor, as every other setUp in this file does
    self.session_class = base.get_session_class()
    base.start_session()
    platform_data = [
        {"platform_igf_id": "M00001",
         "model_name": "MISEQ",
         "vendor_name": "ILLUMINA",
         "software_name": "RTA",
         "software_version": "RTA1.18.54"},
        {"platform_igf_id": "NB500000",
         "model_name": "NEXTSEQ",
         "vendor_name": "ILLUMINA",
         "software_name": "RTA",
         "software_version": "RTA2"},
        {"platform_igf_id": "K00000",
         "model_name": "HISEQ4000",
         "vendor_name": "ILLUMINA",
         "software_name": "RTA",
         "software_version": "RTA2"}]
    flowcell_rule_data = [
        {"platform_igf_id": "K00000",
         "flowcell_type": "HiSeq 3000/4000 SR",
         "index_1": "NO_CHANGE",
         "index_2": "NO_CHANGE"},
        {"platform_igf_id": "K00000",
         "flowcell_type": "HiSeq 3000/4000 PE",
         "index_1": "NO_CHANGE",
         "index_2": "REVCOMP"},
        {"platform_igf_id": "NB500000",
         "flowcell_type": "NEXTSEQ",
         "index_1": "NO_CHANGE",
         "index_2": "REVCOMP"},
        {"platform_igf_id": "M00001",
         "flowcell_type": "MISEQ",
         "index_1": "NO_CHANGE",
         "index_2": "NO_CHANGE"}]
    pl = PlatformAdaptor(**{'session': base.session})
    pl.store_platform_data(data=platform_data)
    pl.store_flowcell_barcode_rule(data=flowcell_rule_data)
    project_data = [
        {'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc',
         'project_name': 'test_22-8-2017_rna',
         'description': 'Its project 1',
         'project_deadline': 'Before August 2017',
         'comments': 'Some samples are treated with drug X'}]
    pa = ProjectAdaptor(**{'session': base.session})
    pa.store_project_and_attribute_data(data=project_data)
    sample_data = [
        {'sample_igf_id': 'IGF00001',
         'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc'},
        {'sample_igf_id': 'IGF00002',
         'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc'}]
    sa = SampleAdaptor(**{'session': base.session})
    sa.store_sample_and_attribute_data(data=sample_data)
    seqrun_data = [
        {'seqrun_igf_id': '171003_NB500000_0089_TESTABC',
         'flowcell_id': 'TESTABC',
         'platform_igf_id': 'NB500000',
         'flowcell': 'NEXTSEQ'}]
    sra = SeqrunAdaptor(**{'session': base.session})
    sra.store_seqrun_and_attribute_data(data=seqrun_data)
    base.close_session()
def setUp(self):
    """Build the test database with two seqruns (MISEQ + NEXTSEQ), one
    project, two samples, three experiments and their runs, plus one
    fastq file and collection per run."""
    self.dbconfig = 'data/dbconfig.json'
    dbparam = read_dbconf_json(self.dbconfig)
    db_adaptor = BaseAdaptor(**dbparam)
    self.engine = db_adaptor.engine
    self.dbname = dbparam['dbname']
    Base.metadata.create_all(self.engine)
    self.session_class = db_adaptor.get_session_class()
    db_adaptor.start_session()
    platform_data = [
        {"platform_igf_id": "M03291",
         "model_name": "MISEQ",
         "vendor_name": "ILLUMINA",
         "software_name": "RTA",
         "software_version": "RTA1.18.54"},
        {"platform_igf_id": "NB501820",
         "model_name": "NEXTSEQ",
         "vendor_name": "ILLUMINA",
         "software_name": "RTA",
         "software_version": "RTA2"},
        {"platform_igf_id": "K00345",
         "model_name": "HISEQ4000",
         "vendor_name": "ILLUMINA",
         "software_name": "RTA",
         "software_version": "RTA2"}]
    flowcell_rule_data = [
        {"platform_igf_id": "K00345",
         "flowcell_type": "HiSeq 3000/4000 SR",
         "index_1": "NO_CHANGE",
         "index_2": "NO_CHANGE"},
        {"platform_igf_id": "K00345",
         "flowcell_type": "HiSeq 3000/4000 PE",
         "index_1": "NO_CHANGE",
         "index_2": "REVCOMP"},
        {"platform_igf_id": "NB501820",
         "flowcell_type": "NEXTSEQ",
         "index_1": "NO_CHANGE",
         "index_2": "REVCOMP"},
        {"platform_igf_id": "M03291",
         "flowcell_type": "MISEQ",
         "index_1": "NO_CHANGE",
         "index_2": "NO_CHANGE"}]
    platform_adaptor = PlatformAdaptor(session=db_adaptor.session)
    platform_adaptor.store_platform_data(data=platform_data)
    platform_adaptor.store_flowcell_barcode_rule(data=flowcell_rule_data)
    seqrun_data = [
        {'seqrun_igf_id': '180416_M03291_0139_000000000-BRN47',
         'flowcell_id': '000000000-BRN47',
         'platform_igf_id': 'M03291',
         'flowcell': 'MISEQ'},
        {'seqrun_igf_id': '180416_NB03291_013_000000001-BRN47',
         'flowcell_id': '000000001-BRN47',
         'platform_igf_id': 'NB501820',
         'flowcell': 'NEXTSEQ'}]
    seqrun_adaptor = SeqrunAdaptor(session=db_adaptor.session)
    seqrun_adaptor.store_seqrun_and_attribute_data(data=seqrun_data)
    project_data = [{'project_igf_id': 'projectA'}]
    project_adaptor = ProjectAdaptor(session=db_adaptor.session)
    project_adaptor.store_project_and_attribute_data(data=project_data)
    sample_data = [
        {'sample_igf_id': 'sampleA',
         'project_igf_id': 'projectA',
         'species_name': 'HG38'},
        {'sample_igf_id': 'sampleB',
         'project_igf_id': 'projectA',
         'species_name': 'UNKNOWN'}]
    sample_adaptor = SampleAdaptor(session=db_adaptor.session)
    sample_adaptor.store_sample_and_attribute_data(data=sample_data)
    experiment_data = [
        {'project_igf_id': 'projectA',
         'sample_igf_id': 'sampleA',
         'experiment_igf_id': 'sampleA_MISEQ',
         'library_name': 'sampleA',
         'library_source': 'TRANSCRIPTOMIC_SINGLE_CELL',
         'library_strategy': 'RNA-SEQ',
         'experiment_type': 'TENX-TRANSCRIPTOME-3P',
         'library_layout': 'PAIRED',
         'platform_name': 'MISEQ'},
        {'project_igf_id': 'projectA',
         'sample_igf_id': 'sampleA',
         'experiment_igf_id': 'sampleA_NEXTSEQ',
         'library_name': 'sampleA',
         'library_source': 'UNKNOWN',
         'library_strategy': 'RNA-SEQ',
         'experiment_type': 'TENX-TRANSCRIPTOME-3P',
         'library_layout': 'PAIRED',
         'platform_name': 'NEXTSEQ'},
        {'project_igf_id': 'projectA',
         'sample_igf_id': 'sampleB',
         'experiment_igf_id': 'sampleB_MISEQ',
         'library_name': 'sampleB',
         'library_source': 'TRANSCRIPTOMIC_SINGLE_CELL',
         'library_strategy': 'RNA-SEQ',
         'experiment_type': 'TENX-TRANSCRIPTOME-3P',
         'library_layout': 'PAIRED',
         'platform_name': 'MISEQ'}]
    experiment_adaptor = ExperimentAdaptor(session=db_adaptor.session)
    # NOTE(review): "project" in the method name, but it stores the
    # experiment rows -- presumably the adaptor's real API name
    experiment_adaptor.store_project_and_attribute_data(data=experiment_data)
    run_data = [
        {'experiment_igf_id': 'sampleA_MISEQ',
         'seqrun_igf_id': '180416_M03291_0139_000000000-BRN47',
         'run_igf_id': 'sampleA_MISEQ_000000000-BRN47_1',
         'lane_number': '1'},
        {'experiment_igf_id': 'sampleA_NEXTSEQ',
         'seqrun_igf_id': '180416_NB03291_013_000000001-BRN47',
         'run_igf_id': 'sampleA_NEXTSEQ_000000001-BRN47_2',
         'lane_number': '2'},
        {'experiment_igf_id': 'sampleB_MISEQ',
         'seqrun_igf_id': '180416_M03291_0139_000000000-BRN47',
         'run_igf_id': 'sampleB_MISEQ_HVWN7BBXX_1',
         'lane_number': '1'}]
    run_adaptor = RunAdaptor(session=db_adaptor.session)
    run_adaptor.store_run_and_attribute_data(data=run_data)
    # one fastq file per run, registered and grouped into collections
    file_data = [
        {'file_path': '/path/sampleA_MISEQ_000000000-BRN47_1_R1.fastq.gz',
         'location': 'HPC_PROJECT',
         'md5': 'fd5a95c18ebb7145645e95ce08d729e4',
         'size': '1528121404'},
        {'file_path': '/path/sampleA_NEXTSEQ_000000001-BRN47_2_R1.fastq.gz',
         'location': 'HPC_PROJECT',
         'md5': 'fd5a95c18ebb7145645e95ce08d729e4',
         'size': '1528121404'},
        {'file_path': '/path/sampleB_MISEQ_HVWN7BBXX_1_R1.fastq.gz',
         'location': 'HPC_PROJECT',
         'md5': 'fd5a95c18ebb7145645e95ce08d729e4',
         'size': '1528121404'}]
    file_adaptor = FileAdaptor(session=db_adaptor.session)
    file_adaptor.store_file_and_attribute_data(data=file_data)
    collection_data = [
        {'name': 'sampleA_MISEQ_000000000-BRN47_1',
         'type': 'demultiplexed_fastq',
         'table': 'run'},
        {'name': 'sampleA_NEXTSEQ_000000001-BRN47_2',
         'type': 'demultiplexed_fastq',
         'table': 'run'},
        {'name': 'sampleB_MISEQ_HVWN7BBXX_1',
         'type': 'demultiplexed_fastq',
         'table': 'run'}]
    collection_files_data = [
        {'name': 'sampleA_MISEQ_000000000-BRN47_1',
         'type': 'demultiplexed_fastq',
         'file_path': '/path/sampleA_MISEQ_000000000-BRN47_1_R1.fastq.gz'},
        {'name': 'sampleA_NEXTSEQ_000000001-BRN47_2',
         'type': 'demultiplexed_fastq',
         'file_path': '/path/sampleA_NEXTSEQ_000000001-BRN47_2_R1.fastq.gz'},
        {'name': 'sampleB_MISEQ_HVWN7BBXX_1',
         'type': 'demultiplexed_fastq',
         'file_path': '/path/sampleB_MISEQ_HVWN7BBXX_1_R1.fastq.gz'}]
    collection_adaptor = CollectionAdaptor(session=db_adaptor.session)
    collection_adaptor.store_collection_and_attribute_data(data=collection_data)
    collection_adaptor.create_collection_group(data=collection_files_data)
    db_adaptor.close_session()