Exemplo n.º 1
0
 def setUp(self):
     """Build a test database and seed it with platform, seqrun and pipeline
     fixtures from json files, then seed the demultiplexing pipeline."""
     self.dbconfig = 'data/dbconfig.json'
     self.platform_json = 'data/platform_db_data.json'
     self.seqrun_json = 'data/seqrun_db_data.json'
     self.pipeline_json = 'data/pipeline_data.json'
     dbparam = read_dbconf_json(self.dbconfig)
     base = BaseAdaptor(**dbparam)
     self.engine = base.engine
     self.dbname = dbparam['dbname']
     Base.metadata.create_all(self.engine)
     self.session_class = base.get_session_class()
     base.start_session()
     # load platform data
     pl = PlatformAdaptor(**{'session': base.session})
     pl.store_platform_data(data=read_json_data(self.platform_json))
     # load seqrun data
     sra = SeqrunAdaptor(**{'session': base.session})
     sra.store_seqrun_and_attribute_data(
         data=read_json_data(self.seqrun_json))
     # load pipeline data
     pla = PipelineAdaptor(**{'session': base.session})
     pla.store_pipeline_data(data=read_json_data(self.pipeline_json))
     # seed the demultiplexing pipeline with seqrun seed id 1
     pipeline_seed_data = [
         {
             'pipeline_name': 'demultiplexing_fastq',
             'seed_id': '1',
             'seed_table': 'seqrun'
         },
     ]
     pla.create_pipeline_seed(data=pipeline_seed_data)
     base.close_session()
Exemplo n.º 2
0
 def setUp(self):
     """Initialise a BaseAdaptor from the test db config and create the schema."""
     self.dbconfig = 'data/dbconfig.json'
     db_params = read_dbconf_json(self.dbconfig)
     self.base = BaseAdaptor(**db_params)
     self.dbname = db_params['dbname']
     self.engine = self.base.engine
     # create every table declared on the shared declarative Base
     Base.metadata.create_all(self.engine)
 def setUp(self):
     """Initialise the test database schema and keep a session class handy."""
     self.dbconfig = 'data/dbconfig.json'
     db_params = read_dbconf_json(self.dbconfig)
     adaptor = BaseAdaptor(**db_params)
     self.dbname = db_params['dbname']
     self.engine = adaptor.engine
     # create every table declared on the shared declarative Base
     Base.metadata.create_all(self.engine)
     self.session_class = adaptor.get_session_class()
    def __init__(
        self,
        igf_id_list,
        table_name,
        pipeline_name,
        dbconfig_file,
        log_slack=True,
        log_asana=True,
        slack_config=None,
        asana_project_id=None,
        asana_config=None,
        clean_up=True,
    ):
        '''
        :param igf_id_list: A list of igf ids to uniquely identify the entity
        :param table_name: A database table name to look for the igf id
                           available options are 'project','sample','experiment','run',
                           'file','seqrun','collection'
        :param pipeline_name: A pipeline name to change the status of the seed
        :param dbconfig_file: A file containing the database configuration
        :param log_slack: A boolean flag for toggling Slack messages, default True
        :param log_asana: A boolean flag for toggling Asana messages, default True
        :param slack_config: A file containing Slack tokens, default None
        :param asana_config: A file containing Asana tokens, default None
        :param asana_project_id: A numeric Asana project id, default is None
        :param clean_up: Clean up input file once its processed, default True
        :raises ValueError: If table_name is unsupported or a required slack/asana
                            config is missing while its logging toggle is on
        '''
        self.igf_id_list = igf_id_list
        # validate early so a bad table name fails before any db/slack/asana setup
        if table_name not in ('project', 'sample', 'experiment', 'run',
                              'file', 'seqrun', 'collection'):
            raise ValueError('Table {0} not supported for pipeline seed'.\
                             format(table_name))
        self.table_name = table_name
        self.pipeline_name = pipeline_name
        self.clean_up = clean_up
        dbparams = read_dbconf_json(dbconfig_file)
        self.base_adaptor = BaseAdaptor(**dbparams)
        self.log_slack = log_slack
        self.log_asana = log_asana
        if log_slack and slack_config is None:
            raise ValueError('Missing slack config file')
        elif log_slack and slack_config:
            self.igf_slack = IGF_slack(slack_config)  # add slack object

        # asana logging needs both a token file and a project id
        if log_asana and \
           (asana_config is None or
                asana_project_id is None):
            raise ValueError(
                'Missing asana config file or asana project id')
        elif log_asana and asana_config and asana_project_id:
            self.igf_asana = IGF_asana(
                asana_config, asana_project_id)  # add asana object
Exemplo n.º 5
0
    def __init__(self,
                 projet_info_path,
                 dbconfig,
                 user_account_template,
                 log_slack=True,
                 slack_config=None,
                 check_hpc_user=False,
                 hpc_user=None,
                 hpc_address=None,
                 ldap_server=None,
                 setup_irods=True,
                 notify_user=True,
                 default_user_email='*****@*****.**',
                 project_lookup_column='project_igf_id',
                 user_lookup_column='email_id',
                 data_authority_column='data_authority',
                 sample_lookup_column='sample_igf_id',
                 barcode_check_keyword='barcode_check',
                 metadata_sheet_name='Project metadata',
                 sendmail_exe='/usr/sbin/sendmail'):
        '''
        :param projet_info_path: Path of the project info input
                                 (parameter name kept as-is for backward
                                 compatibility; 'project' is misspelt)
        :param dbconfig: A database configuration file path
        :param user_account_template: A template used for user account setup
        :param log_slack: A boolean flag for toggling Slack messages, default True
        :param slack_config: A file containing Slack tokens, default None
        :param check_hpc_user: Toggle for hpc user check, default False
        :param hpc_user: Hpc user name, required if check_hpc_user is True
        :param hpc_address: Hpc host address, required if check_hpc_user is True
        :param ldap_server: Ldap server address, required if check_hpc_user is True
        :param setup_irods: Toggle flag, default True
        :param notify_user: Toggle flag, default True
        :param default_user_email: Default email id, used when none is supplied
        :param project_lookup_column: Lookup column for project, default 'project_igf_id'
        :param user_lookup_column: Lookup column for user, default 'email_id'
        :param data_authority_column: Lookup column for data authority, default 'data_authority'
        :param sample_lookup_column: Lookup column for sample, default 'sample_igf_id'
        :param barcode_check_keyword: Keyword for barcode check, default 'barcode_check'
        :param metadata_sheet_name: Metadata sheet name, default 'Project metadata'
        :param sendmail_exe: Path of the sendmail executable, default '/usr/sbin/sendmail'
        :raises ValueError: If slack or hpc configuration is incomplete
        '''
        self.projet_info_path = projet_info_path
        self.user_account_template = user_account_template
        self.project_lookup_column = project_lookup_column
        self.user_lookup_column = user_lookup_column
        self.sample_lookup_column = sample_lookup_column
        self.data_authority_column = data_authority_column
        self.log_slack = log_slack
        dbparams = read_dbconf_json(dbconfig)
        base = BaseAdaptor(**dbparams)
        self.session_class = base.get_session_class()
        self.setup_irods = setup_irods
        self.notify_user = notify_user
        self.default_user_email = default_user_email
        self.barcode_check_keyword = barcode_check_keyword
        self.check_hpc_user = check_hpc_user
        self.hpc_user = hpc_user
        self.hpc_address = hpc_address
        self.ldap_server = ldap_server
        self.metadata_sheet_name = metadata_sheet_name
        self.sendmail_exe = sendmail_exe
        if log_slack and slack_config is None:
            raise ValueError('Missing slack config file')
        elif log_slack and slack_config:
            self.igf_slack = IGF_slack(slack_config=slack_config)

        # the hpc user check needs all three of user, address and ldap server
        if check_hpc_user and (hpc_user is None or
                               hpc_address is None or
                               ldap_server is None):
            raise ValueError('Hpc user {0} address {1}, and ldap server {2} are required for check_hpc_user'.\
                             format(hpc_user,hpc_address,ldap_server))
def load_new_pipeline_data(data_file, dbconfig):
    '''
    A method for loading new data for pipeline table

    :param data_file: A json file containing the new pipeline records
    :param dbconfig: A database configuration file path
    '''
    formatted_data = read_json_data(data_file)
    dbparam = read_dbconf_json(dbconfig)
    pp = PipelineAdaptor(**dbparam)
    pp.start_session()
    try:
        pp.store_pipeline_data(data=formatted_data)
    finally:
        pp.close_session()  # always release the db session, even on failure
Exemplo n.º 7
0
def load_new_platform_data(data_file, dbconfig):
    '''
    A method for loading new data for platform table

    :param data_file: A json file containing the new platform records
    :param dbconfig: A database configuration file path
    '''
    formatted_data = read_json_data(data_file)
    dbparam = read_dbconf_json(dbconfig)
    pl = PlatformAdaptor(**dbparam)
    pl.start_session()
    try:
        pl.store_platform_data(data=formatted_data)
    finally:
        pl.close_session()  # always release the db session, even on failure
Exemplo n.º 8
0
def load_new_seqrun_data(data_file, dbconfig):
    '''
    A method for loading new data for seqrun table

    :param data_file: A json file containing the new seqrun records
    :param dbconfig: A database configuration file path
    '''
    formatted_data = read_json_data(data_file)
    dbparam = read_dbconf_json(dbconfig)
    sr = SeqrunAdaptor(**dbparam)
    sr.start_session()
    try:
        sr.store_seqrun_and_attribute_data(data=formatted_data)
    finally:
        sr.close_session()  # always release the db session, even on failure
Exemplo n.º 9
0
def load_new_flowcell_data(data_file, dbconfig):
    '''
    A method for loading new data to flowcell table

    :param data_file: A json file containing the flowcell barcode rules
    :param dbconfig: A database configuration file path
    '''
    flowcell_rule_data = read_json_data(data_file)
    dbparam = read_dbconf_json(dbconfig)
    pl = PlatformAdaptor(**dbparam)
    pl.start_session()
    try:
        pl.store_flowcell_barcode_rule(data=flowcell_rule_data)
    finally:
        pl.close_session()  # always release the db session, even on failure
Exemplo n.º 10
0
    def __init__(self,
                 seqrun_path,
                 seqrun_igf_list,
                 dbconfig_file,
                 clean_up=True,
                 json_collection_type='ILLUMINA_BCL_MD5',
                 log_slack=True,
                 log_asana=True,
                 slack_config=None,
                 asana_project_id=None,
                 asana_config=None,
                 samplesheet_name='SampleSheet.csv'):
        '''
        :param seqrun_path: A directory path for sequencing run home
        :param seqrun_igf_list: A file path listing sequencing runs to reset
        :param dbconfig_file: A file containing the database configuration
        :param clean_up: Clean up input file once its processed, default True
        :param json_collection_type: A collection type for md5 json file lookup, default ILLUMINA_BCL_MD5
        :param log_slack: A boolean flag for toggling Slack messages, default True
        :param log_asana: A boolean flag for toggling Asana messages, default True
        :param slack_config: A file containing Slack tokens, default None
        :param asana_config: A file containing Asana tokens, default None
        :param asana_project_id: A numeric Asana project id, default is None
        :param samplesheet_name: Name of the samplesheet file, default SampleSheet.csv
        :raises ValueError: If a required slack/asana config is missing while its
                            logging toggle is on
        '''
        self.seqrun_path = seqrun_path
        self.seqrun_igf_list = seqrun_igf_list
        self.json_collection_type = json_collection_type
        self.log_slack = log_slack
        self.log_asana = log_asana
        self.clean_up = clean_up
        self.samplesheet_name = samplesheet_name
        dbparams = read_dbconf_json(dbconfig_file)
        self.base_adaptor = BaseAdaptor(**dbparams)
        if log_slack and slack_config is None:
            raise ValueError('Missing slack config file')
        elif log_slack and slack_config:
            self.igf_slack = IGF_slack(slack_config)  # add slack object

        # asana logging needs both a token file and a project id
        if log_asana and \
           (asana_config is None or
                asana_project_id is None):
            raise ValueError(
                'Missing asana config file or asana project id')
        elif log_asana and asana_config and asana_project_id:
            self.igf_asana = IGF_asana(
                asana_config, asana_project_id)  # add asana object
  def setUp(self):
    '''Rebuild the test db from scratch and load platform, flowcell rule,
    project, sample, seqrun and experiment fixtures in dependency order.'''
    self.dbconfig = 'data/dbconfig.json'
    dbparam=read_dbconf_json(self.dbconfig)
    base = BaseAdaptor(**dbparam)
    self.engine = base.engine
    self.dbname=dbparam['dbname']
    # drop any leftover schema and db file so every test starts clean
    Base.metadata.drop_all(self.engine)
    if os.path.exists(self.dbname):
      os.remove(self.dbname)
    Base.metadata.create_all(self.engine)
    self.session_class=base.get_session_class()

    base = BaseAdaptor(**{'session_class':self.session_class})
    base.start_session()
    platform_data=[{ "platform_igf_id" : "M001",
                     "model_name" : "MISEQ" ,
                     "vendor_name" : "ILLUMINA" ,
                     "software_name" : "RTA",
                     "software_version" : "RTA1.18.54"}]                        # platform data
    flowcell_rule_data=[{"platform_igf_id":"M001",
                         "flowcell_type":"MISEQ",
                         "index_1":"NO_CHANGE",
                         "index_2":"NO_CHANGE"}]                                # flowcell rule data
    pl=PlatformAdaptor(**{'session':base.session})
    pl.store_platform_data(data=platform_data)                                  # loading platform data
    pl.store_flowcell_barcode_rule(data=flowcell_rule_data)                     # loading flowcell rules data
    project_data=[{'project_igf_id':'ProjectA'}]                                # project data
    pa=ProjectAdaptor(**{'session':base.session})
    pa.store_project_and_attribute_data(data=project_data)                      # load project data
    sample_data=[{'sample_igf_id':'SampleA',
                  'project_igf_id':'ProjectA'}]                                 # sample data
    sa=SampleAdaptor(**{'session':base.session})
    sa.store_sample_and_attribute_data(data=sample_data)                        # store sample data
    seqrun_data=[{'seqrun_igf_id':'SeqrunA', 
                  'flowcell_id':'000000000-D0YLK', 
                  'platform_igf_id':'M001',
                  'flowcell':'MISEQ'}]                                          # seqrun data
    sra=SeqrunAdaptor(**{'session':base.session})
    sra.store_seqrun_and_attribute_data(data=seqrun_data)                       # load seqrun data
    experiment_data=[{'experiment_igf_id':'ExperimentA',
                      'sample_igf_id':'SampleA',
                      'library_name':'SampleA',
                      'platform_name':'MISEQ',
                      'project_igf_id':'ProjectA'}]                             # experiment data
    ea=ExperimentAdaptor(**{'session':base.session})
    # NOTE(review): experiment rows are stored via store_project_and_attribute_data
    # on ExperimentAdaptor — confirm this is the intended adaptor API
    ea.store_project_and_attribute_data(data=experiment_data)                   # load experiment data
    base.commit_session()
    base.close_session()
 def __init__(self,dbconfig_file,log_slack=True,slack_config=None):
   '''
   :param dbconfig_file: A database configuration file path
   :param log_slack: A boolean flag for toggling Slack messages, default True
   :param slack_config: A file containing Slack tokens, default None
   :raises ValueError: If log_slack is True but no slack_config is given
   '''
   dbparams = read_dbconf_json(dbconfig_file)
   self.base_adaptor = BaseAdaptor(**dbparams)
   self.log_slack = log_slack
   if log_slack and slack_config is None:
     raise ValueError('Missing slack config file')
   elif log_slack and slack_config:
     self.igf_slack = IGF_slack(slack_config)                                  # add slack object
 def setUp(self):
     """Create the test db schema and load two projects plus four samples
     (IGFS004 carries status FAILED) for project 1."""
     self.dbconfig = 'data/dbconfig.json'
     dbparam = read_dbconf_json(self.dbconfig)
     base = BaseAdaptor(**dbparam)
     self.engine = base.engine
     self.dbname = dbparam['dbname']
     Base.metadata.create_all(self.engine)
     self.session_class = base.get_session_class()
     # two project fixtures
     project_data = [{
         'project_igf_id': 'IGFP0001_test_22-8-2017_rna',
         'project_name': 'test_22-8-2017_rna',
         'description': 'Its project 1',
         'project_deadline': 'Before August 2017',
         'comments': 'Some samples are treated with drug X',
     }, {
         'project_igf_id': 'IGFP0002_test_22-8-2017_rna',
         'project_name': 'test_23-8-2017_rna',
         'description': 'Its project 2',
         'project_deadline': 'Before August 2017',
         'comments': 'Some samples are treated with drug X',
     }]
     base.start_session()
     pa = ProjectAdaptor(**{'session': base.session})
     pa.store_project_and_attribute_data(data=project_data)
     sa = SampleAdaptor(**{'session': base.session})
     # four samples on project 1; the last one is marked FAILED
     sample_data = [
         {
             'sample_igf_id': 'IGFS001',
             'project_igf_id': 'IGFP0001_test_22-8-2017_rna',
         },
         {
             'sample_igf_id': 'IGFS002',
             'project_igf_id': 'IGFP0001_test_22-8-2017_rna',
         },
         {
             'sample_igf_id': 'IGFS003',
             'project_igf_id': 'IGFP0001_test_22-8-2017_rna',
         },
         {
             'sample_igf_id': 'IGFS004',
             'project_igf_id': 'IGFP0001_test_22-8-2017_rna',
             'status': 'FAILED',
         },
     ]
     sa.store_sample_and_attribute_data(data=sample_data)
     base.close_session()
Exemplo n.º 14
0
 def setUp(self):
     """Create the test db schema and keep a dataframe of project fixtures
     covering the three barcode_check cases: absent, 'ON' and 'OFF'."""
     self.dbconfig = 'data/dbconfig.json'
     dbparam = read_dbconf_json(self.dbconfig)
     # project fixtures: no barcode_check key, barcode_check ON, barcode_check OFF
     data = [{
         'project_igf_id': 'IGFP001_test1_24-1-18',
     }, {
         'project_igf_id': 'IGFP002_test1_24-1-18',
         'barcode_check': 'ON'
     }, {
         'project_igf_id': 'IGFP003_test1_24-1-18',
         'barcode_check': 'OFF'
     }]
     self.data = pd.DataFrame(data)
     base = BaseAdaptor(**dbparam)
     self.engine = base.engine
     self.dbname = dbparam['dbname']
     Base.metadata.create_all(self.engine)
     self.session_class = base.get_session_class()
Exemplo n.º 15
0
 def setUp(self):
     """Create the test db schema and keep platform and flowcell-rule
     fixtures (MISEQ, NEXTSEQ, HISEQ4000) on the test instance."""
     self.dbconfig = 'data/dbconfig.json'
     dbparam = read_dbconf_json(self.dbconfig)
     base = BaseAdaptor(**dbparam)
     self.engine = base.engine
     self.dbname = dbparam['dbname']
     Base.metadata.create_all(self.engine)
     self.session_class = base.get_session_class()
     # three sequencing platforms
     self.platform_data=\
       [{"platform_igf_id" : "M03291" ,
         "model_name" : "MISEQ" ,
         "vendor_name" : "ILLUMINA" ,
         "software_name" : "RTA" ,
         "software_version" : "RTA1.18.54"
        },
        {"platform_igf_id" : "NB501820",
         "model_name" : "NEXTSEQ",
         "vendor_name" : "ILLUMINA",
         "software_name" : "RTA",
         "software_version" : "RTA2"
        },
        {"platform_igf_id" : "K00345",
         "model_name" : "HISEQ4000",
         "vendor_name" : "ILLUMINA",
         "software_name" : "RTA",
         "software_version" : "RTA2"
        }]
     # per-platform index handling rules for each flowcell type
     self.flowcell_rule_data=\
       [{"platform_igf_id":"K00345",
         "flowcell_type":"HiSeq 3000/4000 SR",
         "index_1":"NO_CHANGE",
         "index_2":"NO_CHANGE"},
        {"platform_igf_id":"K00345",
         "flowcell_type":"HiSeq 3000/4000 PE",
         "index_1":"NO_CHANGE",
         "index_2":"REVCOMP"},
        {"platform_igf_id":"NB501820",
         "flowcell_type":"NEXTSEQ",
         "index_1":"NO_CHANGE",
         "index_2":"REVCOMP"},
        {"platform_igf_id":"M03291",
         "flowcell_type":"MISEQ",
         "index_1":"NO_CHANGE",
         "index_2":"NO_CHANGE"}]
def find_new_analysis_seeds(dbconfig_path, pipeline_name, project_name_file,
                            species_name_list, fastq_type,
                            library_source_list):
    '''
    A utils method for finding and seeding new experiments for analysis

    :param dbconfig_path: A database configuration file
    :param pipeline_name: Pipeline name
    :param project_name_file: A file containing the list of projects for seeding pipeline
    :param species_name_list: A list of species to consider for seeding analysis
    :param fastq_type: Fastq collection type
    :param library_source_list: A list of library source info to consider for seeding analysis
    :returns: List of available experiments or None and a list of seeded experiments or None
    :raises IOError: If project_name_file does not exist
    '''
    if not os.path.exists(project_name_file):
        raise IOError('File {0} not found'.format(project_name_file))

    with open(project_name_file, 'r') as fp:
        # one project name per line; an empty file means "no project filter"
        project_list = [line.strip() for line in fp]
        if len(project_list) == 0:
            project_list = None

    dbparam = read_dbconf_json(dbconfig_path)
    pl = PipelineAdaptor(**dbparam)
    pl.start_session()
    try:
        available_exps, seeded_exps = \
            pl.seed_new_experiments(
                pipeline_name=pipeline_name,
                species_name_list=species_name_list,
                fastq_type=fastq_type,
                project_list=project_list,
                library_source_list=library_source_list)
    finally:
        pl.close_session()  # always release the db session, even on failure
    return available_exps, seeded_exps
 def setUp(self):
     """Create the test db with two projects and one user; the user is
     assigned to both projects and holds data authority on project 1."""
     self.dbconfig = 'data/dbconfig.json'
     dbparam = read_dbconf_json(self.dbconfig)
     base = BaseAdaptor(**dbparam)
     self.engine = base.engine
     self.dbname = dbparam['dbname']
     Base.metadata.create_all(self.engine)
     self.session_class = base.get_session_class()
     project_data = [{
         'project_igf_id': 'IGFP0001_test_22-8-2017_rna',
         'project_name': 'test_22-8-2017_rna',
         'description': 'Its project 1',
         'project_deadline': 'Before August 2017',
         'comments': 'Some samples are treated with drug X',
     }, {
         'project_igf_id': 'IGFP0002_test_22-8-2017_rna',
         'project_name': 'test_23-8-2017_rna',
         'description': 'Its project 2',
         'project_deadline': 'Before August 2017',
         'comments': 'Some samples are treated with drug X'
     }]
     user_data = [{
         'name': 'UserA',
         'email_id': '*****@*****.**',
         'username': '******'
     }]
     # UserA is data authority only for project 1
     project_user_data = [{
         'project_igf_id': 'IGFP0001_test_22-8-2017_rna',
         'email_id': '*****@*****.**',
         'data_authority': True
     }, {
         'project_igf_id': 'IGFP0002_test_22-8-2017_rna',
         'email_id': '*****@*****.**'
     }]
     base.start_session()
     ua = UserAdaptor(**{'session': base.session})
     ua.store_user_data(data=user_data)
     pa = ProjectAdaptor(**{'session': base.session})
     pa.store_project_and_attribute_data(data=project_data)
     pa.assign_user_to_project(data=project_user_data)
     base.close_session()
 def setUp(self):
     """Create the test db and load platform data (with flowcell barcode
     rules) and seqrun fixtures from json files."""
     self.dbconfig='data/dbconfig.json'
     self.platform_json='data/platform_db_data.json'
     self.seqrun_json='data/seqrun_db_data.json'
     self.pipeline_json='data/pipeline_data.json'
     self.flowcell_rules_json='data/flowcell_rules.json'
     dbparam=read_dbconf_json(self.dbconfig)
     base=BaseAdaptor(**dbparam)
     self.engine=base.engine
     self.dbname=dbparam['dbname']
     Base.metadata.create_all(self.engine)
     self.session_class=base.get_session_class()
     base.start_session()
     # load platform data
     pl=PlatformAdaptor(**{'session':base.session})
     pl.store_platform_data(data=read_json_data(self.platform_json))
     pl.store_flowcell_barcode_rule(data=read_json_data(self.flowcell_rules_json))
     # load seqrun data
     sra=SeqrunAdaptor(**{'session':base.session})
     sra.store_seqrun_and_attribute_data(data=read_json_data(self.seqrun_json))
     base.close_session()
Exemplo n.º 19
0
 def setUp(self):
   '''Rebuild the test db, load project/sample/experiment fixtures and
   register two dummy analysis files as an 'AnalysisA_html' collection.'''
   self.dbconfig = 'data/dbconfig.json'
   dbparam=read_dbconf_json(self.dbconfig)
   base = BaseAdaptor(**dbparam)
   self.engine = base.engine
   self.dbname=dbparam['dbname']
   # drop any leftover schema and db file so every test starts clean
   Base.metadata.drop_all(self.engine)
   if os.path.exists(self.dbname):
     os.remove(self.dbname)
   Base.metadata.create_all(self.engine)
   self.session_class=base.get_session_class()
   base.start_session()
   project_data=[{'project_igf_id':'ProjectA'}]
   pa=ProjectAdaptor(**{'session':base.session})
   pa.store_project_and_attribute_data(data=project_data)                      # load project data
   sample_data=[{'sample_igf_id':'SampleA',
                 'project_igf_id':'ProjectA'}]                                 # sample data
   sa=SampleAdaptor(**{'session':base.session})
   sa.store_sample_and_attribute_data(data=sample_data)                        # store sample data
   experiment_data=[{'experiment_igf_id':'ExperimentA',
                     'sample_igf_id':'SampleA',
                     'library_name':'SampleA',
                     'platform_name':'MISEQ',
                     'project_igf_id':'ProjectA'}]                             # experiment data
   ea=ExperimentAdaptor(**{'session':base.session})
   # NOTE(review): experiment rows are stored via store_project_and_attribute_data
   # on ExperimentAdaptor — confirm this is the intended adaptor API
   ea.store_project_and_attribute_data(data=experiment_data)
   self.temp_dir=get_temp_dir()
   # write two dummy files and attach both to the ExperimentA collection
   temp_files=['a.csv','b.csv']
   for temp_file in temp_files:
     with open(os.path.join(self.temp_dir,temp_file),'w') as fp:
       fp.write('A')
   collection_data=[{'name':'ExperimentA',
                     'type':'AnalysisA_html',
                     'table':'experiment',
                     'file_path':os.path.join(self.temp_dir,temp_file)}
                     for temp_file in temp_files]
   ca=CollectionAdaptor(**{'session':base.session})
   ca.load_file_and_create_collection(data=collection_data,
                                      calculate_file_size_and_md5=False)
   base.close_session()
  def _fetch_project_info_from_db(self):
    '''
    An internal method for fetching project and sample read-count data from db

    :returns: A dataframe containing following columns

              project_igf_id,
              sample_igf_id,
              expected_read,
              total_read
    '''
    try:
      check_file_path(self.dbconfig_file)
      dbconf = read_dbconf_json(self.dbconfig_file)
      sa = SampleAdaptor(**dbconf)
      sa.start_session()
      # one row per sample: max of the expected-read sample attribute and the
      # sum of R1 read counts across all of its runs; the outer joins (with
      # is_(None) alternatives in the filters) keep samples that have no
      # experiment or run yet, whose totals are filled with 0 below
      query = sa.session.\
              query(Project.project_igf_id,
                    Sample.sample_igf_id,
                    func.max(Sample_attribute.attribute_value).label(self.expected_read_tag),
                    func.sum(Run_attribute.attribute_value).label(self.total_read_tag)
                   ).\
              outerjoin(Sample,Project.project_id==Sample.project_id).\
              outerjoin(Sample_attribute, Sample.sample_id==Sample_attribute.sample_id).\
              outerjoin(Experiment, Sample.sample_id==Experiment.sample_id).\
              outerjoin(Run,Experiment.experiment_id==Run.experiment_id).\
              outerjoin(Run_attribute,Run.run_id==Run_attribute.run_id).\
              filter((Experiment.platform_name.in_(self.platform_list))|(Experiment.platform_name.is_(None))).\
              filter(Sample_attribute.attribute_name==self.expected_read_tag).\
              filter((Run_attribute.attribute_name==self.r1_read_tag)|(Run_attribute.attribute_name.is_(None))).\
              group_by(Sample.sample_igf_id)
      records = sa.fetch_records(query=query,
                                 output_mode='dataframe')
      sa.close_session()
      # samples without any run have no total read count yet; report them as 0
      records[self.total_read_tag] = records[self.total_read_tag].fillna(0).astype(int)
      return records
    except:
      raise
Exemplo n.º 21
0
    def fetch_input(self):
        '''
        Fetch input method for base runnable

        Reads the following eHive pipeline params (via self.param*):

        :param dbconfig: A database configuration json file
        :param log_slack: A toggle for writing logs to slack
        :param log_asana: A toggle for writing logs to asana
                          (documented but not configured here — TODO confirm)
        '''
        dbconfig = self.param_required('dbconfig')
        dbparams = read_dbconf_json(dbconfig)
        base = BaseAdaptor(**dbparams)
        session_class = base.get_session_class()
        # expose the sqlalchemy session class to downstream pipeline steps
        self.param('igf_session_class',
                   session_class)  # set session class for pipeline

        if self.param('log_slack'):
            slack_config = self.param_required('slack_config')
            igf_slack = IGF_slack(slack_config=slack_config)
            self.param('igf_slack', igf_slack)
 def setUp(self):
     """Create the test db, write a seqrun md5 json fixture file and
     register it as an ILLUMINA_BCL_MD5 collection for seqrun1."""
     self.dbconfig = 'data/dbconfig.json'
     dbparam = read_dbconf_json(self.dbconfig)
     base = BaseAdaptor(**dbparam)
     self.engine = base.engine
     self.dbname = dbparam['dbname']
     Base.metadata.create_all(self.engine)
     self.session_class = base.get_session_class()
     self.json_file_path = 'data/reset_samplesheet_md5/seqrun1_file_md5.json'
     json_data = pd.DataFrame([{
         'file_md5': '1e7531158974b5a5b7cbb7dde09ac779',
         'seqrun_file_name': 'SampleSheet.csv'
     }, {
         'file_md5': '2b22f945bc9e7e390af5432425783a03',
         'seqrun_file_name': 'RTAConfiguration.xml'
     }])
     with open(self.json_file_path, 'w') as jp:
         # 'records' (list of row dicts): the 'record' abbreviation was an
         # undocumented alias removed from DataFrame.to_dict and now raises
         # ValueError on current pandas
         json.dump(json_data.to_dict(orient='records'), jp, indent=4)
     self.initial_json_md5 = calculate_file_checksum(
         filepath=self.json_file_path)
     self.correct_samplesheet_md5 = '259ed03f2e8c45980de121f7c3a70565'
     self.json_collection_name = 'seqrun1'
     self.json_collection_type = 'ILLUMINA_BCL_MD5'
     self.seqrun_path = 'data/reset_samplesheet_md5'
     self.seqrun_input_list = 'data/reset_samplesheet_md5/seqrun_input_list.txt'
     ca = CollectionAdaptor(**{'session_class': self.session_class})
     ca.start_session()
     data = pd.DataFrame([{
         'name': self.json_collection_name,
         'type': self.json_collection_type,
         'table': 'seqrun',
         'file_path': self.json_file_path,
     }])
     ca.load_file_and_create_collection(data, autosave=True, hasher='md5')
     ca.close_session()
     # seqrun input list names the collections to reset, one per line
     with open(self.seqrun_input_list, 'w') as fp:
         fp.write(self.json_collection_name)
Exemplo n.º 23
0
    def setUp(self):
        self.dbconfig = 'data/dbconfig.json'
        dbparam = read_dbconf_json(self.dbconfig)
        base = BaseAdaptor(**dbparam)
        self.engine = base.engine
        self.dbname = dbparam['dbname']
        Base.metadata.create_all(self.engine)
        self.session_class = base.get_session_class()
        # load platform data
        platform_data=\
          [{"platform_igf_id" : "M03291" ,
            "model_name" : "MISEQ" ,
            "vendor_name" : "ILLUMINA" ,
            "software_name" : "RTA" ,
            "software_version" : "RTA1.18.54"
           },
           {"platform_igf_id" : "NB501820",
            "model_name" : "NEXTSEQ",
            "vendor_name" : "ILLUMINA",
            "software_name" : "RTA",
            "software_version" : "RTA2"
           },
           {"platform_igf_id" : "K00345",
            "model_name" : "HISEQ4000",
            "vendor_name" : "ILLUMINA",
            "software_name" : "RTA",
            "software_version" : "RTA2"
           }]

        flowcell_rule_data=\
          [{"platform_igf_id":"K00345",
            "flowcell_type":"HiSeq 3000/4000 SR",
            "index_1":"NO_CHANGE",
            "index_2":"NO_CHANGE"},
           {"platform_igf_id":"K00345",
            "flowcell_type":"HiSeq 3000/4000 PE",
            "index_1":"NO_CHANGE",
            "index_2":"REVCOMP"},
           {"platform_igf_id":"NB501820",
            "flowcell_type":"NEXTSEQ",
            "index_1":"NO_CHANGE",
            "index_2":"REVCOMP"},
           {"platform_igf_id":"M03291",
            "flowcell_type":"MISEQ",
            "index_1":"NO_CHANGE",
            "index_2":"NO_CHANGE"}]

        pl = PlatformAdaptor(**{'session_class': base.session_class})
        pl.start_session()
        pl.store_platform_data(data=platform_data)
        pl.store_flowcell_barcode_rule(data=flowcell_rule_data)
        pl.close_session()

        # load project data

        project_data = [{'project_igf_id': 'IGFQ000472_avik_28-3-2018_RNA'}]
        pa = ProjectAdaptor(**{'session_class': base.session_class})
        pa.start_session()
        pa.store_project_and_attribute_data(data=project_data)
        pa.close_session()

        # load samples

        sample_data = [
            {
                'project_igf_id': 'IGFQ000472_avik_28-3-2018_RNA',
                'sample_igf_id': 'IGF109792',
                'expected_read': 40000000
            },
            {
                'project_igf_id': 'IGFQ000472_avik_28-3-2018_RNA',
                'sample_igf_id': 'IGF109793',
                'expected_read': 40000000
            },
            {
                'project_igf_id': 'IGFQ000472_avik_28-3-2018_RNA',
                'sample_igf_id': 'IGF109794',
                'expected_read': 40000000
            },
            {
                'project_igf_id': 'IGFQ000472_avik_28-3-2018_RNA',
                'sample_igf_id': 'IGF109795',
                'expected_read': 40000000
            },
            {
                'project_igf_id': 'IGFQ000472_avik_28-3-2018_RNA',
                'sample_igf_id': 'IGF109796',
                'expected_read': 40000000
            },
            {
                'project_igf_id': 'IGFQ000472_avik_28-3-2018_RNA',
                'sample_igf_id': 'IGF109797',
                'expected_read': 40000000
            },
            {
                'project_igf_id': 'IGFQ000472_avik_28-3-2018_RNA',
                'sample_igf_id': 'IGF109797_1',
                'expected_read': 40000000
            },
        ]

        sa = SampleAdaptor(**{'session_class': base.session_class})
        sa.start_session()
        sa.store_sample_and_attribute_data(data=sample_data)
        sa.close_session()

        # load seqrun data

        seqrun_data = [{
            'flowcell_id': 'HV2GJBBXX',
            'platform_igf_id': 'K00345',
            'seqrun_igf_id': '180518_K00345_0047_BHV2GJBBXX'
        }]

        sra = SeqrunAdaptor(**{'session_class': base.session_class})
        sra.start_session()
        sra.store_seqrun_and_attribute_data(data=seqrun_data)
        sra.close_session()

        # load experiment data

        experiment_data=\
          [{'experiment_igf_id': 'IGF109792_HISEQ4000',
            'library_name': 'IGF109792',
            'platform_name': 'HISEQ4000',
            'project_igf_id': 'IGFQ000472_avik_28-3-2018_RNA',
            'sample_igf_id': 'IGF109792',
           },
           {'experiment_igf_id': 'IGF109793_HISEQ4000',
            'library_name': 'IGF109793',
            'platform_name': 'HISEQ4000',
            'project_igf_id': 'IGFQ000472_avik_28-3-2018_RNA',
            'sample_igf_id': 'IGF109793',
           },
           {'experiment_igf_id': 'IGF109794_HISEQ4000',
            'library_name': 'IGF109794',
            'platform_name': 'HISEQ4000',
            'project_igf_id': 'IGFQ000472_avik_28-3-2018_RNA',
            'sample_igf_id': 'IGF109794',
           },
           {'experiment_igf_id': 'IGF109795_HISEQ4000',
            'library_name': 'IGF109795',
            'platform_name': 'HISEQ4000',
            'project_igf_id': 'IGFQ000472_avik_28-3-2018_RNA',
            'sample_igf_id': 'IGF109795',
           },
           {'experiment_igf_id': 'IGF109796_HISEQ4000',
            'library_name': 'IGF109796',
            'platform_name': 'HISEQ4000',
            'project_igf_id': 'IGFQ000472_avik_28-3-2018_RNA',
            'sample_igf_id': 'IGF109796',
           },
           {'experiment_igf_id': 'IGF109797_HISEQ4000',
            'library_name': 'IGF109797',
            'platform_name': 'HISEQ4000',
            'project_igf_id': 'IGFQ000472_avik_28-3-2018_RNA',
            'sample_igf_id': 'IGF109797',
           },
          ]

        ea = ExperimentAdaptor(**{'session_class': base.session_class})
        ea.start_session()
        ea.store_project_and_attribute_data(data=experiment_data)
        ea.close_session()

        # load run data

        run_data=\
          [{'experiment_igf_id': 'IGF109792_HISEQ4000',
            'lane_number': '7',
            'run_igf_id': 'IGF109792_HISEQ4000_H2N3MBBXY_7',
            'seqrun_igf_id': '180518_K00345_0047_BHV2GJBBXX',
            'R1_READ_COUNT':288046541
           },
           {'experiment_igf_id': 'IGF109793_HISEQ4000',
            'lane_number': '7',
            'run_igf_id': 'IGF109793_HISEQ4000_H2N3MBBXY_7',
            'seqrun_igf_id': '180518_K00345_0047_BHV2GJBBXX',
            'R1_READ_COUNT':14666330
           },
           {'experiment_igf_id': 'IGF109794_HISEQ4000',
            'lane_number': '7',
            'run_igf_id': 'IGF109794_HISEQ4000_H2N3MBBXY_7',
            'seqrun_igf_id': '180518_K00345_0047_BHV2GJBBXX',
            'R1_READ_COUNT':5009143
           },
           {'experiment_igf_id': 'IGF109795_HISEQ4000',
            'lane_number': '7',
            'run_igf_id': 'IGF109795_HISEQ4000_H2N3MBBXY_7',
            'seqrun_igf_id': '180518_K00345_0047_BHV2GJBBXX',
            'R1_READ_COUNT':1391747
           },
           {'experiment_igf_id': 'IGF109796_HISEQ4000',
            'lane_number': '7',
            'run_igf_id': '	IGF109796_HISEQ4000_H2N3MBBXY_7',
            'seqrun_igf_id': '180518_K00345_0047_BHV2GJBBXX',
            'R1_READ_COUNT':1318008
           },
           {'experiment_igf_id': 'IGF109797_HISEQ4000',
            'lane_number': '7',
            'run_igf_id': 'IGF109797_HISEQ4000_H2N3MBBXY_7',
            'seqrun_igf_id': '180518_K00345_0047_BHV2GJBBXX',
            'R1_READ_COUNT':1216324
           },
          ]

        ra = RunAdaptor(**{'session_class': base.session_class})
        ra.start_session()
        ra.store_run_and_attribute_data(data=run_data)
        ra.close_session()
    def setUp(self):
        """Rebuild the test database and stage a minimal analysis fixture.

        Drops any stale schema and db file, recreates the tables, writes
        tiny placeholder input files into a temp work dir, then stores one
        platform (with barcode rule), project, sample, seqrun, experiment
        and run through the adaptors and commits the session.
        """
        self.dbconfig = 'data/dbconfig.json'
        dbparam = read_dbconf_json(self.dbconfig)
        db = BaseAdaptor(**dbparam)
        self.engine = db.engine
        self.dbname = dbparam['dbname']
        # start from a clean slate: remove stale schema and db file first
        Base.metadata.drop_all(self.engine)
        if os.path.exists(self.dbname):
            os.remove(self.dbname)
        Base.metadata.create_all(self.engine)
        self.session_class = db.get_session_class()
        self.temp_work_dir = get_temp_dir()
        self.temp_base_dir = get_temp_dir()
        self.input_list = ['a.cram', 'a.vcf.gz', 'b.tar.gz']
        for name in self.input_list:
            # each listed input becomes a tiny placeholder file on disk
            with open(os.path.join(self.temp_work_dir, name), 'w') as fh:
                fh.write('AAAA')

        db = BaseAdaptor(session_class=self.session_class)
        db.start_session()
        # platform record plus its flowcell barcode rule
        platform_adaptor = PlatformAdaptor(session=db.session)
        platform_adaptor.store_platform_data(
            data=[{
                "platform_igf_id": "M001",
                "model_name": "MISEQ",
                "vendor_name": "ILLUMINA",
                "software_name": "RTA",
                "software_version": "RTA1.18.54"
            }])
        platform_adaptor.store_flowcell_barcode_rule(
            data=[{
                "platform_igf_id": "M001",
                "flowcell_type": "MISEQ",
                "index_1": "NO_CHANGE",
                "index_2": "NO_CHANGE"
            }])
        # single project record
        project_adaptor = ProjectAdaptor(session=db.session)
        project_adaptor.store_project_and_attribute_data(
            data=[{'project_igf_id': 'ProjectA'}])
        # single sample under the project
        sample_adaptor = SampleAdaptor(session=db.session)
        sample_adaptor.store_sample_and_attribute_data(
            data=[{
                'sample_igf_id': 'SampleA',
                'project_igf_id': 'ProjectA'
            }])
        # sequencing run on the MISEQ flowcell
        seqrun_adaptor = SeqrunAdaptor(session=db.session)
        seqrun_adaptor.store_seqrun_and_attribute_data(
            data=[{
                'seqrun_igf_id': 'SeqrunA',
                'flowcell_id': '000000000-D0YLK',
                'platform_igf_id': 'M001',
                'flowcell': 'MISEQ'
            }])
        # experiment linking sample and platform
        experiment_adaptor = ExperimentAdaptor(session=db.session)
        experiment_adaptor.store_project_and_attribute_data(
            data=[{
                'experiment_igf_id': 'ExperimentA',
                'sample_igf_id': 'SampleA',
                'library_name': 'SampleA',
                'platform_name': 'MISEQ',
                'project_igf_id': 'ProjectA'
            }])
        # run on lane 1 of the seqrun
        run_adaptor = RunAdaptor(session=db.session)
        run_adaptor.store_run_and_attribute_data(
            data=[{
                'run_igf_id': 'RunA',
                'experiment_igf_id': 'ExperimentA',
                'seqrun_igf_id': 'SeqrunA',
                'lane_number': '1'
            }])
        db.commit_session()
        db.close_session()
Exemplo n.º 25
0
 def test_read_dbconf_json(self):
     """read_dbconf_json must raise ValueError for this config file."""
     self.assertRaises(ValueError, read_dbconf_json, dbconfig=self.dbconfig)
Exemplo n.º 26
0
 def setUp(self):
     """Create the test schema and load a demultiplexed-fastq fixture set.

     Loads, in order: platform and flowcell barcode rules, project,
     sample, seqrun, pipelines, fastq file records, collections with
     their file groups, experiments and runs — all within one adaptor
     session that is closed at the end.
     """
     self.dbconfig = 'data/dbconfig.json'
     dbparam = read_dbconf_json(self.dbconfig)
     base = BaseAdaptor(**dbparam)
     self.engine = base.engine
     self.dbname = dbparam['dbname']
     Base.metadata.create_all(self.engine)
     self.session_class = base.get_session_class()
     base.start_session()
     # MISEQ platform and its barcode rule (both indexes unchanged)
     platform_data = [
         {
             "platform_igf_id": "M03291",
             "model_name": "MISEQ",
             "vendor_name": "ILLUMINA",
             "software_name": "RTA",
             "software_version": "RTA1.18.54"
         },
     ]
     flowcell_rule_data = [{
         "platform_igf_id": "M03291",
         "flowcell_type": "MISEQ",
         "index_1": "NO_CHANGE",
         "index_2": "NO_CHANGE"
     }]
     pl = PlatformAdaptor(**{'session': base.session})
     pl.store_platform_data(data=platform_data)
     pl.store_flowcell_barcode_rule(data=flowcell_rule_data)
     # project with a single HG38 sample under it
     project_data = [{'project_igf_id': 'IGFQ000123_avik_10-4-2018_Miseq'}]
     pa = ProjectAdaptor(**{'session': base.session})
     pa.store_project_and_attribute_data(data=project_data)
     sample_data = [{
         'sample_igf_id': 'IGF103923',
         'project_igf_id': 'IGFQ000123_avik_10-4-2018_Miseq',
         'species_name': 'HG38'
     }]
     sa = SampleAdaptor(**{'session': base.session})
     sa.store_sample_and_attribute_data(data=sample_data)
     # sequencing run on the MISEQ flowcell
     seqrun_data = [
         {
             'seqrun_igf_id': '180416_M03291_0139_000000000-BRN47',
             'flowcell_id': '000000000-BRN47',
             'platform_igf_id': 'M03291',
             'flowcell': 'MISEQ'
         },
     ]
     sra = SeqrunAdaptor(**{'session': base.session})
     sra.store_seqrun_and_attribute_data(data=seqrun_data)
     # two pipelines pointing at the same backing sqlite db path
     pipeline_data = [
         {
             "pipeline_name": "PrimaryAnalysis",
             "pipeline_db": "sqlite:////bcl2fastq.db"
         },
         {
             "pipeline_name": "DemultiplexIlluminaFastq",
             "pipeline_db": "sqlite:////bcl2fastq.db"
         },
     ]
     pla = PipelineAdaptor(**{'session': base.session})
     pla.store_pipeline_data(data=pipeline_data)
     # fastq file records (paths only; nothing is created on disk)
     file_data = [
         {
             'file_path': '/path/S20180405S_S1_L001_R1_001.fastq.gz',
             'location': 'HPC_PROJECT',
             'md5': 'fd5a95c18ebb7145645e95ce08d729e4',
             'size': '1528121404'
         },
         {
             'file_path': '/path/S20180405S_S1_L001_R2_001.fastq.gz',
             'location': 'HPC_PROJECT',
             'md5': 'fd5a95c18ebb7145645e95ce08d729e4',
             'size': '1467047580'
         },
         {
             'file_path': '/path/S20180405S_S3_L001_R2_001.fastq.gz',
             'location': 'HPC_PROJECT',
             'md5': 'fd5a95c18ebb7145645e95ce08d729e4',
             'size': '1467047580'
         },
     ]
     fa = FileAdaptor(**{'session': base.session})
     fa.store_file_and_attribute_data(data=file_data)
     # run-level fastq collections and their file membership
     collection_data = [
         {
             'name': 'IGF103923_MISEQ_000000000-BRN47_1',
             'type': 'demultiplexed_fastq',
             'table': 'run'
         },
         {
             'name': 'IGF103923_MISEQ1_000000000-BRN47_1',
             'type': 'demultiplexed_fastq',
             'table': 'run'
         },
     ]
     collection_files_data = [
         {
             'name': 'IGF103923_MISEQ_000000000-BRN47_1',
             'type': 'demultiplexed_fastq',
             'file_path': '/path/S20180405S_S1_L001_R1_001.fastq.gz'
         },
         {
             'name': 'IGF103923_MISEQ_000000000-BRN47_1',
             'type': 'demultiplexed_fastq',
             'file_path': '/path/S20180405S_S1_L001_R2_001.fastq.gz'
         },
         {
             'name': 'IGF103923_MISEQ1_000000000-BRN47_1',
             'type': 'demultiplexed_fastq',
             'file_path': '/path/S20180405S_S3_L001_R2_001.fastq.gz'
         },
     ]
     ca = CollectionAdaptor(**{'session': base.session})
     ca.store_collection_and_attribute_data(data=collection_data)
     ca.create_collection_group(data=collection_files_data)
     # two experiments for the same sample: 10x 3' transcriptome and WGS
     experiment_data = [{
         'project_igf_id': 'IGFQ000123_avik_10-4-2018_Miseq',
         'sample_igf_id': 'IGF103923',
         'experiment_igf_id': 'IGF103923_MISEQ',
         'library_name': 'IGF103923',
         'library_source': 'TRANSCRIPTOMIC_SINGLE_CELL',
         'library_strategy': 'RNA-SEQ',
         'experiment_type': 'TENX-TRANSCRIPTOME-3P',
         'library_layout': 'PAIRED',
         'platform_name': 'MISEQ'
     }, {
         'project_igf_id': 'IGFQ000123_avik_10-4-2018_Miseq',
         'sample_igf_id': 'IGF103923',
         'experiment_igf_id': 'IGF103923_MISEQ1',
         'library_name': 'IGF103923_1',
         'library_source': 'GENOMIC_SINGLE_CELL',
         'library_strategy': 'WGS',
         'experiment_type': 'UNKNOWN',
         'library_layout': 'PAIRED',
         'platform_name': 'MISEQ'
     }]
     ea = ExperimentAdaptor(**{'session': base.session})
     # NOTE(review): ExperimentAdaptor's store method is named
     # store_project_and_attribute_data - confirm this is the intended
     # API for storing experiment rows
     ea.store_project_and_attribute_data(data=experiment_data)
     # one run per experiment, both on lane 1 of the seqrun
     run_data = [{
         'experiment_igf_id': 'IGF103923_MISEQ',
         'seqrun_igf_id': '180416_M03291_0139_000000000-BRN47',
         'run_igf_id': 'IGF103923_MISEQ_000000000-BRN47_1',
         'lane_number': '1'
     }, {
         'experiment_igf_id': 'IGF103923_MISEQ1',
         'seqrun_igf_id': '180416_M03291_0139_000000000-BRN47',
         'run_igf_id': 'IGF103923_MISEQ1_000000000-BRN47_1',
         'lane_number': '1'
     }]
     ra = RunAdaptor(**{'session': base.session})
     ra.store_run_and_attribute_data(data=run_data)
     base.close_session()
 def setUp(self):
     """Create the schema and preload project, sample and experiment rows."""
     self.dbconfig = 'data/dbconfig.json'
     dbparam = read_dbconf_json(self.dbconfig)
     db = BaseAdaptor(**dbparam)
     self.engine = db.engine
     self.dbname = dbparam['dbname']
     Base.metadata.create_all(self.engine)
     db.start_session()
     self.session_class = db.get_session_class()
     # single project record
     project_adaptor = ProjectAdaptor(session=db.session)
     project_adaptor.store_project_and_attribute_data(
         data=[{
             'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc',
             'project_name': 'test_22-8-2017_rna',
             'description': 'Its project 1',
             'project_deadline': 'Before August 2017',
             'comments': 'Some samples are treated with drug X',
         }])
     # three samples: fully annotated, partially annotated, and bare
     sample_adaptor = SampleAdaptor(session=db.session)
     sample_adaptor.store_sample_and_attribute_data(
         data=[
             {
                 'sample_igf_id': 'IGF00001',
                 'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc',
                 'library_source': 'TRANSCRIPTOMIC_SINGLE_CELL',
                 'library_strategy': 'RNA-SEQ',
                 'experiment_type': 'POLYA-RNA'
             },
             {
                 'sample_igf_id': 'IGF00003',
                 'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc',
                 'library_source': 'TRANSCRIPTOMIC_SINGLE_CELL',
                 'experiment_type': 'POLYA-RNA'
             },
             {
                 'sample_igf_id': 'IGF00002',
                 'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc',
             },
         ])
     # one HISEQ4000 experiment per sample
     experiment_adaptor = ExperimentAdaptor(session=db.session)
     experiment_adaptor.store_project_and_attribute_data(
         data=[
             {
                 'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc',
                 'sample_igf_id': 'IGF00001',
                 'experiment_igf_id': 'IGF00001_HISEQ4000',
                 'library_name': 'IGF00001'
             },
             {
                 'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc',
                 'sample_igf_id': 'IGF00003',
                 'experiment_igf_id': 'IGF00003_HISEQ4000',
                 'library_name': 'IGF00001'
             },
             {
                 'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc',
                 'sample_igf_id': 'IGF00002',
                 'experiment_igf_id': 'IGF00002_HISEQ4000',
                 'library_name': 'IGF00002'
             },
         ])
     db.close_session()
Exemplo n.º 28
0
 def setUp(self):
     """Create the schema and seed project/sample/experiment fixtures plus
     an 'alignment' pipeline and its first pipeline seed.
     """
     self.dbconfig = 'data/dbconfig.json'
     dbparam = read_dbconf_json(self.dbconfig)
     base = BaseAdaptor(**dbparam)
     self.engine = base.engine
     self.dbname = dbparam['dbname']
     Base.metadata.create_all(self.engine)
     self.session_class = base.get_session_class()
     base.start_session()
     # single project record
     project_data = [{
         'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc',
         'project_name': 'test_22-8-2017_rna',
         'description': 'Its project 1',
         'project_deadline': 'Before August 2017',
         'comments': 'Some samples are treated with drug X',
     }]
     pa = ProjectAdaptor(**{'session': base.session})
     pa.store_project_and_attribute_data(data=project_data)
     # three samples: fully annotated, partially annotated, and bare
     sample_data = [
         {
             'sample_igf_id': 'IGF00001',
             'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc',
             'library_source': 'TRANSCRIPTOMIC_SINGLE_CELL',
             'library_strategy': 'RNA-SEQ',
             'experiment_type': 'POLYA-RNA'
         },
         {
             'sample_igf_id': 'IGF00003',
             'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc',
             'library_source': 'TRANSCRIPTOMIC_SINGLE_CELL',
             'experiment_type': 'POLYA-RNA'
         },
         {
             'sample_igf_id': 'IGF00002',
             'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc',
         },
     ]
     sa = SampleAdaptor(**{'session': base.session})
     sa.store_sample_and_attribute_data(data=sample_data)
     # one HISEQ4000 experiment per sample
     experiment_data = [
         {
             'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc',
             'sample_igf_id': 'IGF00001',
             'experiment_igf_id': 'IGF00001_HISEQ4000',
             'library_name': 'IGF00001'
         },
         {
             'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc',
             'sample_igf_id': 'IGF00003',
             'experiment_igf_id': 'IGF00003_HISEQ4000',
             'library_name': 'IGF00001'
         },
         {
             'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc',
             'sample_igf_id': 'IGF00002',
             'experiment_igf_id': 'IGF00002_HISEQ4000',
             'library_name': 'IGF00002'
         },
     ]
     ea = ExperimentAdaptor(**{'session': base.session})
     ea.store_project_and_attribute_data(data=experiment_data)
     # alignment pipeline definition with init and run configurations
     pipeline_data = [{
         "pipeline_name": "alignment",
         "pipeline_db": "sqlite:////data/aln.db",
         "pipeline_init_conf": {
             "input_dir": "data/fastq_dir/",
             "output_dir": "data"
         },
         "pipeline_run_conf": {
             "output_dir": "data"
         }
     }]
     pl = PipelineAdaptor(**{'session': base.session})
     pl.store_pipeline_data(data=pipeline_data)
     # seed the alignment pipeline with experiment id 1
     pipeline_seed_data = [
         {
             'pipeline_name': 'alignment',
             'seed_id': '1',
             'seed_table': 'experiment'
         },
     ]
     pl.create_pipeline_seed(data=pipeline_seed_data)
     base.close_session()
Exemplo n.º 29
0
    def setUp(self):
        """Prepare a collect-fastq fixture: three platforms with barcode
        rules, one project with two samples, and a NEXTSEQ seqrun.
        """
        self.dbconfig = 'data/dbconfig.json'
        self.fastq_dir = 'data/collect_fastq_dir/sc_1_8'
        self.model_name = 'NEXTSEQ'
        self.flowcell_id = 'TESTABC'
        self.seqrun_igf_id = '171003_NB500000_0089_TESTABC'
        self.file_location = 'HPC_PROJECT'
        self.samplesheet_file = 'data/collect_fastq_dir/sc_1_8/SampleSheet.csv'
        self.samplesheet_filename = 'SampleSheet.csv'
        self.manifest_name = 'file_manifest.csv'
        dbparam = read_dbconf_json(self.dbconfig)
        base = BaseAdaptor(**dbparam)
        self.engine = base.engine
        self.dbname = dbparam['dbname']
        Base.metadata.create_all(self.engine)
        # NOTE(review): reads the session_class attribute directly; sibling
        # fixtures call base.get_session_class() - confirm both are
        # equivalent here
        self.session_class = base.session_class
        base.start_session()
        # MISEQ, NEXTSEQ and HISEQ4000 platform records
        platform_data = [{
            "platform_igf_id": "M00001",
            "model_name": "MISEQ",
            "vendor_name": "ILLUMINA",
            "software_name": "RTA",
            "software_version": "RTA1.18.54"
        }, {
            "platform_igf_id": "NB500000",
            "model_name": "NEXTSEQ",
            "vendor_name": "ILLUMINA",
            "software_name": "RTA",
            "software_version": "RTA2"
        }, {
            "platform_igf_id": "K00000",
            "model_name": "HISEQ4000",
            "vendor_name": "ILLUMINA",
            "software_name": "RTA",
            "software_version": "RTA2"
        }]
        # barcode handling rules per platform/flowcell type
        flowcell_rule_data = [{
            "platform_igf_id": "K00000",
            "flowcell_type": "HiSeq 3000/4000 SR",
            "index_1": "NO_CHANGE",
            "index_2": "NO_CHANGE"
        }, {
            "platform_igf_id": "K00000",
            "flowcell_type": "HiSeq 3000/4000 PE",
            "index_1": "NO_CHANGE",
            "index_2": "REVCOMP"
        }, {
            "platform_igf_id": "NB500000",
            "flowcell_type": "NEXTSEQ",
            "index_1": "NO_CHANGE",
            "index_2": "REVCOMP"
        }, {
            "platform_igf_id": "M00001",
            "flowcell_type": "MISEQ",
            "index_1": "NO_CHANGE",
            "index_2": "NO_CHANGE"
        }]
        pl = PlatformAdaptor(**{'session': base.session})
        pl.store_platform_data(data=platform_data)
        pl.store_flowcell_barcode_rule(data=flowcell_rule_data)
        # single project record
        project_data = [{
            'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc',
            'project_name': 'test_22-8-2017_rna',
            'description': 'Its project 1',
            'project_deadline': 'Before August 2017',
            'comments': 'Some samples are treated with drug X',
        }]
        pa = ProjectAdaptor(**{'session': base.session})
        pa.store_project_and_attribute_data(data=project_data)
        # two bare samples under the project
        sample_data = [
            {
                'sample_igf_id': 'IGF00001',
                'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc',
            },
            {
                'sample_igf_id': 'IGF00002',
                'project_igf_id': 'IGFP0001_test_22-8-2017_rna_sc',
            },
        ]
        sa = SampleAdaptor(**{'session': base.session})
        sa.store_sample_and_attribute_data(data=sample_data)

        # sequencing run matching the seqrun_igf_id stored above
        seqrun_data = [{
            'seqrun_igf_id': '171003_NB500000_0089_TESTABC',
            'flowcell_id': 'TESTABC',
            'platform_igf_id': 'NB500000',
            'flowcell': 'NEXTSEQ',
        }]
        sra = SeqrunAdaptor(**{'session': base.session})
        sra.store_seqrun_and_attribute_data(data=seqrun_data)
        base.close_session()
 def setUp(self):
     """Build a two-seqrun fixture: three platforms with barcode rules,
     MISEQ and NEXTSEQ seqruns, one project with two samples, three
     experiments/runs, and a fastq file collection per run.
     """
     self.dbconfig = 'data/dbconfig.json'
     dbparam = read_dbconf_json(self.dbconfig)
     base = BaseAdaptor(**dbparam)
     self.engine = base.engine
     self.dbname = dbparam['dbname']
     Base.metadata.create_all(self.engine)
     self.session_class = base.get_session_class()
     base.start_session()
     # MISEQ, NEXTSEQ and HISEQ4000 platform records
     platform_data = [{
         "platform_igf_id": "M03291",
         "model_name": "MISEQ",
         "vendor_name": "ILLUMINA",
         "software_name": "RTA",
         "software_version": "RTA1.18.54"
     }, {
         "platform_igf_id": "NB501820",
         "model_name": "NEXTSEQ",
         "vendor_name": "ILLUMINA",
         "software_name": "RTA",
         "software_version": "RTA2"
     }, {
         "platform_igf_id": "K00345",
         "model_name": "HISEQ4000",
         "vendor_name": "ILLUMINA",
         "software_name": "RTA",
         "software_version": "RTA2"
     }]
     # barcode handling rules per platform/flowcell type
     flowcell_rule_data = [{
         "platform_igf_id": "K00345",
         "flowcell_type": "HiSeq 3000/4000 SR",
         "index_1": "NO_CHANGE",
         "index_2": "NO_CHANGE"
     }, {
         "platform_igf_id": "K00345",
         "flowcell_type": "HiSeq 3000/4000 PE",
         "index_1": "NO_CHANGE",
         "index_2": "REVCOMP"
     }, {
         "platform_igf_id": "NB501820",
         "flowcell_type": "NEXTSEQ",
         "index_1": "NO_CHANGE",
         "index_2": "REVCOMP"
     }, {
         "platform_igf_id": "M03291",
         "flowcell_type": "MISEQ",
         "index_1": "NO_CHANGE",
         "index_2": "NO_CHANGE"
     }]
     pl = PlatformAdaptor(**{'session': base.session})
     pl.store_platform_data(data=platform_data)
     pl.store_flowcell_barcode_rule(data=flowcell_rule_data)
     # one MISEQ and one NEXTSEQ sequencing run
     seqrun_data = [{
         'seqrun_igf_id': '180416_M03291_0139_000000000-BRN47',
         'flowcell_id': '000000000-BRN47',
         'platform_igf_id': 'M03291',
         'flowcell': 'MISEQ',
     }, {
         'seqrun_igf_id': '180416_NB03291_013_000000001-BRN47',
         'flowcell_id': '000000001-BRN47',
         'platform_igf_id': 'NB501820',
         'flowcell': 'NEXTSEQ',
     }]
     sra = SeqrunAdaptor(**{'session': base.session})
     sra.store_seqrun_and_attribute_data(data=seqrun_data)
     # project with two samples: known (HG38) and unknown species
     project_data = [{'project_igf_id': 'projectA'}]
     pa = ProjectAdaptor(**{'session': base.session})
     pa.store_project_and_attribute_data(data=project_data)
     sample_data = [
         {
             'sample_igf_id': 'sampleA',
             'project_igf_id': 'projectA',
             'species_name': 'HG38'
         },
         {
             'sample_igf_id': 'sampleB',
             'project_igf_id': 'projectA',
             'species_name': 'UNKNOWN'
         },
     ]
     sa = SampleAdaptor(**{'session': base.session})
     sa.store_sample_and_attribute_data(data=sample_data)
     # three experiments: sampleA on MISEQ and NEXTSEQ, sampleB on MISEQ
     experiment_data = [
         {
             'project_igf_id': 'projectA',
             'sample_igf_id': 'sampleA',
             'experiment_igf_id': 'sampleA_MISEQ',
             'library_name': 'sampleA',
             'library_source': 'TRANSCRIPTOMIC_SINGLE_CELL',
             'library_strategy': 'RNA-SEQ',
             'experiment_type': 'TENX-TRANSCRIPTOME-3P',
             'library_layout': 'PAIRED',
             'platform_name': 'MISEQ',
         },
         {
             'project_igf_id': 'projectA',
             'sample_igf_id': 'sampleA',
             'experiment_igf_id': 'sampleA_NEXTSEQ',
             'library_name': 'sampleA',
             'library_source': 'UNKNOWN',
             'library_strategy': 'RNA-SEQ',
             'experiment_type': 'TENX-TRANSCRIPTOME-3P',
             'library_layout': 'PAIRED',
             'platform_name': 'NEXTSEQ',
         },
         {
             'project_igf_id': 'projectA',
             'sample_igf_id': 'sampleB',
             'experiment_igf_id': 'sampleB_MISEQ',
             'library_name': 'sampleB',
             'library_source': 'TRANSCRIPTOMIC_SINGLE_CELL',
             'library_strategy': 'RNA-SEQ',
             'experiment_type': 'TENX-TRANSCRIPTOME-3P',
             'library_layout': 'PAIRED',
             'platform_name': 'MISEQ',
         },
     ]
     ea = ExperimentAdaptor(**{'session': base.session})
     # NOTE(review): ExperimentAdaptor's store method is named
     # store_project_and_attribute_data - confirm this is the intended
     # API for storing experiment rows
     ea.store_project_and_attribute_data(data=experiment_data)
     # one run per experiment, linked to the matching seqrun/lane
     run_data = [{
         'experiment_igf_id': 'sampleA_MISEQ',
         'seqrun_igf_id': '180416_M03291_0139_000000000-BRN47',
         'run_igf_id': 'sampleA_MISEQ_000000000-BRN47_1',
         'lane_number': '1'
     }, {
         'experiment_igf_id': 'sampleA_NEXTSEQ',
         'seqrun_igf_id': '180416_NB03291_013_000000001-BRN47',
         'run_igf_id': 'sampleA_NEXTSEQ_000000001-BRN47_2',
         'lane_number': '2'
     }, {
         'experiment_igf_id': 'sampleB_MISEQ',
         'seqrun_igf_id': '180416_M03291_0139_000000000-BRN47',
         'run_igf_id': 'sampleB_MISEQ_HVWN7BBXX_1',
         'lane_number': '1'
     }]
     ra = RunAdaptor(**{'session': base.session})
     ra.store_run_and_attribute_data(data=run_data)
     # R1 fastq file record for each run (paths only, not on disk)
     file_data = [
         {
             'file_path':
             '/path/sampleA_MISEQ_000000000-BRN47_1_R1.fastq.gz',
             'location': 'HPC_PROJECT',
             'md5': 'fd5a95c18ebb7145645e95ce08d729e4',
             'size': '1528121404',
         },
         {
             'file_path':
             '/path/sampleA_NEXTSEQ_000000001-BRN47_2_R1.fastq.gz',
             'location': 'HPC_PROJECT',
             'md5': 'fd5a95c18ebb7145645e95ce08d729e4',
             'size': '1528121404',
         },
         {
             'file_path': '/path/sampleB_MISEQ_HVWN7BBXX_1_R1.fastq.gz',
             'location': 'HPC_PROJECT',
             'md5': 'fd5a95c18ebb7145645e95ce08d729e4',
             'size': '1528121404',
         },
     ]
     fa = FileAdaptor(**{'session': base.session})
     fa.store_file_and_attribute_data(data=file_data)
     # run-level fastq collections, one per run
     collection_data = [{
         'name': 'sampleA_MISEQ_000000000-BRN47_1',
         'type': 'demultiplexed_fastq',
         'table': 'run'
     }, {
         'name': 'sampleA_NEXTSEQ_000000001-BRN47_2',
         'type': 'demultiplexed_fastq',
         'table': 'run'
     }, {
         'name': 'sampleB_MISEQ_HVWN7BBXX_1',
         'type': 'demultiplexed_fastq',
         'table': 'run'
     }]
     # attach each fastq file to its run collection
     collection_files_data = [{
         'name':
         'sampleA_MISEQ_000000000-BRN47_1',
         'type':
         'demultiplexed_fastq',
         'file_path':
         '/path/sampleA_MISEQ_000000000-BRN47_1_R1.fastq.gz'
     }, {
         'name':
         'sampleA_NEXTSEQ_000000001-BRN47_2',
         'type':
         'demultiplexed_fastq',
         'file_path':
         '/path/sampleA_NEXTSEQ_000000001-BRN47_2_R1.fastq.gz'
     }, {
         'name':
         'sampleB_MISEQ_HVWN7BBXX_1',
         'type':
         'demultiplexed_fastq',
         'file_path':
         '/path/sampleB_MISEQ_HVWN7BBXX_1_R1.fastq.gz'
     }]
     ca = CollectionAdaptor(**{'session': base.session})
     ca.store_collection_and_attribute_data(data=collection_data)
     ca.create_collection_group(data=collection_files_data)
     base.close_session()