def test_process_project_data_and_account(self):
     fa=Find_and_register_new_project_data(projet_info_path=os.path.join('.','data/check_project_data'),\
                                         dbconfig=self.dbconfig,\
                                         user_account_template='template/email_notification/send_new_account_info.txt',\
                                         log_slack=False,\
                                         setup_irods=False,\
                                         notify_user=False,\
                                         check_hpc_user=False,\
                                         )
     fa.process_project_data_and_account()
     dbparam = None
     with open(self.dbconfig, 'r') as json_data:
       dbparam = json.load(json_data)
     base = BaseAdaptor(**dbparam)
     base.start_session()
     pa=ProjectAdaptor(**{'session':base.session})
     project_exists=pa.check_project_records_igf_id(project_igf_id='IGFP0002_test_23-5-2017_rna')
     self.assertTrue(project_exists)
     ua=UserAdaptor(**{'session':base.session})
     user_exists=ua.check_user_records_email_id(email_id='*****@*****.**')
     self.assertTrue(user_exists)
     user1=ua.fetch_user_records_email_id(user_email_id='*****@*****.**')
     self.assertEqual(user1.name,'User2')
     sa=SampleAdaptor(**{'session':base.session})
     sample_exists=sa.check_sample_records_igf_id(sample_igf_id='IGF00006')
     self.assertTrue(sample_exists)
     project_user_exists=pa.check_existing_project_user(project_igf_id='IGFP0002_test_23-5-2017_rna',\
                                                        email_id='*****@*****.**')
     self.assertTrue(project_user_exists)
     project_user_exists=pa.check_existing_project_user(project_igf_id='IGFP0002_test_23-5-2017_rna',\
                                                        email_id='*****@*****.**')
     self.assertTrue(project_user_exists)
     base.close_session()
 def test_store_sample_and_attribute_data(self):
     sa = SampleAdaptor(**{'session_class': self.session_class})
     sample_data = [
         {
             'sample_igf_id': 'IGFS001',
             'library_id': 'IGFS001',
             'project_igf_id': 'IGFP0001_test_22-8-2017_rna',
         },
         {
             'sample_igf_id': 'IGFS002',
             'library_id': 'IGFS002',
             'project_igf_id': 'IGFP0001_test_22-8-2017_rna',
         },
         {
             'sample_igf_id': 'IGFS003',
             'library_id': 'IGFS003',
             'project_igf_id': 'IGFP0001_test_22-8-2017_rna',
         },
         {
             'sample_igf_id': 'IGFS004',
             'library_id': 'IGFS004',
             'project_igf_id': 'IGFP0001_test_22-8-2017_rna',
         },
     ]
     sa.start_session()
     sa.store_sample_and_attribute_data(data=sample_data)
     sa1 = sa.check_sample_records_igf_id(sample_igf_id='IGFS001')
     sa.close_session()
     self.assertEqual(sa1, True)
Exemplo n.º 3
0
    def _check_existing_data(self,
                             data,
                             dbsession,
                             table_name,
                             check_column='EXISTS'):
        '''
    An internal function for checking and registering project info
    
    :param data: A pandas data series
    :param dbsession: A sqlalchemy database session object
    :param table_name: A database table name
    :param check_column: Column name for existing data
    '''
        try:
            if not isinstance(data, pd.Series):
                raise ValueError('Expecting a data series and got {0}'.format(
                    type(data)))
            if table_name == 'project':
                if self.project_lookup_column in data and \
                   not pd.isnull(data[self.project_lookup_column]):
                    project_igf_id = data[self.project_lookup_column]
                    pa = ProjectAdaptor(**{'session': dbsession
                                           })  # connect to project adaptor
                    project_exists = pa.check_project_records_igf_id(
                        project_igf_id)
                    if project_exists:  # store data only if project is not existing
                        data[check_column] = True
                    else:
                        data[check_column] = False
                    return data
                else:
                    raise ValueError('Missing or empty required column {0}'.\
                                     format(self.project_lookup_column))
            elif table_name == 'user':
                if self.user_lookup_column in data and \
                   not pd.isnull(data[self.user_lookup_column]):
                    user_email = data[self.user_lookup_column]
                    ua = UserAdaptor(**{'session':
                                        dbsession})  # connect to user adaptor
                    user_exists = ua.check_user_records_email_id(
                        email_id=user_email)
                    if user_exists:  # store data only if user is not existing
                        data[check_column] = True
                    else:
                        data[check_column] = False
                    return data
                else:
                    raise ValueError('Missing or empty required column {0}'.\
                                     format(self.user_lookup_column))
            elif table_name == 'sample':
                if self.sample_lookup_column in data and \
                   not pd.isnull(data[self.sample_lookup_column]):
                    project_igf_id = data[self.project_lookup_column]
                    sample_igf_id = data[self.sample_lookup_column]
                    sa = SampleAdaptor(**{'session': dbsession
                                          })  # connect to sample adaptor
                    sample_project_exists=sa.check_project_and_sample(project_igf_id=project_igf_id,\
                                                                      sample_igf_id=sample_igf_id) # check for existing sample_id and project-id combination
                    if sample_project_exists:  # store data only if sample is not existing
                        data[check_column] = True
                    else:
                        sample_exists = sa.check_sample_records_igf_id(
                            sample_igf_id)  # check for existing sample
                        if sample_exists:
                            raise ValueError('Sample {0} exists in database but not associated with project {1}'.\
                                             format(sample_igf_id,project_igf_id))            # inconsistency in sample project combination
                        data[check_column] = False
                    return data
                else:
                    raise ValueError('Missing or empty required column {0}'.\
                                     format(self.sample_lookup_column))
            elif table_name == 'project_user':
                if self.user_lookup_column in data and \
                    not pd.isnull(data[self.user_lookup_column]) and \
                   self.project_lookup_column in data and \
                    not pd.isnull(data[self.project_lookup_column]):
                    project_igf_id = data[self.project_lookup_column]
                    user_email = data[self.user_lookup_column]
                    pa = ProjectAdaptor(**{'session': dbsession
                                           })  # connect to project adaptor
                    project_user_exists=pa.check_existing_project_user(project_igf_id,\
                                                                       email_id=user_email)
                    if user_email != self.default_user_email and \
                       (self.data_authority_column not in data or \
                       pd.isnull(data[self.data_authority_column])):
                        data[
                            self.
                            data_authority_column] = True  # set user as data authority, filter default user

                    if project_user_exists:  # store data only if sample is not existing
                        data[check_column] = True
                    else:
                        data[check_column] = False
                    return data
                else:
                    raise ValueError('Missing or empty required column {0}, {1}'.\
                                     format(self.project_lookup_column,\
                                            self.user_lookup_column))
            else:
                raise ValueError('table {0} not supported'.format(table_name))
        except:
            raise
    def load_file_to_disk_and_db(self,
                                 input_file_list,
                                 withdraw_exisitng_collection=True,
                                 autosave_db=True,
                                 file_suffix=None,
                                 force=True,
                                 remove_file=False):
        '''
    A method for loading analysis results to disk and database. File will be moved to a new path if base_path is present.
    Directory structure of the final path is based on the collection_table information.
    
    Following will be the final directory structure if base_path is present
    
    project - base_path/project_igf_id/analysis_name
    sample - base_path/project_igf_id/sample_igf_id/analysis_name
    experiment - base_path/project_igf_id/sample_igf_id/experiment_igf_id/analysis_name
    run - base_path/project_igf_id/sample_igf_id/experiment_igf_id/run_igf_id/analysis_name
    
    :param input_file_list: A list of input file to load, all using the same collection info
    :param withdraw_exisitng_collection: Remove existing collection group, DO NOT use this while loading a list of files
    :param autosave_db: Save changes to database, default True
    :param file_suffix: Use a specific file suffix, use None if it should be same as original file
                        e.g. input.vcf.gz to  output.vcf.gz
    :param force: Toggle for removing existing file, default True
    :param remove_file: A toggle for removing existing file from disk, default False
    :returns: A list of final filepath
    '''
        try:
            project_igf_id = None
            sample_igf_id = None
            experiment_igf_id = None
            experiment_igf_id = None
            run_igf_id = None
            output_path_list = list()  # define empty output list
            dbconnected = False
            if self.collection_name is None or \
               self.collection_type is None or \
               self.collection_table is None:
                raise ValueError('File collection information is incomplete'
                                 )  # check for collection information

            base = BaseAdaptor(**{'session_class': self.dbsession_class})
            base.start_session()  # connect to db
            dbconnected = True
            if self.base_path is not None:
                if self.collection_table == 'sample':
                    sa = SampleAdaptor(**{'session': base.session})
                    sample_igf_id = self.collection_name
                    sample_exists = sa.check_sample_records_igf_id(
                        sample_igf_id=sample_igf_id)
                    if not sample_exists:
                        raise ValueError('Sample {0} not found in db'.\
                                         format(sample_igf_id))

                    project_igf_id = \
                      sa.fetch_sample_project(sample_igf_id=sample_igf_id)                # fetch project id for sample
                elif self.collection_table == 'experiment':
                    ea = ExperimentAdaptor(**{'session': base.session})
                    experiment_igf_id = self.collection_name
                    experiment_exists = \
                      ea.check_experiment_records_id(
                        experiment_igf_id=experiment_igf_id)
                    if not experiment_exists:
                        raise ValueError('Experiment {0} not present in database'.\
                                         format(experiment_igf_id))

                    (project_igf_id,sample_igf_id) = \
                        ea.fetch_project_and_sample_for_experiment(
                          experiment_igf_id=experiment_igf_id)                            # fetch project and sample id for experiment
                elif self.collection_table == 'run':
                    ra = RunAdaptor(**{'session': base.session})
                    run_igf_id = self.collection_name
                    run_exists = ra.check_run_records_igf_id(
                        run_igf_id=run_igf_id)
                    if not run_exists:
                        raise ValueError('Run {0} not found in database'.\
                                         format(run_igf_id))

                    (project_igf_id,sample_igf_id,experiment_igf_id) = \
                      ra.fetch_project_sample_and_experiment_for_run(
                        run_igf_id=run_igf_id)                                            # fetch project, sample and experiment id for run
                elif self.collection_table == 'project':
                    pa = ProjectAdaptor(**{'session': base.session})
                    project_igf_id = self.collection_name
                    project_exists = \
                      pa.check_project_records_igf_id(
                        project_igf_id=project_igf_id)
                    if not project_exists:
                        raise ValueError('Project {0} not found in database'.\
                                         format(project_igf_id))

            if self.rename_file and self.analysis_name is None:
                raise ValueError('Analysis name is required for renaming file'
                                 )  # check analysis name

            for input_file in input_file_list:
                final_path = ''
                if self.base_path is None:  # do not move file if base_path is absent
                    final_path = os.path.dirname(input_file)
                else:  # move file path
                    if self.collection_table == 'project':
                        if project_igf_id is None:
                            raise ValueError('Missing project id for collection {0}'.\
                                             format(self.collection_name))

                        final_path = \
                          os.path.join(
                            self.base_path,
                            project_igf_id,
                            self.analysis_name)                                             # final path for project
                    elif self.collection_table == 'sample':
                        if project_igf_id is None or \
                           sample_igf_id is None:
                            raise ValueError('Missing project and sample id for collection {0}'.\
                                             format(self.collection_name))

                        final_path = \
                          os.path.join(
                            self.base_path,
                            project_igf_id,
                            sample_igf_id,
                            self.analysis_name)                                             # final path for sample
                    elif self.collection_table == 'experiment':
                        if project_igf_id is None or \
                           sample_igf_id is None or \
                           experiment_igf_id is None:
                            raise ValueError('Missing project,sample and experiment id for collection {0}'.\
                                             format(self.collection_name))

                        final_path = \
                          os.path.join(
                            self.base_path,
                            project_igf_id,
                            sample_igf_id,
                            experiment_igf_id,
                            self.analysis_name)                                             # final path for experiment
                    elif self.collection_table == 'run':
                        if project_igf_id is None or \
                           sample_igf_id is None or \
                           experiment_igf_id is None or \
                           run_igf_id is None:
                            raise ValueError('Missing project,sample,experiment and run id for collection {0}'.\
                                             format(self.collection_name))

                        final_path = \
                          os.path.join(\
                            self.base_path,
                            project_igf_id,
                            sample_igf_id,
                            experiment_igf_id,
                            run_igf_id,
                            self.analysis_name)                                             # final path for run

                if self.rename_file:
                    new_filename = \
                      self.get_new_file_name(
                        input_file=input_file,
                        file_suffix=file_suffix)
                    final_path = \
                      os.path.join(
                        final_path,
                        new_filename)                                                     # get new filepath
                else:
                    final_path = \
                      os.path.join(
                        final_path,
                        os.path.basename(input_file))

                if final_path != input_file:  # move file if its required
                    final_path = preprocess_path_name(
                        input_path=final_path
                    )  # remove unexpected characters from file path
                    move_file(source_path=input_file,
                              destinationa_path=final_path,
                              force=force
                              )  # move or overwrite file to destination dir

                output_path_list.append(
                    final_path)  # add final path to the output list
                self.create_or_update_analysis_collection(
                    file_path=final_path,
                    dbsession=base.session,
                    withdraw_exisitng_collection=withdraw_exisitng_collection,
                    remove_file=remove_file,
                    autosave_db=autosave_db)  # load new file collection in db
                if autosave_db:
                    base.commit_session()  # save changes to db for each file

            base.commit_session()  # save changes to db
            base.close_session()  # close db connection
            return output_path_list
        except:
            if dbconnected:
                base.rollback_session()
                base.close_session()
            raise