def main():
    option_parser, opts, args =\
       parse_command_line_parameters(**script_info)

    
    submit_to_test_db = opts.submit_to_test_db
    fasta_file_paths = opts.fasta_file_paths
    study_id = opts.study_id
    output_dir = opts.output_dir
    analysis_id = opts.analysis_id
    seq_run_id = opts.seq_run_id
    user_id = opts.user_id
    split_lib_md5 = opts.split_lib_md5

    # submit_to_test_db arrives as a string from the command line
    if submit_to_test_db == 'False':
        # Load the data into the production database
        data_access = data_access_factory(ServerConfig.data_access_type)
    else:
        # Load the data into the test database
        data_access = data_access_factory(DataAccessType.qiime_test)

    split_library_id = load_split_lib_sequences(data_access, output_dir,
                                                analysis_id, seq_run_id,
                                                split_lib_md5)
    
    print 'Completed database loading.'
def main():
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    fasta_file = dict(MinimalFastaParser(open(opts.input_file, 'U')))
    # file names are expected to start with '<reference>_<threshold>_'
    fname = split(opts.input_file)[-1].split('_')
    ref_dataset = fname[0]
    if ref_dataset == 'gg':
        reference_dataset = 'GREENGENES_REFERENCE'
    else:
        raise ValueError('Unrecognized reference dataset: %s' % ref_dataset)
    threshold = fname[1]
    print threshold
    try:
        from data_access_connections import data_access_factory
        from enums import ServerConfig, DataAccessType
        import cx_Oracle
        if opts.submit_to_test_db:
            data_access = data_access_factory(DataAccessType.qiime_test)
        else:
            data_access = data_access_factory(ServerConfig.data_access_type)
    except ImportError:
        print "NOT IMPORTING QIIMEDATAACCESS"
        
    prokmsas = []
    for prok_id in fasta_file:
        prokmsas.append('%s\t%s\t%s' % (str(prok_id), str(threshold),
                                        reference_dataset))
    con = data_access.getSFFDatabaseConnection()
    cur = con.cursor()

    # one type flag per tab-delimited field: string, integer, string
    data_types = ['s', 'i', 's']
    for input_set in input_set_generator(prokmsas, cur, data_types):
        valid = data_access.loadSeqToSourceMap(True, input_set)
        if not valid:
            raise ValueError('Error: Unable to load Sequence to Source Map!')
Example #3
def export_full_db_to_fasta(output_fasta_name, distinct_list):
    """
    Exports the entire sequence collection to fasta
    
    This function exports the entire database to fasta format. It does not care
    about public/private nor does it depend on any linkages to other metadata.
    """
    output_fasta = open(output_fasta_name, 'w')
    data_access = data_access_factory(ServerConfig.data_access_type)
    
    seqs = data_access.getSequencesFullDatabase()
    md5s = []
    for seq in seqs:
        sequence_name, sequence_string, md5_checksum = seq[0], seq[1], seq[2]
        
        if distinct_list:
            if md5_checksum not in md5s:
                md5s.append(md5_checksum)
                output_fasta.write('>%s\n%s\n' % (sequence_name, sequence_string))
                print 'Exporting sequence: %s' % sequence_name
            else:
                print 'Duplicate checksum found for sequence name: %s. Skipping...' % sequence_name
        else:
            output_fasta.write('>%s\n%s\n' % (sequence_name, sequence_string))
            print 'Exporting sequence: %s' % sequence_name

    output_fasta.close()
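A minimal usage sketch for the exporter above; the output paths are hypothetical:

# Hypothetical output paths
export_full_db_to_fasta('full_db_distinct.fasta', True)   # one per distinct md5
export_full_db_to_fasta('full_db_all.fasta', False)       # duplicates included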
Example #4
    def __init__(self, *args, **kwargs):
        self.data_access = data_access_factory(ServerConfig.data_access_type)
        self.username = os.environ['USER']
        self.home = os.environ['HOME']
        self.Jobs = {}  # pbs job id -> job object
        self.interval = 0
        super(Poller, self).__init__(*args, **kwargs)
Example #5
def export_fasta_from_sample(study_id, sample_id, output_fasta):
    """
    Exports all sequences for the supplied sample_id
    
    This function exports all sequences associated to the given study_id. Note that
    the sequences must directly map to the sample_id supplied.
    """
    # If name passed is a string, open the file. Otherwise ignore as the file
    # has already been opened by the parent
    file_opened_here = False
    if isinstance(output_fasta, str):
        output_fasta = open(output_fasta, "w")
        file_opened_here = True

    # Get our copy of data_access
    data_access = data_access_factory(ServerConfig.data_access_type)
    seqs = data_access.getSequencesFromSample(study_id, sample_id)
    # print '------------------------------ Seqs for sample ID: %s' % str(sample_id)
    for seq in seqs:
        output_fasta.write(">%s\n%s\n" % (seq, seqs[seq]))
        # print seq

    # Close the file if opened in this function
    if file_opened_here:
        output_fasta.close()
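Because output_fasta may be a filename or an already-open file object, a caller can stream several samples into one file. A sketch with hypothetical ids:

# Hypothetical study and sample ids
out = open('two_samples.fasta', 'w')
export_fasta_from_sample(101, 5231, out)
export_fasta_from_sample(101, 5232, out)
out.close()  # the caller owns the handle, so the function leaves it open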
Example #6
def export_fasta_from_study(study_id, output_fasta):
    """
    Exports a fasta file for all sequences found in the supplied study
    
    This function exports all sequences associated to the given study id. Note that
    the sequences must map to a sample in the study to be exported. 
    """
    # If name passed is a string, open the file. Otherwise ignore as the file
    # has already been opened by the parent
    file_opened_here = False
    if isinstance(output_fasta, str):
        output_fasta = open(output_fasta, "w")
        file_opened_here = True

    # Get our copy of data access
    data_access = data_access_factory(ServerConfig.data_access_type)

    # Get all samples for this study
    sample_ids = data_access.getSampleIDsFromStudy(study_id)
    for sample_id in sample_ids:
        export_fasta_from_sample(study_id, sample_id, output_fasta)

    # Close the file if opened in this function
    if file_opened_here:
        output_fasta.close()
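The study-level export reuses the per-sample exporter above; a sketch with a hypothetical study id:

# Writes one fasta covering every sample in the study
export_fasta_from_study(101, 'study_101.fasta')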
def main():
    option_parser, opts, args =\
       parse_command_line_parameters(**script_info)

    try:
        from data_access_connections import data_access_factory
        from enums import ServerConfig
        import cx_Oracle
        data_access = data_access_factory(ServerConfig.data_access_type)
    except ImportError:
        print "NOT IMPORTING QIIMEDATAACCESS"
        
    # the query file holds a Python-literal dict, so eval'ing it assumes a
    # trusted source
    query_dict = eval(open(opts.query).read())
    table_col_value = {}
    for i in query_dict:
        if i not in ['otu_table', 'mapping_file', 'pcoa_plot']:
            table_col_value[i] = query_dict[i]

    fs_fp = opts.fs_fp
    web_fp = opts.web_fp
    file_name_prefix = opts.fname_prefix
    user_id = int(opts.user_id)
    meta_id = int(opts.meta_id)
    beta_metric = opts.beta_metric
    rarefied_at = int(opts.rarefied_at)

    write_mapping_and_pcoa_plots(data_access, table_col_value, fs_fp, web_fp,
                                 file_name_prefix, user_id, meta_id,
                                 beta_metric, rarefied_at)
Example #8
def get_ag_metadata_bulk(barcodes, participant_type):
    """Calls ag_get_barcode_metadata on a list of barcodes

    barcodes should be an iterable list of barcodes (or an open
    file that has one barcode per line)

    participant_type should be either 'human' or 'animal'
    """
    if participant_type not in ('human', 'animal'):
        raise ValueError("participant_type must be either 'human' or 'animal'")

    ag_data_access = data_access_factory(ServerConfig.data_access_type,
                                         'american_gut')

    results = []
    for line in barcodes:
        bc = line.strip()
        if participant_type == 'human':
            metadata = ag_data_access.AGGetBarcodeMetadata(bc)
        else: # participant_type == 'animal'
            metadata = ag_data_access.AGGetBarcodeMetadataAnimal(bc)
        if len(metadata) != 1:
            yield False, bc
        else:
            yield True, metadata[0]
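Since get_ag_metadata_bulk is a generator yielding (ok, payload) pairs, callers typically split successes from failures. A minimal sketch, assuming a hypothetical barcodes.txt with one barcode per line:

# Hypothetical input file; 'U' mode matches the style used elsewhere here
failures = []
for ok, payload in get_ag_metadata_bulk(open('barcodes.txt', 'U'), 'human'):
    if ok:
        print payload             # one metadata row
    else:
        failures.append(payload)  # the barcode that returned != 1 row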
Example #9
    def __init__(self, metadataFile, study_id):
        self._invalid_rows = []
        self._columns = []
        self._log = []
        self._metadataFile = metadataFile
        self._data_access = data_access_factory(ServerConfig.data_access_type)
        self._study_id = study_id
Example #10
    def __init__(self, study_id, web_app_user_id, logger):
        self.data_access = data_access_factory(ServerConfig.data_access_type)
        self.study_id = study_id
        self.web_app_user_id = web_app_user_id
        self.invalid_values = set(['', ' ', None, 'None'])
        self.required_columns = set(['library_construction_protocol',
                                     'experiment_design_description',
                                     'taxon_id', 'description'])
        self.study_info = None
        self.logger = logger
Example #11
def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # Some needed variables
    otu_seqs_file = opts.otu_seqs_file
    debug = opts.debug
    data_access = data_access_factory(ServerConfig.data_access_type)

    # Load the GG sequences
    load_gg_seqs(otu_seqs_file, data_access)
def main():
    option_parser, opts, args =\
       parse_command_line_parameters(**script_info)

    try:
        from data_access_connections import data_access_factory
        from enums import ServerConfig, DataAccessType
        import cx_Oracle
        if opts.submit_to_test_db:
            data_access = data_access_factory(DataAccessType.qiime_test)
        else:
            data_access = data_access_factory(ServerConfig.data_access_type)
    except ImportError:
        print "NOT IMPORTING QIIMEDATAACCESS"

    input_dir = opts.input_otu_dir

    load_otu_mapping(data_access, input_dir)
Example #13
def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # Some needed variables
    otu_seqs_file = opts.otu_seqs_file
    debug = opts.debug
    data_access = data_access_factory(ServerConfig.data_access_type)

    # Load the GG sequences
    load_gg_seqs(otu_seqs_file, data_access)
def run_process_illumina_through_split_lib(study_id,run_prefix,input_fp,
    mapping_fp, output_dir, 
    command_handler, params, qiime_config,
    write_to_all_fasta=False,
    status_update_callback=print_to_stdout):
    """ NOTE: Parts of this function are a directly copied from the
        run_qiime_data_preparation function from the workflow.py library file 
        in QIIME.
    
        The steps performed by this function are:
          1) De-multiplex sequences. (split_libraries_fastq.py)
    
    """

    # Prepare some variables for the later steps
    filenames=input_fp.split(',')
    commands = []
    create_dir(output_dir)
    python_exe_fp = qiime_config['python_exe_fp']
    script_dir = get_qiime_scripts_dir()
    logger = WorkflowLogger(generate_log_fp(output_dir),
                            params=params,
                            qiime_config=qiime_config)
    
    # copy the mapping file
    copied_mapping=split(mapping_fp)[-1]
    mapping_input_fp_copy=join(output_dir, copied_mapping)
    copy_mapping_cmd='cp %s %s' % (mapping_fp,mapping_input_fp_copy)
    commands.append([('CopyMapping', copy_mapping_cmd)])

    # sort the filenames
    filenames.sort()
    
    # determine which file is seq-file and which is barcode-file and associate
    # to mapping file
    if len(filenames) == 1:
        try:
            # Format of sample_id needs to be seqs_<sample_name>.<sequence_prep_id>.fastq
            data_access = data_access_factory(ServerConfig.data_access_type)
            sql = """
            select  s.sample_name || '.' || sp.sequence_prep_id 
            from    sample s 
                    inner join sequence_prep sp 
                    on s.sample_id = sp.sample_id
            where   s.study_id = {0}
                    and sp.run_prefix = '{1}'
            """.format(study_id, run_prefix[:-1])
            sample_and_prep = data_access.dynamicMetadataSelect(sql).fetchone()[0]
            input_str = '-i {0} --sample_id {1}'.format(filenames[0], sample_and_prep)
        except Exception, e:
            error = 'Failed to obtain sample and sequence prep info for study_id {0} and run_prefix {1}\n'.format(study_id, run_prefix)
            error += 'SQL was: \n {0} \n'.format(sql)
            error += 'Original exception was: \n {0}'.format(str(e))
            raise Exception(error)
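A hedged invocation sketch for the function above; every id and path is a placeholder, and command_handler/params/qiime_config are assumed to come from QIIME's 1.x-era workflow machinery (e.g. call_commands_serially and parse_qiime_parameters):

# Placeholders throughout; not a runnable deployment example
run_process_illumina_through_split_lib(
    study_id=101, run_prefix='s_1_sequences_',
    input_fp='s_1_sequences.fastq,s_1_barcodes.fastq',  # comma-separated pair
    mapping_fp='study_101_mapping.txt',
    output_dir='illumina_processed/',
    command_handler=call_commands_serially,
    params=params, qiime_config=qiime_config)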
def main():
    option_parser, opts, args =\
       parse_command_line_parameters(**script_info)

    try:
        from data_access_connections import data_access_factory
        from enums import ServerConfig, DataAccessType
        import cx_Oracle
        if opts.submit_to_test_db:
            data_access = data_access_factory(DataAccessType.qiime_test)
        else:
            data_access = data_access_factory(ServerConfig.data_access_type)
    except ImportError:
        print "NOT IMPORTING QIIMEDATAACCESS"

    fasta_files = opts.processed_fasta_fnames
    study_id = opts.study_id

    analysis_id = submit_processed_data_to_db(data_access,
                                              fasta_files=fasta_files,
                                              metadata_study_id=study_id)
def submitJobsToQiime(study_id, user_id, mapping_file_dir, process_only,
                      submit_to_test_db):
    # Instantiate one copy of data access for this process
    data_access = data_access_factory(ServerConfig.data_access_type)

    # Get the SFF files associated with this study
    sff_files = data_access.getSFFFiles(study_id)

    # Get the sequencing platform for this study
    sequencing_platform = data_access.getStudyPlatform(study_id)
    
    # Generate the mapping files
    mapping_files = writeMappingFiles(study_id, data_access, mapping_file_dir)
    
    # Figure out which mapping file pairs with each SFF file
    file_map = {}
    param_map={}
    for mapping_file in mapping_files:
        # Skip the mapping file if it's not of the correct naming format
        if len(mapping_file.split('__')) != 2:
            continue

        run_prefix = os.path.basename(mapping_file).split('__')[0]
        matching_sff_files = []
        
        # Find the proper params file
        barcode_length = data_access.checkRunPrefixBarcodeLengths(study_id, run_prefix)
        param_file = '%s/git/qiime_web_app/python_code/parameter_files/%s__custom_parameters_uclust_ref_gg97.txt' % (ServerConfig.home,str(barcode_length))
        param_map[mapping_file]=param_file
        
        for sff_file in sff_files:
            sff_file_basename = os.path.splitext(os.path.basename(sff_file))[0].upper()
            # If the run_prefix matches the SFF file name exactly, assume only
            # one SFF for this run
            if run_prefix.upper() == sff_file_basename:
                matching_sff_files.append(sff_file)
                file_map[mapping_file] = matching_sff_files
                continue
                
            # If the run_prefix is contained in the file name, find all that match
            # and submit them together with the current mapping file
            elif sff_file_basename.startswith(run_prefix.upper()):
                # If it's the first item for this mapping file name, assign the list
                if not file_map.get(mapping_file):
                    file_map[mapping_file] = matching_sff_files
                file_map[mapping_file].append(sff_file)
            # If we get here, there are extra SFF files with no matching mapping file. 
            # For now, do nothing... may need to add some handling code at a later date.
            else:
                pass
    
    # Submit jobs to the queue
    for mapping_file in file_map:
        submitJob(study_id, user_id, param_map[mapping_file], mapping_file, sequencing_platform, file_map[mapping_file],process_only,submit_to_test_db, data_access)
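The pairing above hinges on the mapping-file naming convention: run prefix, double underscore, then a suffix. A small sketch of the two match branches, with hypothetical names:

import os

run_prefix = 'FLP3FBN01'  # hypothetical, as parsed from 'FLP3FBN01__map.txt'
for sff in ('FLP3FBN01.sff', 'FLP3FBN01_part2.sff', 'OTHERRUN.sff'):
    base = os.path.splitext(os.path.basename(sff))[0].upper()
    # exact-name branch, then startswith branch, as in the loop above
    print sff, base == run_prefix.upper(), base.startswith(run_prefix.upper())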
def main():
    option_parser, opts, args =\
       parse_command_line_parameters(**script_info)

    # get command-line parameters
    rep_set_fp=opts.rep_set_fp
    otu_map_fp=opts.otu_map_fp
    sequence_source=opts.sequence_source
    output_dir = opts.output_dir
    
    # create output directory
    create_dir(output_dir)

    # get data access connection
    data_access = data_access_factory(ServerConfig.data_access_type)

    # build the SQL query statement
    statement = "select rf.ssu_sequence_id, rf.reference_id " +\
                "from gg_plus_denovo_reference rf " +\
                "inner join sequence_source ss " +\
                "on rf.sequence_source_id = ss.sequence_source_id " +\
                "where ss.source_name = '%s'" % (sequence_source)

    # Run the statement
    con = data_access.getSFFDatabaseConnection()
    cur = con.cursor()

    # create a lookup dictionary from ssu_sequence_id to reference_id
    results = cur.execute(statement)
    ssu_id_to_ref_id = {}
    for i in results:
        ssu_id_to_ref_id[str(i[0])] = str(i[1])

    # write new fasta file with updated assignments
    new_fasta_fp = join(output_dir, 'rep_set_reassigned_otu_ids.fasta')
    openfasta = open(new_fasta_fp, 'w')

    # write a mapping file for topiary explorer
    new_map_fp = join(output_dir, 'new_otu_id_mapping.txt')
    openmap = open(new_map_fp, 'w')

    # parse the rep set and write a new fasta file, swapping the first two
    # header fields so the reference id becomes the sequence label
    seqs = MinimalFastaParser(open(rep_set_fp, 'U'))
    for seq_name, seq in seqs:
        seqs_name_split = seq_name.split()
        tmp_store = seqs_name_split[0]
        seqs_name_split[0] = seqs_name_split[1]
        seqs_name_split[1] = tmp_store
        openfasta.write('>%s\n%s\n' % (' '.join(seqs_name_split), seq))

        openmap.write('%s\t%s\n' % (seqs_name_split[0], sequence_source))

    openfasta.close()
    openmap.close()
Example #18
    def test_print_study_info_and_values_table(self):
        """ test_print_study_info_and_values_table: tests that the study
            summary information is written below the select-box
        """
        data_access = data_access_factory(ServerConfig.data_access_type)
        analysis_data = []
        results = data_access.getQiimeSffDbSummary(0)

        for row in results:
            analysis_data.append(row)

        self.assertEqual(print_study_info_and_values_table(analysis_data,
                                                           data_access),
                         exp_output)
def main():
    option_parser, opts, args =\
       parse_command_line_parameters(**script_info)

    
    submit_to_test_db = opts.submit_to_test_db
    fasta_file_paths = opts.fasta_file_paths
    study_id = opts.study_id
    output_dir = opts.output_dir
    platform = opts.platform
    user_id = opts.user_id

    # submit_to_test_db arrives as a string from the command line
    if submit_to_test_db == 'False':
        # Load the data into the production database
        data_access = data_access_factory(ServerConfig.data_access_type)
    else:
        # Load the data into the test database
        data_access = data_access_factory(DataAccessType.qiime_test)

    # Submit the sequence data according to the sequencing platform
    if platform in ('FLX', 'TITANIUM'):
        print 'Submitting SFF data to database...'
        analysis_id = submit_sff_and_split_lib(data_access, fasta_file_paths,
                                               study_id)
    elif platform == 'ILLUMINA':
        print 'Submitting Illumina data to database...'
        analysis_id = submit_illumina_and_split_lib(data_access,
                                                    fasta_file_paths,
                                                    study_id, output_dir)
    elif platform == 'FASTA':
        print 'Submitting FASTA data to database...'
        analysis_id = submit_fasta_and_split_lib(data_access,
                                                 fasta_file_paths, study_id,
                                                 output_dir)
    else:
        raise ValueError('Unrecognized platform: %s' % platform)

    study_info = data_access.getStudyInfo(study_id, user_id)
    if study_info['investigation_type'].lower() == 'metagenome':
        # skip OTU loading for metagenome studies
        pass
    else:
        print 'Submitting OTU data to database...'
        load_otu_mapping(data_access, output_dir, analysis_id)
    
    print 'Completed database loading.'
def validateFileExistence(study_id, study_dir):
    '''
    Check that every sequence filename recorded in the database for this
    study exists in the filesystem; return the list of missing filenames.
    '''
    data_access = data_access_factory(ServerConfig.data_access_type)
    absence_list = []
    file_list = os.listdir(study_dir)
    for filename in data_access.getSFFFiles(study_id):
        filename = os.path.basename(filename)
        if filename not in file_list:
            absence_list.append(filename)
    return absence_list
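A usage sketch, borrowing the study-directory pattern that appears further down this page; the study id is hypothetical:

# Hypothetical id; the directory layout mirrors base_study_path below
missing = validateFileExistence(101,
                                '/home/wwwuser/user_data/studies/study_101')
if missing:
    print 'Recorded in the DB but absent on disk: %s' % ', '.join(missing)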
def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)
    really = opts.really

    # if level is set to DEBUG log messages will be written
    logging.basicConfig(filename=opts.output_log_fp, level=logging.DEBUG,
                        format='[%(asctime)s].%(levelname)s: %(message)s')

    ag_data_access = data_access_factory(ServerConfig.data_access_type,
                                         'american_gut')

    # cursor to update the sent e-mails
    con = ag_data_access.getMetadataDatabaseConnection()

    cursor = ag_data_access.dynamicMetadataSelect("""
        select  al.name, al.email, ak.kit_verification_code, ak.supplied_kit_id, ak.kit_password, ak.swabs_per_kit
        from ag_login al
            inner join ag_kit ak
            on al.ag_login_id = ak.ag_login_id
            where ak.verification_email_sent = 'n'
        order by al.email""")

    for entry in cursor:
        recipient_name, target_email, verification_code, supplied_kit_id,\
            kit_password, swabs_per_kit = entry

        logging.debug('\n+++++++++++++++++++++++++++++++++++++++++++++++++++\n')

        logging.debug("""recipient_name {0}, target_email {1}, """
            """verification_code {2}, supplied_kit_id {3}, kit_password {4}, """
            """swabs_per_kit {5}\n""".format(recipient_name, target_email,
            verification_code, supplied_kit_id, kit_password, swabs_per_kit))

        buffer_message = BODY_MESSAGE.format(recipient_name, supplied_kit_id, verification_code)

        try:
            logging.debug('Message is %s\n' % buffer_message)
            logging.debug('Sent to %s\n' % target_email)
            
            if really:
                send_email(buffer_message, SUBJECT, target_email)
                query_string = "update ag_kit set verification_email_sent = 'y' where supplied_kit_id = '{0}'".format(supplied_kit_id)
                con.cursor().execute(query_string)
                con.cursor().execute('commit')
            else:
                logging.debug('DRY RUNNING, NOT SENDING A MESSAGE\n')
        except Exception, e:
            logging.debug('Exception value is %s\n' % str(e))
            logging.debug('ERROR SENDING TO: %s' % target_email)

        logging.debug('+++++++++++++++++++++++++++++++++++++++++++++++++++\n\n')
def exportStudyToEBISRA(study_id, user_id):
    # Instantiate one copy of data access for this process
    data_access = None
    try:
        data_access = data_access_factory(ServerConfig.data_access_type)

        # Submit the job
        job_id = data_access.createTorqueJob('ExportToEBISRAHandler', 'StudyID=%s' % study_id, user_id, study_id)

        # Make sure a legit job_id was created. If not, inform the user there was a problem
        if job_id < 0:
            raise Exception('There was an error creating the job. Please contact the system administrator.')
    finally:
        data_access = None
def main():
    option_parser, opts, args =\
       parse_command_line_parameters(**script_info)

    
    submit_to_test_db = opts.submit_to_test_db
    fasta_file_paths = opts.fasta_file_paths
    study_id = opts.study_id
    output_dir = opts.output_dir

    # submit_to_test_db arrives as a string from the command line
    if submit_to_test_db == 'False':
        # Load the data into the production database
        data_access = data_access_factory(ServerConfig.data_access_type)
    else:
        # Load the data into the test database
        data_access = data_access_factory(DataAccessType.qiime_test)

    # Submit the SFF data and split-library results
    print 'Submitting SFF data to database...'
    analysis_id = submit_sff_and_split_lib(data_access, fasta_file_paths, study_id)
    print 'Submitting OTU data to database...'
    load_otu_mapping(data_access, output_dir, analysis_id)
    print 'Completed database loading.'
Example #24
    def run(self):
        
        da = data_access_factory(ServerConfig.data_access_type)
        item_count = len(self.item_list)
        
        for item in self.item_list:
            
            # Reset the key_field
            key_field = None
        
            # Put the parts into more meaningful variables
            parts = item.split(':')
            field_type = parts[0]
            row_num = parts[1]
            field_name = parts[3]
            field_value = self.form[item]
        
            # Figure out what the key field is going to be
            if field_type == 'sample':
                key_field = self.sample_key_fields[row_num]
            elif field_type == 'prep':
                key_field = self.prep_key_fields[row_num]
            elif field_type == 'study':
                key_field = self.study_name
        
            if len(self.host_key_fields) > 0 and field_type == 'sample':
                try:
                    host_key_field = self.host_key_fields[row_num]
                except KeyError:
                    # Fall back to None if no host key field exists for this row
                    host_key_field = None
            else:
                host_key_field = None

            # Just in case...
            if key_field is None:
                continue

            # For oracle, escape single quotes by doubling them
            field_value = field_value.replace("'", "''")

            try:
                result = da.writeMetadataValue(field_type, key_field,
                                               field_name, field_value,
                                               self.study_id, host_key_field,
                                               row_num, self.lock)
                # Notify parent that an item was inserted
                self.updateCallback()
            except Exception, e:
                self.errorCallback(e)
    def test_process_items(self):
        """ test_process_items: processes the md5s and checks them against
            the DB
        """
        # make sure files get cleaned up
        self.files_to_remove.append(self.leftovers)
        self.files_to_remove.append(self.input_fasta)

        otu_map = {}
        data_access = data_access_factory(ServerConfig.data_access_type)
        process_items(md5_list, md5_sequence_map, md5_seq_id_map, otu_map,
                      data_access, open(self.leftovers, 'w'))

        # check the outputs are correct
        self.assertEqual(open(self.leftovers).read(), exp_failures2)
        self.assertEqual(len(otu_map.keys()), 1)
def submit_mapping_to_database(processed_results, debug=True):
    data_access = data_access_factory(ServerConfig.data_access_type)
    
    # Iterate over each folder's data - can be many processed_data_ folders for a single study
    for directory in processed_results:
        # Unpack the values for each processed_data_ directory
        mapping, seq_header_lines, otu_header_lines = processed_results[directory]

        # Unpack and iterate over each mapping
        for sample_name, sequence_count, otu_count, percent_assignment in mapping:
            sequence_prep_id = sample_name.split('.')[-1]
        
            # Write values to database for this sequence_prep_id        
            data_access.updateSeqOtuCounts(sequence_prep_id, sequence_count, otu_count, percent_assignment)
        
            if debug:
                print 'added to database: prep: {0}, seq_count: {1}, otu_count: {2}'.format(\
                    str(sequence_prep_id), str(sequence_count), str(otu_count))
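The unpacking above implies a specific shape for processed_results; a hypothetical value that would satisfy it:

# Keyed by processed_data_ directory name; each value is a 3-tuple of
# (mapping rows, seq header lines, otu header lines)
processed_results = {
    'processed_data_1': (
        [('sample_a.123', 1500, 320, 95.2)],  # (sample_name, sequence_count,
                                              #  otu_count, percent_assignment)
        ['# sequence header'],
        ['# otu header'],
    ),
}
submit_mapping_to_database(processed_results, debug=True)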
Example #27
    def __init__(self, study_id, web_app_user_id, debug = False):
        self.hostname = None
        self.study_url = None
        self.sample_url = None
        self.library_url = None
        self.sequence_url = None
        self.study_id = study_id
        self.base_study_path = '/home/wwwuser/user_data/studies/study_{0}'.format(study_id)

        self.web_app_user_id = web_app_user_id
        # Set up a logger so we can see what's going on
        log_file_path = join(self.base_study_path, 'ebi_export_log.txt')
        self.logger = DataLogger(log_file_path, debug)
        
        self.rest_data_helper = RestDataHelper(study_id, web_app_user_id, self.logger)
        self.data_access = data_access_factory(ServerConfig.data_access_type)
        #self.errors = []
        self.debug = debug
Example #28
def export_db_to_fasta(output_fasta_name):
    """
    Exports sequences to fasta that have corresponding metadata
    
    This function exports all sequences to fasta which have corresponding metadata
    in the metadata schema. It will skip the rest. It DOES export private samples.
    """
    output_fasta = open(output_fasta_name, 'w')
    data_access = data_access_factory(ServerConfig.data_access_type)
    
    # Get all studies from the database (note the hard-coded user id)
    results = data_access.getUserStudyNames(12161, 1, 'qiime')

    for study_id, study_name, t, s in results:
        print '------------------------ Exporting data from study ID: %s' % study_id
        print study_name
        print '\n\n'
        export_fasta_from_study(study_id, output_fasta)

    output_fasta.close()
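A one-line usage sketch; the output name is hypothetical:

# Writes every metadata-linked sequence, public or private, to a single file
export_db_to_fasta('all_metadata_linked_seqs.fasta')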
Example #29
def export_fasta_from_sample(study_id, sample_id, output_fasta):
    # If name passed is a string, open the file. Otherwise ignore as the file
    # has already been opened by the parent
    file_opened_here = False
    if isinstance(output_fasta, str):
        output_fasta = open(output_fasta, 'w')
        file_opened_here = True
    
    # Get our copy of data_access
    data_access = data_access_factory(ServerConfig.data_access_type)
    seqs = data_access.getSequencesFromSample(study_id, sample_id)
    #print '------------------------------ Seqs for sample ID: %s' % str(sample_id)
    for seq in seqs:
        output_fasta.write('>%s\n%s\n' % (seq, seqs[seq]))
        #print seq

    # Close the file if opened in this function
    if file_opened_here:
        output_fasta.close()
Example #30
def export_fasta_from_study(study_id, output_fasta):
    # If name passed is a string, open the file. Otherwise ignore as the file
    # has already been opened by the parent
    file_opened_here = False
    if isinstance(output_fasta, str):
        output_fasta = open(output_fasta, 'w')
        file_opened_here = True
    
    # Get our copy of data access
    data_access = data_access_factory(ServerConfig.data_access_type)
    
    # Get all samples for this study
    sample_ids = data_access.getSampleIDsFromStudy(study_id)
    for sample_id in sample_ids:
        export_fasta_from_sample(study_id, sample_id, output_fasta)
    
    # Close the file if opened in this function
    if file_opened_here:
        output_fasta.close()