Пример #1
0
    def __init__(self,
                 queryid,
                 result_fn=None,
                 iteration=None,
                 use_iteration_info=True,
                 strict_id_match=False):
        '''
        Set up a parser for an HHsearch result file.

        The constructor stores its arguments on the instance and, when a
        result file name is given, immediately calls ``self.parse()`` and
        keeps the return value in ``self.alignments``.  (The original notes
        describe the parsed output as GenericPairwiseAlignment objects; that
        type is defined outside this block.)

        queryid            -- identifier of the query, stored unchanged.
        result_fn          -- path of the result file.  When it is falsy,
                              nothing is parsed and ``alignments`` stays None.
        iteration          -- iteration the result belongs to.  When None it
                              may be derived from ``result_fn`` (see
                              ``use_iteration_info``).
        use_iteration_info -- when true and ``iteration`` is None, the
                              iteration is taken from
                              ``parse_profile_filename(result_fn)``.
        strict_id_match    -- stored on the instance; controls id matching
                              in query and target lines.
        '''

        self.queryid = queryid
        self.result_fn = result_fn
        self.alignments = None
        self.iteration = iteration
        self.strict_id_match = strict_id_match  # to control id match in query and target lines

        # Derive the iteration from the profile filename only when the caller
        # did not supply one, asked for iteration info to be used, and there
        # actually is a filename to inspect.  Previously the flag was ignored
        # and the helper was called even with result_fn=None.
        if self.iteration is None and use_iteration_info and result_fn:
            self.iteration = parse_profile_filename(result_fn)[2]

        if self.result_fn:
            self.alignments = self.parse()
Пример #2
0
    def __init__(self,
                 queryid=None,
                 result_fn=None,
                 iteration=None,
                 use_iteration_info=True):
        '''
        Set up a parser for a COMPASS result file.

        The constructor stores its arguments on the instance and, when a
        result file name is given, immediately calls ``self.parse()`` and
        keeps the return value in ``self.alignments``.  (The original notes
        describe the parsed output as GenericPairwiseAlignment objects; that
        type is defined outside this block.)

        queryid            -- identifier of the query, stored unchanged.
        result_fn          -- path of the result file.  When it is falsy,
                              nothing is parsed and ``alignments`` stays None.
        iteration          -- iteration the result belongs to.  When None it
                              may be derived from ``result_fn`` (see
                              ``use_iteration_info``).
        use_iteration_info -- when true and ``iteration`` is None, the
                              iteration is taken from
                              ``parse_profile_filename(result_fn)``.
        '''
        self.queryid = queryid
        self.result_fn = result_fn
        self.alignments = None
        self.iteration = iteration

        # Derive the iteration from the profile filename only when the caller
        # did not supply one, asked for iteration info to be used, and there
        # actually is a filename to inspect.  Previously the flag was ignored
        # and the helper was called even with result_fn=None.
        if self.iteration is None and use_iteration_info and result_fn:
            self.iteration = parse_profile_filename(result_fn)[2]

        if self.result_fn:
            self.alignments = self.parse()
Пример #3
0
        def set_result_files( self ) :
                '''
                Collect the a3m files found in save_dir into self.a3m_files.

                The base name is taken from
                parse_profile_filename( self.sequence ).  Every file in
                self.save_dir matching "<basename>*.a3m" is renamed (via
                shutil.move) to the name that build_profile_filename
                produces from the file's own directory, iteration and
                extension combined with that base name.  The renamed paths
                are stored, sorted, in self.a3m_files.

                Nothing is returned.  When no base name can be derived the
                method returns early and leaves self.a3m_files untouched.
                '''
                basename = parse_profile_filename( self.sequence )[1]
                if not basename :
                        return

                # Indentation below is spaces only; the previous mix of tabs
                # and spaces is rejected by Python 3 (TabError).
                pattern = os.path.join( self.save_dir, basename + '*.a3m' )
                renamed = []
                for a3m in glob.glob( pattern ) :
                        directory, _found_base, iteration, ext = parse_profile_filename( a3m )
                        newa3m = build_profile_filename( directory, basename, iteration, ext )
                        shutil.move( a3m, newa3m )
                        renamed.append( newa3m )

                self.a3m_files = sorted( renamed )
Пример #4
0
    def get_iteration_file(self, i):
        '''
        Look up the a3m file that belongs to iteration ``i``.

        Walks self.a3m_files in order and returns the first filename whose
        iteration field (second-to-last item of parse_profile_filename)
        equals ``i`` when both are converted with int().  Returns None when
        no file matches.
        '''
        for candidate in self.a3m_files:
            file_iteration = parse_profile_filename(candidate)[-2]
            if int(i) != int(file_iteration):
                continue
            return candidate
        return None
Пример #5
0
    def __init__(self, input_file=None, output_file=None):
        '''
        Record the input and output file names of a profile conversion.

        input_file  -- required; a TypeError is raised when it is None.
        output_file -- optional.  When None, the name is built with
                       build_profile_filename from the directory, base name
                       and iteration that parse_profile_filename extracts
                       from ``input_file``, using the ".pnp" extension.
        '''
        if input_file is None:
            raise TypeError("No Input file is given.")

        if output_file is None:
            # Only the extension changes; the extension parsed from the
            # input name is not needed.
            directory, base_filename, iteration, _ext = parse_profile_filename(
                input_file)
            output_file = build_profile_filename(directory, base_filename,
                                                 iteration, ".pnp")

        self.input_file = input_file
        self.output_file = output_file
Пример #6
0
    def __init__(self,
                 cmd=None,
                 input_file=None,
                 output_file=None,
                 calibration_db=None,
                 calibrate=True,
                 calibration_cmd=None):
        '''
        Prepare an hhmake run (with optional calibration) for one input file.

        cmd             -- executable to run; defaults to
                           Settings.get("hhmake").
        input_file      -- required; a TypeError is raised when it is None.
        output_file     -- optional.  When None, it is built with
                           build_profile_filename from the parts that
                           parse_profile_filename extracts from
                           ``input_file``, using the '.hhm' extension.
        calibration_db  -- defaults to Settings.get("hhm_cal_db").
        calibrate       -- stored on the instance as given.
        calibration_cmd -- defaults to Settings.get("hhsearch_cmd").

        Two NamedTemporaryFile objects are created and kept on the instance
        (tmpinput / tmpoutput), and self.command_lines is set from
        self.get_command_lines().
        '''

        if cmd is None:
            cmd = Settings.get("hhmake")

        if input_file is None:
            raise TypeError("Input_file should be given.")

        if calibration_cmd is None:
            calibration_cmd = Settings.get("hhsearch_cmd")

        if calibration_db is None:
            calibration_db = Settings.get("hhm_cal_db")

        if output_file is None:
            directory, base_filename, iteration, _ext = parse_profile_filename(
                input_file)
            output_file = build_profile_filename(directory, base_filename,
                                                 iteration, '.hhm')

        self.cmd = cmd
        self.calibration_cmd = calibration_cmd
        self.calibration_db = calibration_db

        self.calibrate = calibrate

        self.input_file = input_file
        self.output_file = output_file

        # Workaround for an hhsearch bug: it cannot handle long input file
        # names, so the input is copied to a temporary file and that name
        # is used instead.
        self.tmpinput = tempfile.NamedTemporaryFile()
        self.tmpinputname = self.tmpinput.name
        try:
            shutil.copy(self.input_file, self.tmpinputname)
        except IOError:
            # The copy failed; fall back to the original path.
            self.tmpinputname = self.input_file

        # Output goes to a temporary name as well; it needs to be copied to
        # self.output_file after the execution.
        self.tmpoutput = tempfile.NamedTemporaryFile()
        self.tmpoutputname = self.tmpoutput.name

        self.command_lines = self.get_command_lines()
0
    def get_command_line(self):
        '''
        Build the argument list for the format-conversion command.

        Missing settings are filled in on the instance first:
        input_type defaults to 'a3m', output_type to 'psi', and output_file
        to the name build_profile_filename produces from the parts of
        self.input_file with '.' + output_type as the extension.

        Returns the list
        [cmd, input_type, output_type, input_file, output_file].
        '''

        if self.input_type is None:
            self.input_type = 'a3m'

        if self.output_type is None:
            self.output_type = 'psi'

        if self.output_file is None:
            directory, base_filename, iteration, _ext = parse_profile_filename(
                self.input_file)
            self.output_file = build_profile_filename(directory, base_filename,
                                                      iteration,
                                                      '.' + self.output_type)
        return [
            self.cmd, self.input_type, self.output_type, self.input_file,
            self.output_file
        ]
Пример #8
0
    def get_command_line(self):
        '''
        Build the argument list for the COMPASS profile builder.

        When self.output_file is None it is first set to the name that
        build_profile_filename produces from the parts of self.input_file
        with the '.cnp' extension (the parts are printed when the
        module-level ``verbose`` flag is true).

        A new NamedTemporaryFile is created on every call and kept on the
        instance (temporary_output_fp / temporary_output_file); the command
        writes to that temporary name, not to self.output_file.

        Returns the list [cmd, '-i', <list file>, '-o', <temporary output>],
        where the list file name comes from self.prepare_list_file().
        '''
        if self.output_file is None:
            directory, base_filename, iteration, ext = parse_profile_filename(
                self.input_file)

            if verbose:
                print("COMPASS Builder file analysis")
                print('input_file', self.input_file)
                print("dir:", directory)
                print("base_filename:", base_filename)
                print("ieration:", iteration)
                print("ext:", ext)

            self.output_file = build_profile_filename(directory, base_filename,
                                                      iteration, '.cnp')

        self.temporary_output_fp = tempfile.NamedTemporaryFile()
        self.temporary_output_file = self.temporary_output_fp.name
        list_fn = self.prepare_list_file()
        return [
            self.cmd, '-i', list_fn, '-o', self.temporary_output_file
        ]
Пример #9
0
def check_generated_profile(dominfo):
    '''
    Return the number of iterations whose profile files all exist, or 0.

    ``dominfo`` is a mapping with the keys 'uniqueid' and 'domain_path';
    a truthy 'profile_integrity' entry, when present, is returned as-is.

    The check is deliberately simple: files named
    "<domain_path>/<uniqueid>.*.*" are grouped by suffix (.pnp, .a3m, .hhm,
    .cnp, .cnp.len), keeping only names for which parse_profile_filename
    reports an iteration.  The result is non-zero only when every group has
    the same number of files and the highest iteration in every group equals
    that number.  The contents of the files are not inspected; a more
    rigorous check would look inside each profile type.
    '''

    if verbose:
        print('checking profile integrity...', dominfo['uniqueid'])

    cached = dominfo.get('profile_integrity')
    if cached:
        return cached

    domid = dominfo['uniqueid']
    dompath = dominfo['domain_path']

    if not os.path.exists(dompath):
        return 0

    all_profiles = glob.glob(os.path.join(dompath, domid) + ".*.*")

    # One list of filenames per profile type, in this fixed order; the
    # first entry (.pnp) is the reference the others are compared against.
    suffixes = ('.pnp', '.a3m', '.hhm', '.cnp', '.cnp.len')
    groups = []
    for suffix in suffixes:
        groups.append([
            fn for fn in all_profiles
            if fn.endswith(suffix) and parse_profile_filename(fn)[2]
        ])
    pnp = groups[0]

    # Without this guard, five empty groups would count as consistent.
    if not pnp:
        if verbose:
            print("No result file is found!")

        return 0

    expected = len(pnp)
    if any(len(group) != expected for group in groups):
        return 0

    try:
        top_iterations = [
            max(int(parse_profile_filename(fn)[-2]) for fn in group)
            for group in groups
        ]
    except ValueError:
        if verbose:
            print("The filename parsing routine has a problem!")
            for group in groups:
                print(group)
        return 0

    # Every type must reach the same highest iteration, and that iteration
    # must equal the number of files per type.
    if all(top == expected for top in top_iterations):
        return top_iterations[0]
    return 0