def makeFE_table(inFile, constCov, constrain):
    """
    Build a free-energy table, one row per sequence.

    inFile can be either the path of a fasta file (str), which is read with
    openFasta and then passed through GBcode, or an iterable of records
    exposing `description` and `seq`, which is used as given.

    Each row of the returned list is:
        [description,
         getFree_energy(seq),
         getFree_energy(seq, constCov),
         getFree_energy(seq, constrain)]

    Usage: <inFile> <constCov> <constrain>
    """
    # A str is treated as a file path; anything else is assumed to be an
    # already-loaded collection of records.
    if isinstance(inFile, str):
        inFile = GBcode(openFasta(inFile))

    table = []
    for item in inFile:
        # Convert the sequence once instead of once per energy call.
        sequence = str(item.seq)
        table.append([
            item.description,
            getFree_energy(sequence),
            getFree_energy(sequence, constCov),
            getFree_energy(sequence, constrain),
        ])
    return table
def haploRNA3D(templateInput, fastaFile, cycles, path, chain='A'):
    """
    Creates a tertiary structure for each unique sequence in a Fasta file
    using the methods from the module moderna.

    For each sequence this function produces three files: (1) a fasta file
    comparing the target sequence with the sequence of the template pdb
    structure, (2) a pdb file of the homologous structure with no refinement,
    and (3) a pdb file of the homologous structure with refinement
    ('<name>_refined.pdb'). In addition, a single 'Secondary_Structure.fasta'
    file collects, for every refined model, a header line, its sequence and
    its secondary structure.

    Results are written to a new folder named 'haploRNA3D_<Mon>_<day>_<hour>_<min>'
    created inside `path`.

    Parameters:
    - templateInput: template structure passed to load_template and model.
    - fastaFile: fasta file with the target sequences.
    - cycles: number of refinement cycles (passed as '-y' to the
      refinement script).
    - path: directory in which the results folder is created.
    - chain: chain of the template to use (default 'A').

    Raises subprocess.CalledProcessError if the folder cannot be created or
    if the refinement script fails on both attempts.

    Module required:
    - subprocess
    - time

    Usage: <template> <fasta file> <cycles refinement> <path> <chain (default='A')>
    """
    print('Loading template...')
    template = load_template(templateInput, str(chain))
    clean_structure(template)
    seqTemplate = str(get_sequence(template))

    print('Loading Fasta file...')
    records = openFasta(fastaFile)
    nameLst, seqLst = getNameSeq(records)
    seqLst = [replaceBase(seq, seqTemplate) for seq in seqLst]

    # Folder name carries the month, day, hour and minute taken from
    # time.ctime(), e.g. 'haploRNA3D_Jun_20_23_21'.
    stamp = time.ctime().replace(' ', '_').replace(':', '_')[4:16]
    baseDir = str(path)
    if not baseDir.endswith('/'):
        baseDir += '/'
    nameDir = baseDir + 'haploRNA3D_' + stamp
    subprocess.check_call(['mkdir', nameDir])

    muscle = locate('muscle')
    print('Refining the models...')
    refinementScript = locate('refine_model_mmtk.py')

    total = len(seqLst)
    # 'with' guarantees the summary file is closed even if a model fails.
    with open(nameDir + '/Secondary_Structure.fasta', 'w') as secondStruct:
        for position, (name, sequence) in enumerate(zip(nameLst, seqLst)):
            print('Refining %d of %d' % (position + 1, total))
            seqTargTempl(sequence, seqTemplate, name, nameDir)
            model(muscle, nameDir, name, templateInput, chain)

            # NOTE(review): the unrefined model is addressed by the first 10
            # characters of the name, while the refined file and the fasta
            # header use the first 11 -- confirm this difference is intended.
            modelPath = nameDir + '/' + name[:10]
            label = name[:11]
            refinedPath = nameDir + '/' + label + '_refined.pdb'
            currentTemplate = load_template(modelPath)

            command = ['python', refinementScript, '-m', modelPath,
                       '-y', str(cycles)]
            try:
                subprocess.check_call(command + ['-o', refinedPath, '-t'])
            except subprocess.CalledProcessError:
                # Second attempt with an explicit '-r 2-<N>' argument, where
                # N is len() of the unrefined model (presumably a residue
                # range that skips the first residue -- verify against the
                # refinement script).
                residues = '2-' + str(len(currentTemplate))
                subprocess.check_call(
                    command + ['-r', residues, '-o', refinedPath, '-t'])

            newmodel = load_model(refinedPath)
            newmodelStruc = get_secstruc(newmodel)
            newmodelSeqen = str(get_sequence(newmodel))
            secondStruct.write('>' + label + '\n' + newmodelSeqen + '\n'
                               + newmodelStruc + '\n')
    print('Done!')
def adjusted_structure_entropy(inFile, outFile, title, structure):
    """
    Fits several sequences to a defined secondary RNA structure and hands
    the fitted records to info_content_file, together with `title` and
    `outFile`, to produce the structural entropy figure.

    inFile can be either the path of a fasta file (str), which is read with
    openFasta, or an already-loaded collection of records exposing `seq`.
    Note that the `seq` attribute of every record is overwritten in place
    with the result of fit_to_secondStruct, so records passed in by the
    caller are modified.

    This function does not return a value.

    Usage: <inFile> <outFile> <title> <structure>
    """
    # A str is treated as a file path; anything else is assumed to be an
    # already-loaded collection of records.
    if isinstance(inFile, str):
        inFile = openFasta(inFile)

    for elem in inFile:
        elem.seq = fit_to_secondStruct(structure, str(elem.seq))

    info_content_file(inFile, title, outFile)
def tableByGB(inFile):
    """
    WARNING: before using this function identify yourself to NCBI using
    Entrez.email.

    This function builds a table as a list. Each element is the result of
    get_info_byGB for the description of one sequence.

    inFile can be either the path of a fasta file (str), which is read with
    openFasta, or an already-loaded collection of records.

    Module required:
    - Entrez (from Bio)
    - SeqIO (from Bio)

    Usage: <inFile>
    """
    # A str is treated as a file path; anything else is assumed to be an
    # already-loaded collection of records.
    if isinstance(inFile, str):
        inFile = openFasta(inFile)

    # NOTE(review): GBcode is applied to every input here, including records
    # passed in directly -- confirm it was not meant to run only for the
    # file-path case.
    inFile = GBcode(inFile)

    return [get_info_byGB(elem.description) for elem in inFile]