Exemplo n.º 1
0
def run(predService, seq, name, ssObject,
 startTime, post_data, pdbdata):
	"""Run one prediction service for ``seq`` and record what it produced.

	The number of live threads whose name equals ``name`` is compared with
	``siteLimit[name]``. Above the limit, a placeholder ``ss.SS`` marked
	"Queue Full" (status -1) is recorded instead of calling the service.

	The result is written through ``dbupdate``, appended to ``ssObject``, and
	the majority vote over ``ssObject`` is recomputed and stored. When the
	completed counter in ``post_data`` reaches ``post_data['total_sites']``
	and an e-mail address was supplied, the HTML report is built, stored in
	``post_data['output']`` and mailed.
	"""
	# How many live threads carry this site's name.
	active = sum(1 for worker in threading.enumerate() if worker.getName() == name)

	if active <= siteLimit[name]:
		#tempSS = predService.get(seq, tcount)
		tempSS = predService.get(seq)
	else:
		tempSS = ss.SS(name)
		tempSS.pred = "Queue Full"
		tempSS.conf = "Queue Full"
		tempSS.status = -1

	# Column suffix -> attribute of the result stored under that column.
	for suffix, attribute in (("pred", "pred"), ("conf", "conf"), ("stat", "status")):
		dbupdate(startTime, tempSS.name + suffix, getattr(tempSS, attribute))

	ssObject.append(tempSS)
	majority = batchtools.majorityVote(seq, ssObject)
	dbupdate(startTime, 'majorityvote', majority)

	post_data['completed'] += 1
	if post_data['completed'] != post_data['total_sites']:
		return

	print("All predictions completed.")
	if post_data['email'] == "":
		# No address supplied: nothing to send.
		return

	print ("Sending results to " + post_data['email'])
	# Build the HTML report and keep it in post_data before mailing it.
	report = htmlmaker.createHTML(ssObject, seq, pdbdata, majority)
	post_data.update({'output' : report})
	emailtools.sendEmail(email_service, post_data['email'],"Prediction Results", post_data['output'])
Exemplo n.º 2
0
def get(seq):
    """Submit ``seq`` to the YASPIN web form and return the outcome as ``ss.SS``.

    Status is 0 on creation, 1 once a prediction was parsed, and 2 for every
    failure (sequence longer than 4000, HTTP 500 on submit, or no result
    from ``batchtools.requestWait``). On failure the explanation is appended
    to both ``pred`` and ``conf``.
    """
    result = ss.SS("Yaspin")
    result.status = 0

    def fail(reason, log_line):
        # Shared failure path: same text in pred and conf, error status, log.
        result.pred += reason
        result.conf += reason
        result.status = 2  #error status
        print(log_line)
        return result  # still an SS so callers can treat it like any result

    if len(seq) > 4000:
        return fail("Sequence longer than 4000",
                    "YASPIN failed: Sequence longer than 4000")

    # The form asks for an e-mail address; a temporary one is used.
    mail_session = GuerrillaMailSession()
    temp_address = mail_session.get_session_state()['email_address']

    form_fields = {
        'seq': seq,
        'mbjob[description]': 'testprot',
        'nnmethod': 'dssp',
        'smethod': 'nr',
        'yaspin_align': 'YASPIN prediction',
        'email': temp_address
    }
    upload = {'seq_file': ''}

    submission = requests.post('http://www.ibi.vu.nl/programs/yaspinwww/',
                               data=form_fields,
                               files=upload)
    if submission.status_code == 500:
        return fail("Server Down", "Yaspin Failed: Server Down")

    # The results file sits directly under the URL the submission ended on.
    finished = batchtools.requestWait(submission.url + 'results.out',
                                      'Yaspin Not Ready')
    if not finished:
        return fail("Yaspin failed to respond in time",
                    "YASPIN failed: No response")

    for line in finished.text.splitlines():
        if line.startswith(" Pred:"):
            result.pred += line[6:].strip()
        elif line.startswith(" Conf:"):
            result.conf += line[6:].strip()

    # Dashes in the prediction are rewritten as 'C'.
    result.pred = result.pred.replace('-', 'C')

    result.status = 1
    print("Yaspin Complete")
    return result
Exemplo n.º 3
0
def get(seq):
	"""Submit ``seq`` to the PSSpred server and return the outcome as ``ss.SS``.

	Status is 0 on creation, 1 once a prediction was parsed and 2 for every
	failure: the hard-coded maintenance window, a sequence longer than 4000,
	a response page without any link, or no result from
	``batchtools.requestWait``.
	"""
	result = ss.SS("PSS")
	result.status = 0

	# Hard-coded outage window: refuse until the given epoch timestamp.
	if time.time() < 1571702400.00:
		result.pred = "PSS down until Oct 22"
		result.conf = "PSS down until Oct 22"
		result.status = 2
		return result

	if len(seq) > 4000:
		result.pred += "Sequence longer than 4000"
		result.conf += "Sequence longer than 4000"
		result.status = 2 #error status
		print("PSSPred failed: Sequence longer than 4000")
		return result #still an SS so it is readable as an ssObject

	# The form asks for a reply address; a temporary one is used.
	mail_session = GuerrillaMailSession()
	temp_address = mail_session.get_session_state()['email_address']

	form_fields = {
		'REPLY-E-MAIL': temp_address,
		'TARGET-NAME': 'testprot',
		'SEQUENCE': seq,
	}

	#Around 15 min for 4000
	submission = requests.post('https://zhanglab.ccmb.med.umich.edu/cgi-bin/PSSpred.pl', data=form_fields)
	page = BeautifulSoup(submission.text, 'html.parser')

	# The first link on the response page is the job location; none means failure.
	first_link = page.a
	if first_link is None:
		result.pred = "Failed to submit, server possibly under load"
		result.conf = "Failed to submit, server possibly under load"
		result.status = 2
		return result

	result_url = first_link.get('href') + '/seq.SS'
	finished = batchtools.requestWait(result_url, "PSSpred Not Ready")

	if not finished:
		result.pred += "PSSPred failed to respond in time"
		result.conf += "PSSPred failed to respond in time"
		result.status = 2 #error status
		print("PSSPred failed: No response")
		return result

	for line in finished.text.splitlines():
		if line.startswith("conf"):
			result.conf += line[6:].strip()
		elif line.startswith("SS"):
			result.pred += line[6:].strip()

	result.status = 1
	print("PSSPred Complete")
	return result
Exemplo n.º 4
0
def get(seq):
	"""Submit ``seq`` to the RaptorX property server and return an ``ss.SS``.

	The job page is polled every 20 seconds for at most 20 minutes. Once the
	job is ready, the result archive is downloaded and its ``.ss3.txt`` file
	is parsed: column 2 of each row is appended to ``pred`` and columns 3-5
	to ``hconf``/``econf``/``cconf``. ``conf`` gets one digit per residue,
	taken from the probability of the predicted class.

	Status values set here: 0 on creation, 1 on success, 2 when the sequence
	length is rejected or the job did not finish in time, 4 when any step of
	submitting or parsing raised (reported as "sequence not accepted").
	"""
	SS = ss.SS("RaptorX")
	SS.status = 0

	if (len(seq) < 27 or len(seq) > 4000): #<27 shouldnt happen with input validation
		SS.pred += "Sequence is longer than 4000"
		SS.conf += "Sequence is longer than 4000"
		SS.status = 2 #error status
		print("RaptorX failed: Sequence is shorter than 27 or longer than 4000")
		return SS #return SS so it will be readable as an ssObject

	session = GuerrillaMailSession()	#Creates GuerrillaMail session
	email_address = session.get_session_state()['email_address'] #retrieves temp email address

	payload = {'jobname': 'myprot',
		'useProfile': 'true',
		'predict_sub': 'Submit',
		'email': email_address,
		'sequences': seq}

	r = requests.post('http://raptorx.uchicago.edu/StructurePropertyPred/predict/pred/', data=payload)

	soup = BeautifulSoup(r.text,'html.parser')

	try: #a rejected (e.g. nucleotide/invalid) sequence yields no job link and raises below
		url = soup.find(href=re.compile('http://raptorx.uchicago.edu/StructurePropertyPred/myjobs/')).get('href')

		# The part of the job URL after the first '_' names the archive folder and file.
		zipid = url.split('_')[1].replace('/','')

		tree = html.fromstring(requests.get(url).text)
		treelist = tree.xpath('//*[@id="content"]/center[1]/text()')

		#Cancel after 20 min: keep polling only while the page still shows the
		#pending text AND the deadline has not passed.
		deadline = time.time() + 1200
		while treelist and time.time() < deadline:
			print('RaptorX Not Ready')
			time.sleep(20)
			tree = html.fromstring(requests.get(url).text)
			treelist = tree.xpath('//*[@id="content"]/center[1]/text()')

		if not treelist:
			treelist = tree.xpath('//*[@id="infoval"]/script/text()')

			# The archive path is one of the quoted strings in the inline script.
			zippath = 'http://raptorx.uchicago.edu/'
			for word in treelist[0].split('"'):
				if word[:30] == '/StructurePropertyPred/myjobs/':
					zippath += word

			# Re-request on every pass; otherwise a failed first fetch would
			# never be retried and the loop could not end.
			zipss = requests.get(zippath)
			while not zipss.ok:
				print('RaptorX Zip Not Ready')
				time.sleep(20)
				zipss = requests.get(zippath)

			ss3path = zipid + '/' + zipid + '.ss3.txt'

			# Reuse the bytes already downloaded instead of fetching the archive again.
			with ZipFile(io.BytesIO(zipss.content), "r") as zf:
				sslist = zf.read(ss3path).decode('utf-8').splitlines()[2:]

			for line in sslist:
				word = line.split()
				SS.pred += word[2]
				SS.hconf.append(word[3])
				SS.econf.append(word[4])
				SS.cconf.append(word[5])

			# One confidence digit per residue, from the predicted class's value:
			# the 3rd character of the string (e.g. '8' for "0.873"), or '9' for "1.000".
			conf_by_class = {'C': SS.cconf, 'E': SS.econf, 'H': SS.hconf}
			for i, state in enumerate(SS.pred):
				values = conf_by_class.get(state)
				if values is None:
					continue	#any other symbol contributes no digit
				SS.conf += '9' if values[i] == "1.000" else values[i][2]

			SS.status = 1
			print("RaptorX Complete")
		else:
			SS.pred += "failed to respond in time"
			SS.conf += "failed to respond in time"
			SS.status = 2 #error status
			print("RaptorX failed: No response")
	except Exception:
		# Deliberately broad: any failure while submitting or parsing is
		# reported the same way. KeyboardInterrupt/SystemExit are not swallowed.
		SS.pred += "sequence not accepted"
		SS.conf += "sequence not accepted"
		SS.status = 4
		print("RaptorX failed: sequence not accepted")

	print("RAPTOR::")
	print(SS.pred)
	print(SS.conf)

	return SS
Exemplo n.º 5
0
def get(seq):
	"""Submit ``seq`` to the PSIPRED REST API and return the outcome as ``ss.SS``.

	Sequences shorter than 30 or longer than 1500 are rejected with status 2.
	After submitting, the ``.horiz`` result file is awaited through
	``batchtools.requestWait``; its "Conf" and "Pred" lines are appended to
	``conf`` and ``pred`` and status becomes 1. A missing result gives
	status 2, and any exception while submitting or parsing gives status 4
	("sequence not accepted").
	"""
	SS = ss.SS("PSI")

	if (len(seq) < 30 or len(seq) > 1500):
		SS.pred += "Sequence is shorter than 30 or longer than 1500"
		SS.conf += "Sequence is shorter than 30 or longer than 1500"
		SS.status = 2 #error status
		print("PsiPred failed: Sequence is shorter than 30 or longer than 1500")
		return SS #return SS so it will be readable as an ssObject

	session = GuerrillaMailSession()	#Creates GuerrillaMail session
	email_address = session.get_session_state()['email_address'] #retrieves temp email address

	url = 'http://bioinf.cs.ucl.ac.uk/psipred/api/submission/'
	payload = {'input_data': seq}
	data = {'job': 'psipred',
		'submission_name': 'testing',
		'email': email_address, }
	r = requests.post(url, data=data, files=payload, headers={'accept': 'application/json'})

	try: #a rejected (e.g. nucleotide/invalid) sequence yields no UUID and raises below
		uuid = r.json()['UUID']

		jsonurl = 'http://bioinf.cs.ucl.ac.uk/psipred/api/submission/' + uuid + '?format=json'
		r = requests.get(jsonurl)

		# The result files are named after the first submission's own UUID.
		filesUUID = r.json()['submissions'][0]['UUID']

		horiz = 'http://bioinf.cs.ucl.ac.uk/psipred/api/submissions/' + filesUUID + '.horiz'

		#Length 1500 takes around 5 min
		requesturl = batchtools.requestWait(horiz, 'PsiPred Not Ready')

		if requesturl:
			for line in requesturl.text.splitlines():
				line = line.strip()
				# Each data line carries a 6-character label before the values.
				if line.startswith("Conf"):
					SS.conf += line[6:]
				elif line.startswith("Pred"):
					SS.pred += line[6:]

			SS.status = 1
			print("PsiPred Complete")
		else:
			SS.pred += "PsiPred failed to respond in time"
			SS.conf += "PsiPred failed to respond in time"
			SS.status = 2 #error status
			print("PsiPred failed: No response")

	except Exception:
		# Deliberately broad: any failure while submitting or parsing is
		# reported the same way. KeyboardInterrupt/SystemExit are not swallowed.
		SS.pred += "PsiPred failed: sequence not accepted"
		SS.conf += "PsiPred failed: sequence not accepted"
		SS.status = 4
		print("PsiPred failed: sequence not accepted")

	print("PSI::")
	print(SS.pred)
	print(SS.conf)

	return SS
Exemplo n.º 6
0
def get(seq):
    """Submit ``seq`` to the JPred4 form and return the outcome as ``ss.SS``.

    Sequences shorter than 20 or longer than 800 are rejected with status 2.
    The job id is read from the ``Refresh`` response header and the ``.jnet``
    result file is polled every 20 seconds, with no timeout, until it no
    longer starts with ``<``. Line 1 of that file becomes ``pred`` (dashes
    rewritten as 'C') and line 2 becomes ``conf``, both with commas removed;
    status is then 1. Any exception while submitting or parsing gives
    status 4 ("sequence not accepted").
    """
    SS = ss.SS("JPred")

    if len(seq) < 20 or len(seq) > 800:
        SS.pred += "Sequence is shorter than 20 or longer than 800"
        SS.conf += "Sequence is shorter than 20 or longer than 800"
        SS.status = 2  #error status
        print("JPred failed: Sequence is shorter than 20 or longer than 800")
        return SS  #return SS so it will be readable as an ssObject

    session = GuerrillaMailSession()  #Creates GuerrillaMail session
    email_address = session.get_session_state()[
        'email_address']  #retrieves temp email address

    payload = {
        'email': email_address,
        'queryName': 'testprot',
        'input': 'seq',
        'pdb': '1',
        '.submit': 'continue',
        'seq': seq
    }

    r = requests.post(
        'http://www.compbio.dundee.ac.uk/jpred4/cgi-bin/jpred_form',
        data=payload)

    try:  #a rejected (e.g. nucleotide/invalid) sequence has no Refresh header and raises below
        # The job id is whatever follows '?' in the Refresh header.
        jobid = r.headers['Refresh'].split('?')[1]

        joburl = 'http://www.compbio.dundee.ac.uk/jpred4/results/' + jobid + '/' + jobid + '.jnet'

        page = requests.get(joburl).text

        #No cancel: a page starting with '<' is treated as "not ready yet".
        #(A 15-minute cancel existed only as disabled code in the source.)
        while page.startswith('<'):
            print("JpredSS Not Ready")
            time.sleep(20)
            page = requests.get(joburl).text

        if not page.startswith('<'):
            raw = page.splitlines()

            # Dashes become 'C'; the comma separators are dropped.
            SS.pred = (raw[1].replace('jnetpred:', '')
                       .replace('-', 'C')
                       .replace(',', ''))
            SS.conf = raw[2].replace('JNETCONF:', '').replace(',', '')

            SS.status = 1
            print("JPred Complete")
        else:
            # Only reachable if the wait above is ever given a timeout.
            SS.pred += "JPred failed to respond in time"
            SS.conf += "JPred failed to respond in time"
            SS.status = 2  #error status
            print("JPred failed: No response")
    except Exception:
        # Deliberately broad: any failure while submitting or parsing is
        # reported the same way. KeyboardInterrupt/SystemExit are not swallowed.
        SS.pred += "JPred failed: sequence not accepted"
        SS.conf += "JPred failed: sequence not accepted"
        SS.status = 4
        print("JPred failed: sequence not accepted")

    print("JPRED::")
    print(SS.pred)
    print(SS.conf)

    return SS
Exemplo n.º 7
0
def get(seq):
    """Submit ``seq`` to the SCRATCH/SSpro form and return the outcome as ``ss.SS``.

    Sequences longer than 400 are rejected with status 2, as are a response
    page without a ``<p>`` element ("Failed to Submit") and one whose first
    word is ``ERROR:`` ("Queue Full"). Results arrive by e-mail and are
    awaited through ``batchtools.emailRequestWait``. The lines following the
    "Predicted Secondary Structure (3 Class):" header, up to the first blank
    line, are appended to ``pred``. On success status is 3 and ``conf``
    holds a fixed notice; a missing e-mail gives status 2.
    """
    SS = ss.SS("SSPro")
    SS.status = 0

    if len(seq) > 400:
        SS.pred += "Sequence longer than 400"
        SS.conf += "Sequence longer than 400"
        SS.status = 2  #error status
        print("SSPro failed: Sequence longer than 400")
        return SS  #return SS so it will be readable as an ssObject

    # Random job name; the same value is used to look up the result e-mail.
    randName = batchtools.randBase62()
    session = GuerrillaMailSession()  #Creates GuerrillaMail session
    email_address = session.get_session_state()[
        'email_address']  #retrieves temp email address

    payload = {
        'amino_acids': seq,
        'query_name': randName,
        'email': email_address,
        'ss': 'on'
    }

    r = requests.post(
        'http://scratch.proteomics.ics.uci.edu/cgi-bin/new_server/sql_predict.cgi',
        data=payload)

    soup = BeautifulSoup(r.text, 'html.parser')
    msg = soup.find('p')
    if msg is None:
        SS.pred += "Failed to Submit"
        SS.conf += "Failed to Submit"
        SS.status = 2  #error status
        print("SSPro Failed to Submit")
        return SS

    # Slice instead of [0] so an empty paragraph does not raise IndexError.
    if msg.text.split()[:1] == ['ERROR:']:
        SS.pred += "Queue Full"
        SS.conf += "Queue Full"
        SS.status = 2  #error status
        print("SSPro Queue Full")
        return SS

    # NOTE(review): the sender address in this literal looks like a redaction
    # placeholder rather than a real address -- confirm against the server's mail.
    query = 'from:([email protected]) subject:(Protein Structure Predictions for ' + randName + ')'

    #Waits indefinitely until results are out
    email_id, message = batchtools.emailRequestWait(session, query, "Name:",
                                                    randName,
                                                    "SSPro Not Ready", 60)

    #Cancels after 15 min. Length 400 sequences take 10-15 min in a batch
    #email_id, message = batchtools.emailRequestWait(session, query, "Name:", randName, "SSPro Not Ready", 60, 900)

    if email_id:
        message_parts = message.splitlines()

        header = "Predicted Secondary Structure (3 Class):"
        if header in message_parts:
            # Collect the lines after the header until a blank line or the end
            # of the message; neither a header on the first line nor a missing
            # trailing blank line can raise IndexError.
            start = message_parts.index(header) + 1
            for line in message_parts[start:]:
                if not line:
                    break
                SS.pred += line

        SS.conf = "SSPro Does Not Provide Any Conf"
        SS.status = 3
        print("SSpro Complete")
    else:
        SS.pred += "SSPro failed to respond in time"
        SS.conf += "SSPro failed to respond in time"
        SS.status = 2  #error status
        print("SSPro failed: No response")
    return SS
Exemplo n.º 8
0
def get(seq):
	"""Submit ``seq`` to the SABLE server and return the outcome as ``ss.SS``.

	Sequences of length 12 or less are rejected with status 2 and are not
	submitted. Results arrive by e-mail and are awaited through
	``batchtools.emailRequestWait``. The message is read as two sections,
	each ended by a line starting with ``END_SECTION``:

	* first section: for every line starting with ``>``, the lines 2 and 3
	  below it are appended to ``pred`` and ``conf``;
	* second section: for every line starting with ``>``, the lines 2, 3 and
	  4 below it (minus their first three characters) are collected into
	  ``hconf``, ``econf`` and ``cconf``.

	Status is 1 on success and 2 when no e-mail was received.
	"""
	SS = ss.SS("Sable")
	SS.status = 0

	if len(seq) <= 12:
		SS.status = 2 #error status
		SS.pred += "Sequence is shorter than or equal to 12"
		SS.conf += "Sequence is shorter than or equal to 12"
		print("SABLE failed: Sequence is shorter than or equal to 12")
		# Stop here so the error status is kept and nothing is submitted.
		return SS

	# Random job name; the same value is used to look up the result e-mail.
	randName = batchtools.randBase62()
	session = GuerrillaMailSession()	#Creates GuerrillaMail session
	email_address = session.get_session_state()['email_address'] #retrieves temp email address

	payload = {'txtSeq': seq,
	'seqName': randName,
	'email': email_address,
	'fileName':'',
	'SS':'SS',
	'version':'sable2',
	'SAaction': 'wApproximator',
	'SAvalue':'REAL'}

	# The HTTP response is not inspected; results come back by e-mail.
	requests.post('http://sable.cchmc.org/cgi-bin/sable_server_July2003.cgi', data = payload)

	#sable uses multiple emails to send results
	query = 'from:(sable) subject:(sable result) query: ' + randName

	#Length 4000 takes around 10 min
	#Waits indefinitely until results are out
	email_id, message = batchtools.emailRequestWait(session, query, "Query:", randName, "Sable Not Ready", 30)

	#Cancel in 15 min
	#email_id, message = batchtools.emailRequestWait(session, query, "Query:", randName, "Sable Not Ready", 30, 900)

	if email_id:
		message_parts = message.splitlines()
		total = len(message_parts)

		#getting the prediction sequence and confidence
		# The scan moves one line at a time; the bounds checks keep a message
		# without 'END_SECTION', or a truncated record, from raising IndexError.
		index = 0
		while index < total and not message_parts[index].startswith('END_SECTION'):
			if message_parts[index].startswith('>') and index + 3 < total:
				SS.pred += message_parts[index + 2].strip()
				SS.conf += message_parts[index + 3].strip()
			index += 1

		#getting the probabilities for helix, beta strand, coil
		index += 1 #go past the prediction's 'END_SECTION'
		helixProb = []
		betaProb = []
		coilProb = []
		while index < total and not message_parts[index].startswith('END_SECTION'):
			if message_parts[index].startswith('>') and index + 4 < total:
				# The first three characters of each probability row are dropped.
				helixProb.extend(message_parts[index + 2][3:].split())
				betaProb.extend(message_parts[index + 3][3:].split())
				coilProb.extend(message_parts[index + 4][3:].split())
			index += 1

		SS.hconf = helixProb
		SS.econf = betaProb
		SS.cconf = coilProb

		SS.status = 1
		print(SS.pred)
		print(SS.conf)
		print("Sable Complete")
	else:
		SS.pred += "Sable failed to respond in time"
		SS.conf += "Sable failed to respond in time"
		SS.status = 2 #error status
		print("Sable failed: No response")
	return SS