def test_root_count(root):
    """The landing page reports how many packages are being served and
    the count updates after a file is added and the page reloaded."""
    banner = "PyPI compatible package index serving %d packages"
    go("/")
    show()
    code(200)
    find(banner % 0)
    showlinks()
    # Drop a package tarball into the served directory, then reload.
    root.join("Twisted-11.0.0.tar.bz2").write("")
    reload()
    show()
    find(banner % 1)
def test_simple_index_case(root):
    """Project lookup under /simple/ matches package names
    case-insensitively (FooBar files found via /simple/foobar)."""
    for version in ("1.0", "1.1"):
        root.join("FooBar-%s.zip" % version).write("")
    go("/simple/foobar")
    show()
    assert len(list(showlinks())) == 2
def get_file_urls():
    """Return the URLs of every link on base_url that looks like a
    'Table*.txt' data file."""
    import twill.commands as tw
    tw.go(base_url)
    return [
        link.url
        for link in tw.showlinks()
        if link.url.startswith('Table') and link.url.endswith('.txt')
    ]
def test_nonroot_simple_packages(root):
    """When mounted under a non-root prefix, /packages lists files with
    the prefix in their URLs — with or without a trailing slash."""
    root.join("foobar-1.0.zip").write("123")
    for url in ("http://nonroot/priv/packages",
                "http://nonroot/priv/packages/"):
        go(url)
        show()
        found = list(showlinks())
        assert len(found) == 1
        assert found[0].url == "/priv/packages/foobar-1.0.zip"
def test_simple_index_list(root):
    """/simple/ lists one entry per project; foobar-1.0 and foobar-1.1
    collapse into a single 'foobar' entry (3 links for 4 files)."""
    for filename in ("foobar-1.0.zip", "foobar-1.1.zip",
                     "foobarbaz-1.1.zip", "foobar.baz-1.1.zip"):
        root.join(filename).write("")
    go("/simple/")
    show()
    assert len(list(showlinks())) == 3
def visit_page(url, browser, results):
    """Depth-first crawl starting at `url`.

    Records the HTTP response code of every reachable page in `results`
    (a dict mapping URL -> code, mutated in place and returned). Only
    HTML pages are expanded further; each URL is visited at most once.
    """
    go(url)
    results[url] = browser.get_code()
    if browser._browser.viewing_html():
        for link in showlinks():
            target = urljoin(link.base_url, link.url)
            if target not in results:
                visit_page(target, browser, results)
                browser.back()
    return results
def test_nonroot_simple_packages(root):
    """Non-root mounted package listing: each served file link carries
    the mount prefix, regardless of a trailing slash in the request."""
    root.join("foobar-1.0.zip").write("123")
    base = "http://nonroot/priv/packages"
    for url in (base, base + "/"):
        go(url)
        show()
        links = list(showlinks())
        assert len(links) == 1
        assert links[0].url == "/priv/packages/foobar-1.0.zip"
def __init__ ( self ):
    """Crawler entry point.

    Picks a random browser backend, opens the base URL in both the
    twill `firefox` session and the splinter browser, seeds the link
    lists, then walks every discovered link via visitAllLinks().
    """
    #self.base_url = "http://www.webcamps.com.br/"
    #self.base_url = "http://localhost"
    self.base_url = "http://www.scarpan.com.br"
    self.links = []
    # Seed `visited` with the root page so it is not crawled again.
    self.visited = ["%s/" % self.base_url]
    browsers = ["webdriver.firefox", "webdriver.chrome", "zope.testbrowser"]
    # NOTE(review): the splinter backend is chosen at random on every
    # run — confirm this is intentional (smoke-testing all backends?).
    self.browser = Browser(browsers[randint(0,2)])
    #Redirect Messages: keep twill's chatter out of the console.
    firefox.redirect_error("/tmp/twill.err")
    firefox.redirect_output("/tmp/twill.out")
    # Both browsers navigate in lockstep: twill for link extraction,
    # splinter for the real page visit.
    firefox.go( self.base_url )
    self.browser.visit( self.base_url )
    self.extractAllLinks ( firefox.showlinks() )
    self.visitAllLinks()
def visitAllLinks(self):
    """Drain self.links, visiting each not-yet-visited URL in both the
    twill session and the splinter browser, collecting new links as it
    goes; recurses on leftovers and finally quits the browser.

    NOTE(review): any error during a visit is silently swallowed by the
    bare except, and the trailing recursion re-enters this method while
    iterating self.links — fragile, but preserved as-is here.
    """
    valid_links = True
    while valid_links:
        # Randomized politeness delay (currently disabled).
        wait = randint (0,60)
        #print "Wait %s for surf new page" % wait
        #sleep (wait)
        if len( self.links ) == 0:
            valid_links = False
            break
        link = self.links.pop()
        if link not in self.visited:
            try:
                #Debug
                print "Visit : %s" % link
                firefox.go(link)
                self.browser.visit( link )
                self.visited.append (link)
                # Harvest any links found on the newly visited page.
                self.extractAllLinks ( firefox.showlinks() )
            except:
                # Best-effort crawl: ignore any navigation failure.
                pass
        else:
            #print "Visited: %s" % link
            pass
    # Pick up links from whatever page we ended on, then recurse for
    # any non-image, unvisited leftovers.
    self.extractAllLinks( firefox.showlinks() )
    for link in self.links:
        if self.checkImage ( link ):
            continue
        if link not in self.visited:
            self.visitAllLinks()
    print "Finished close browser"
    try:
        self.browser.quit()
    except Exception , e:
        print "%s %s" % ( e.__doc__ , e )
self.silent = True

def search (self, key_word):
    """Submit `key_word` to the search form on base_url (Google) via
    the twill browser and print the URLs of all result-page links.

    When self.silent is set, twill's error/debug output is discarded.
    """
    mybrowser.go( self.base_url )
    #Silent mode defaults to True
    if self.silent:
        mybrowser.redirect_error("/dev/null")
        mybrowser.redirect_output("/dev/null")
    try:
        mybrowser.formclear("1")#reset For first Form
        mybrowser.fv("1", "q", key_word) #change First Form with Key Word
        mybrowser.submit("btnG") #Click Search in Google not
        print "Request Finished"
    except HTTPError, httpe:
        print "\nError: %s\n%s\n%s" % (httpe.code, httpe.msg, httpe.__doc__)
    except IOError, io:
        print "IOError: %s " % io
    except Exception, e:
        print "Generic Error: %s" % e
    print "Retrieve Links"
    # Collect and print the URL of every link on the results page.
    links = mybrowser.showlinks()
    follow_links = []
    for link in links:
        follow_links.append ( link.url )
    print follow_links
def test_simple_list_no_dotdir2(root):
    """Packages hidden inside a dot-directory are never exposed through
    the /simple/ project page."""
    root.mkdir(".subdir").join("foo-1.0.zip").write("secret")
    go("/simple/foo/")
    show()
    assert not list(showlinks())
def test_simple_list_no_dotfiles2(root):
    """Dotfiles in the package root never appear on their /simple/
    project page."""
    root.join(".foo-1.0.zip").write("secret")
    go("/simple/.foo/")
    assert not list(showlinks())
def pobierzPlan(user, password):
    """Log into the PWr "Edukacja" portal and scrape the enrolment
    ("zapisy") pages.

    Returns a list of raw HTML strings (one per study programme), or an
    error string on bad credentials / an already-open session.
    (Polish identifiers kept as-is: pobierzPlan = "fetch plan".)
    """
    tablicaHTMLow = []  # collected HTML pages
    # NOTE(review): the UA string below is missing its closing ')'.
    commands.add_extra_header( 'User-agent', 'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.0.6')
    commands.clear_cookies() # Clear cookies
    commands.reset_browser() # Restart the browser
    commands.reset_output()
    #commands.config('use_BeautifulSoup', '0')
    commands.go("https://edukacja.pwr.wroc.pl/EdukacjaWeb/studia.do" ) # Open the Edukacja portal
    commands.showlinks() # TO REMOVE! Debug: show the links
    commands.formclear('1') # Clear the login form
    commands.formvalue('1', 'login', user) # Fill in the login
    commands.formvalue('1', 'password', password) # Fill in the password
    commands.submit('0') # Click "log in"
    print("Linki po submit") # TO REMOVE! Debug output
    commands.showlinks() # TO REMOVE! Debug: show the links
    # Inspect the returned page for a login error / other-session warning.
    czyBledneLogowanie = sprawdzCzyBledneLogowanie( commands.show())
    czyLogowanieWInnejSesji = sprawdzCzyLogowanieWInnejSesji(commands.show())
    if (czyBledneLogowanie == True):
        return "Bledne dane logowania"
    if (czyLogowanieWInnejSesji == True):
        return "zalogowany w innej sesji"
    commands.follow("zapisy.do") # Follow the link to the enrolment page
    # TO REMOVE! Debug: show the page after clicking "zapisy"
    print("Co po kliknieciu Zapisy")
    #commands.show()
    commands.showlinks()
    print "Forms:"
    formsy = commands.showforms()
    links = commands.showlinks() # Fetch the links of the current page
    control = None
    values = None
    select_options = None
    try:
        # The 'ineSluId' <select> exists only when the student is
        # enrolled on more than one programme.
        control = commands.browser.get_form("1").find_control('ineSluId', type="select")
    except:
        print("Nie ma selecta.")
        #control = commands.browser.get_form("1").find_control('ineSluId', type="select")
    if (control != None): # The page has a programme <select>
        values = pobierzElementySelect( commands.show()) # Parse the select's values from the HTML
        select_options = utworzTabeleSelect( values) # Build a replacement select
        select_attrs = {'id': 'some_id'} # Attributes for the new select
        for v in values: # Iterate over every programme value
            form = commands.get_browser().get_form("1") # Fetch the form
            ct = commands.browser.get_form("1").find_control( 'ineSluId', type="select") # Fetch the select control
            add_select_to_form(form, 'ineSluId', select_attrs, select_options) # Install the new select
            form.fixup( ) # Re-scan the form for newly added controls
            commands.showforms()
            commands.formvalue("1", ct.name, v.strip()) # Choose the value in the select
            commands.submit('0') # Click submit
            html = pobierzZajecia(commands) # Fetch the classes page
            commands.follow("zapisy.do") # Go back to the enrolment page
            #commands.sleep(6)
            if ( html != "skreslony z kierunku" ): # Skip struck-off programmes
                tablicaHTMLow.append( html )
    else:
        html = pobierzZajecia( commands ) # No select: single programme, fetch directly
        if (html != "skreslony z kierunku"):
            tablicaHTMLow.append(html)
    # (A large block of commented-out experimental code — manual select
    # handling, debug prints and sleeps — was removed here for clarity.)
    wyloguj(commands)
    commands.browser.clear_cookies() # Delete cookies
    commands.reset_browser() # Restart the browser
    commands.reset_output()
    return tablicaHTMLow
def pobierzZajecia(commands):
    """From the enrolment page, open the current academic year and the
    "enrolled courses" details form, returning the resulting HTML.

    Returns the page HTML, or the sentinel string
    "skreslony z kierunku" when the student was struck off.
    """
    print("Po wybraniu")
    links = commands.showlinks() # Fetch the links of the current page
    commands.showforms()
    # Pick the link for the current academic year; semester depends on
    # whether today is past 1 October.
    dateToday = datetime.date.today()
    firstOctober = datetime.date(dateToday.year, 10, 1)
    if dateToday > firstOctober: # Past 1 October
        #Winter semester
        for link in links:
            ktory = 0
            if link.text=='' + str(dateToday.year) + '/' + str(dateToday.year+1) + '': # Link titled "year/year+1"
                ktory = ktory + 1
            if ktory == 1: # Found it!
                commands.go(link.url) # Follow the URL behind that year
    else:
        #Summer semester
        for link in links:
            ktory = 0
            if link.text=='' + str(dateToday.year)+ '/' + str(dateToday.year+1) + '': # Link titled "year/year+1"
                ktory = ktory + 1
            # NOTE(review): ktory is reset to 0 every iteration, so this
            # branch can never fire — confirm intended counter scope.
            if ktory == 2: # Found it!
                commands.go(link.url) # Follow the URL behind that year
    # TO REMOVE! Debug dump after selecting the semester
    print("Co po kliknieciu semestru:")
    commands.showlinks()
    print "Forms:"
    commands.showforms()
    # Find the form whose action shows the enrolled-course details.
    forms = commands.showforms() # Fetch the forms
    naszForm = None # Will hold the form we are looking for
    for form in forms: # Loop over the forms
        if form.action == 'https://edukacja.pwr.wroc.pl/EdukacjaWeb/zapisy.do?href=#hrefZapisySzczSlu': # Action leads to the details page
            naszForm = form # Remember the matching form
    print(naszForm) # TO REMOVE! Prints the found form
    if(naszForm != None): # Year found and the student still exists on this programme
        ctrl = naszForm.controls # All controls of the found form
        for ct in ctrl:
            if ct.type == 'submit': # Find the submit button among them
                commands.get_browser().clicked(naszForm, ct.attrs['name']) # Click it
                commands.get_browser().submit()
        print("Co po kliknieciu szczegoly zajec")
        commands.showlinks()
        #commands.sleep(5)
        print "Forms:"
        #commands.showforms()
        content = commands.show()
        #content =""
        #commands.browser.clear_cookies()
        #print('PRZZEEEEEENIOSLEM')
        #commands.sleep(5)
        return content
    else:
        #commands.follow("zapisy.do")
        #print('PRZZEEEEEENIOSLEM')
        return("skreslony z kierunku")
def test_packages_empty(root):
    """/packages on an empty package root answers 200 with no links."""
    go("/packages")
    show()
    code(200)
    assert not list(showlinks())
def annotate(params, proteins, \
    url="http://services.cbu.uib.no/tools/bomp/", force=False):
    """
    Uses the BOMP web service (http://services.cbu.uib.no/tools/bomp/) to
    predict if proteins are outer membrane beta-barrels.

    Returns a dict mapping sequence id -> BOMP category (int) and, as a
    side effect, sets proteins[seqid]['bomp'] for every protein (an int
    category, or False when BOMP returned nothing for it). Results are
    cached in 'bomp.out'; pass force=True to re-query the service.
    """
    # set the user-agent so web services can block us if they want ... :/
    python_version = sys.version.split()[0]
    agent("Python-urllib/%s (twill; inmembrane/%s)" % (python_version, inmembrane.__version__))
    bomp_out = 'bomp.out'
    log_stderr("# BOMP(web) %s > %s" % (params['fasta'], bomp_out))
    # Cache hit: parse the saved tab-delimited results instead of
    # hitting the web service again.
    if not force and os.path.isfile(bomp_out):
        log_stderr("# -> skipped: %s already exists" % bomp_out)
        bomp_categories = {}
        fh = open(bomp_out, 'r')
        for l in fh:
            words = l.split()
            # The category is the last whitespace-separated field.
            bomp_category = int(words[-1:][0])
            seqid = parse_fasta_header(l)[0]
            proteins[seqid]['bomp'] = bomp_category
            bomp_categories[seqid] = bomp_category
        fh.close()
        return bomp_categories
    # dump extraneous output into this blackhole so we don't see it
    if not __DEBUG__: twill.set_output(StringIO.StringIO())
    go(url)
    if __DEBUG__: showforms()
    # Upload the FASTA file and submit the job.
    formfile("1", "queryfile", params["fasta"])
    submit()
    if __DEBUG__: show()
    # extract the job id from the page
    links = showlinks()
    job_id = None
    for l in links:
        if l.url.find("viewOutput") != -1:
            # grab job id from "viewOutput?id=16745338"
            job_id = int(l.url.split("=")[1])
    if __DEBUG__: log_stderr("BOMP job id: %d" % job_id)
    if not job_id:
        # something went wrong
        log_stderr("# BOMP error: Can't find job id")
        return
    # parse the HTML table and extract categories
    go("viewOutput?id=%i" % (job_id))
    # Poll with exponential backoff until the results page no longer
    # says "Not finished" (give up after the delay reaches 2 hours).
    polltime = 10
    log_stderr("# Waiting for BOMP to finish .")
    while True:
        try:
            find("Not finished")
            log_stderr(".")
        except:
            # Finished ! Pull down the result page.
            log_stderr(". done!\n")
            go("viewOutput?id=%i" % (job_id))
            if __DEBUG__: log_stderr(show())
            break
        # Not finished. We keep polling for a time until
        # we give up
        time.sleep(polltime)
        polltime = polltime * 2
        if polltime >= 7200: # 2 hours
            log_stderr("# BOMP error: Taking too long.")
            return
    go("viewOutput?id=%i" % (job_id))
    if __DEBUG__: log_stderr(show())
    bomp_html = show()
    if __DEBUG__: log_stderr(bomp_html)
    # Results are in the only <table> on this page, formatted like:
    # <tr><th>gi|107836852|gb|ABF84721.1<th>5</tr>
    soup = BeautifulSoup(bomp_html)
    bomp_categories = {} # dictionary of {name, category} pairs
    for tr in soup.findAll('tr')[1:]:
        n, c = tr.findAll('th')
        name = parse_fasta_header(n.text.strip())[0]
        category = int(c.text)
        bomp_categories[name] = category
    # write BOMP results to a tab delimited file
    fh = open(bomp_out, 'w')
    for k, v in bomp_categories.iteritems():
        fh.write("%s\t%i\n" % (k, v))
    fh.close()
    if __DEBUG__: log_stderr(str(bomp_categories))
    # label proteins with bomp classification (int) or False
    for name in proteins:
        if "bomp" not in proteins[name]:
            if name in bomp_categories:
                category = int(bomp_categories[name])
                proteins[name]['bomp'] = category
            else:
                proteins[name]['bomp'] = False
    if __DEBUG__: log_stderr(str(proteins))
    return bomp_categories

"""
def pobierzPlan(user, password):
    """Log into the PWr "Edukacja" portal and scrape the enrolment
    ("zapisy") pages.

    Returns a list of raw HTML strings (one per study programme), or an
    error string on bad credentials / an already-open session.
    (Polish identifiers kept as-is: pobierzPlan = "fetch plan".)
    """
    tablicaHTMLow = []  # collected HTML pages
    # NOTE(review): the UA string below is missing its closing ')'.
    commands.add_extra_header('User-agent', 'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.0.6')
    commands.clear_cookies() # Clear cookies
    commands.reset_browser() # Restart the browser
    commands.reset_output()
    #commands.config('use_BeautifulSoup', '0')
    commands.go("https://edukacja.pwr.wroc.pl/EdukacjaWeb/studia.do") # Open the Edukacja portal
    commands.showlinks() # TO REMOVE! Debug: show the links
    commands.formclear('1') # Clear the login form
    commands.formvalue('1', 'login', user) # Fill in the login
    commands.formvalue('1', 'password', password) # Fill in the password
    commands.submit('0') # Click "log in"
    print("Linki po submit") # TO REMOVE! Debug output
    commands.showlinks() # TO REMOVE! Debug: show the links
    # Inspect the returned page for a login error / other-session warning.
    czyBledneLogowanie = sprawdzCzyBledneLogowanie(commands.show())
    czyLogowanieWInnejSesji = sprawdzCzyLogowanieWInnejSesji(commands.show())
    if (czyBledneLogowanie == True):
        return "Bledne dane logowania"
    if (czyLogowanieWInnejSesji == True):
        return "zalogowany w innej sesji"
    commands.follow("zapisy.do") # Follow the link to the enrolment page
    # TO REMOVE! Debug: show the page after clicking "zapisy"
    print("Co po kliknieciu Zapisy")
    #commands.show()
    commands.showlinks()
    print "Forms:"
    formsy = commands.showforms()
    links = commands.showlinks() # Fetch the links of the current page
    control = None
    values = None
    select_options = None
    try:
        # The 'ineSluId' <select> exists only when the student is
        # enrolled on more than one programme.
        control = commands.browser.get_form("1").find_control('ineSluId', type="select")
    except:
        print("Nie ma selecta.")
        #control = commands.browser.get_form("1").find_control('ineSluId', type="select")
    if(control != None): # The page has a programme <select>
        values = pobierzElementySelect(commands.show()) # Parse the select's values from the HTML
        select_options = utworzTabeleSelect(values) # Build a replacement select
        select_attrs = {'id': 'some_id'} # Attributes for the new select
        for v in values: # Iterate over every programme value
            form = commands.get_browser().get_form("1") # Fetch the form
            ct = commands.browser.get_form("1").find_control('ineSluId', type="select") # Fetch the select control
            add_select_to_form(form, 'ineSluId', select_attrs, select_options) # Install the new select
            form.fixup() # Re-scan the form for newly added controls
            commands.showforms()
            commands.formvalue("1", ct.name, v.strip()) # Choose the value in the select
            commands.submit('0') # Click submit
            html = pobierzZajecia(commands) # Fetch the classes page
            commands.follow("zapisy.do") # Go back to the enrolment page
            #commands.sleep(6)
            if (html != "skreslony z kierunku"): # Skip struck-off programmes
                tablicaHTMLow.append(html)
    else:
        html = pobierzZajecia(commands) # No select: single programme, fetch directly
        if (html != "skreslony z kierunku"):
            tablicaHTMLow.append(html)
    # (A large block of commented-out experimental code — manual select
    # handling, debug prints and sleeps — was removed here for clarity.)
    wyloguj(commands)
    commands.browser.clear_cookies() # Delete cookies
    commands.reset_browser() # Restart the browser
    commands.reset_output()
    return tablicaHTMLow
peps.add(line) # clean up peps peps = filter(lambda p: p.strip() != '' , peps) num_starting_peps = len(peps) # open our spreadsheet tc.go(data_file) print "Navigation Successful" # login print "Logging In..." tc.fv("1", "j_username", "jiao") tc.fv("1", "j_password", "jiao321") tc.submit('4') links = [] print "Login Successful, Fetching peptides..." for l in tc.showlinks(): links.append(l.url) links = filter(lambda l: 'Lorikeet' in l,links) # method to grab the sequence from the URL def extract_seq(l): seq = l.split('sequence')[1].split('&')[0][3:-2] seq = seq.translate(None, delchars) return seq # filter method that keeps track of which peps have been used def in_pep(l): seq = extract_seq(l)
def pobierzZajecia(commands):
    """From the enrolment page, open the current academic year and the
    "enrolled courses" details form, returning the resulting HTML.

    Returns the page HTML, or the sentinel string
    "skreslony z kierunku" when the student was struck off.
    """
    print("Po wybraniu")
    links = commands.showlinks() # Fetch the links of the current page
    commands.showforms()
    # Pick the link for the current academic year; semester depends on
    # whether today is past 1 October.
    dateToday = datetime.date.today()
    firstOctober = datetime.date(dateToday.year, 10, 1)
    if dateToday > firstOctober: # Past 1 October
        #Winter semester
        for link in links:
            ktory = 0
            if link.text == '' + str(dateToday.year) + '/' + str(
                    dateToday.year + 1) + '': # Link titled "year/year+1"
                ktory = ktory + 1
            if ktory == 1: # Found it!
                commands.go(
                    link.url
                ) # Follow the URL behind that year
    else:
        #Summer semester
        for link in links:
            ktory = 0
            if link.text == '' + str(dateToday.year) + '/' + str(
                    dateToday.year + 1) + '': # Link titled "year/year+1"
                ktory = ktory + 1
            # NOTE(review): ktory is reset to 0 every iteration, so this
            # branch can never fire — confirm intended counter scope.
            if ktory == 2: # Found it!
                commands.go(
                    link.url
                ) # Follow the URL behind that year
    # TO REMOVE! Debug dump after selecting the semester
    print("Co po kliknieciu semestru:")
    commands.showlinks()
    print "Forms:"
    commands.showforms()
    # Find the form whose action shows the enrolled-course details.
    forms = commands.showforms() # Fetch the forms
    naszForm = None # Will hold the form we are looking for
    for form in forms: # Loop over the forms
        if form.action == 'https://edukacja.pwr.wroc.pl/EdukacjaWeb/zapisy.do?href=#hrefZapisySzczSlu': # Action leads to the details page
            naszForm = form # Remember the matching form
    print(naszForm) # TO REMOVE! Prints the found form
    if (
            naszForm != None
    ): # Year found and the student still exists on this programme
        ctrl = naszForm.controls # All controls of the found form
        for ct in ctrl:
            if ct.type == 'submit': # Find the submit button among them
                commands.get_browser().clicked(
                    naszForm, ct.attrs['name']) # Click it
                commands.get_browser().submit()
        print("Co po kliknieciu szczegoly zajec")
        commands.showlinks()
        #commands.sleep(5)
        print "Forms:"
        #commands.showforms()
        content = commands.show()
        #content =""
        #commands.browser.clear_cookies()
        #print('PRZZEEEEEENIOSLEM')
        #commands.sleep(5)
        return content
    else:
        #commands.follow("zapisy.do")
        #print('PRZZEEEEEENIOSLEM')
        return ("skreslony z kierunku")
def annotate(params, proteins, \
    force=False):
    """
    Uses the TMB-HUNT web service
    (http://bmbpcu36.leeds.ac.uk/~andy/betaBarrel/AACompPred/aaTMB_Hunt.cgi)
    to predict if proteins are outer membrane beta-barrels.

    NOTE: In my limited testing, TMB-HUNT tends to perform very poorly in
    terms of false positives and false negetives. I'd suggest using only BOMP.

    Returns parse_tmbhunt(proteins, out); raw results are cached in
    'tmbhunt.out' (pass force=True to re-query the service).
    """
    # TODO: automatically split large sets into multiple jobs
    # TMB-HUNT will only take 10000 seqs at a time
    if len(proteins) >= 10000:
        log_stderr("# ERROR: TMB-HUNT(web): can't take more than 10,000 sequences.")
        return
    # set the user-agent so web services can block us if they want ... :/
    python_version = sys.version.split()[0]
    agent("Python-urllib/%s (twill; inmembrane)" % python_version)
    out = 'tmbhunt.out'
    log_stderr("# TMB-HUNT(web) %s > %s" % (params['fasta'], out))
    # Cache hit: parse the previously saved raw output instead.
    if not force and os.path.isfile(out):
        log_stderr("# -> skipped: %s already exists" % out)
        return parse_tmbhunt(proteins, out)
    # dump extraneous output into this blackhole so we don't see it
    if not __DEBUG__: twill.set_output(StringIO.StringIO())
    go("http://bmbpcu36.leeds.ac.uk/~andy/betaBarrel/AACompPred/aaTMB_Hunt.cgi")
    if __DEBUG__: showforms()
    # read up the FASTA format seqs
    fh = open(params['fasta'], 'r')
    fasta_seqs = fh.read()
    fh.close()
    # fill out the form
    fv("1", "sequences", fasta_seqs)
    submit()
    if __DEBUG__: showlinks()
    # small jobs will lead us straight to the results, big jobs
    # go via a 'waiting' page which we skip past if we get it
    job_id = None
    try:
        # we see this with big jobs
        result_table_url = follow("http://www.bioinformatics.leeds.ac.uk/~andy/betaBarrel/AACompPred/tmp/tmp_output.*.html")
        job_id = result_table_url.split('tmp_output')[-1:][0].split('.')[0]
    except:
        # small jobs take us straight to the html results table
        pass
    # parse the job_id from the url, since due to a bug in
    # TMB-HUNT the link on the results page from large jobs is wrong
    if not job_id:
        job_id = follow("Full results").split('/')[-1:][0].split('.')[0]
    log_stderr("# TMB-HUNT(web) job_id is: %s <http://www.bioinformatics.leeds.ac.uk/~andy/betaBarrel/AACompPred/tmp/tmp_output%s.html>" % (job_id, job_id))
    # polling until TMB-HUNT finishes
    # TMB-HUNT advises that 4000 sequences take ~10 mins
    # we poll a little faster than that
    polltime = (len(proteins)*0.1)+2
    while True:
        log_stderr("# TMB-HUNT(web): waiting another %i sec ..." % (polltime))
        time.sleep(polltime)
        try:
            # The plain-text result file appears only once the job is done.
            go("http://bmbpcu36.leeds.ac.uk/~andy/betaBarrel/AACompPred/tmp/%s.txt" % (job_id))
            break
        except:
            # Exponential backoff, capped at 2 hours total delay.
            polltime = polltime * 2
            if polltime >= 7200: # 2 hours
                log_stderr("# TMB-HUNT error: Taking too long.")
                return
    txt_out = show()
    # write raw TMB-HUNT results
    fh = open(out, 'w')
    fh.write(txt_out)
    fh.close()
    return parse_tmbhunt(proteins, out)
def annotate(params, proteins, \
    force=False):
    """
    DEPRECATED: The TMB-HUNT server appears to be permanently offline.

    Uses the TMB-HUNT web service
    (http://bmbpcu36.leeds.ac.uk/~andy/betaBarrel/AACompPred/aaTMB_Hunt.cgi)
    to predict if proteins are outer membrane beta-barrels.

    NOTE: In my limited testing, TMB-HUNT tends to perform very poorly in
    terms of false positives and false negetives. I'd suggest using only BOMP.

    Returns parse_tmbhunt(proteins, out); raw results are cached in
    'tmbhunt.out' (pass force=True to re-query the service).
    """
    # TODO: automatically split large sets into multiple jobs
    # TMB-HUNT will only take 10000 seqs at a time
    if len(proteins) >= 10000:
        log_stderr(
            "# ERROR: TMB-HUNT(web): can't take more than 10,000 sequences.")
        return
    # set the user-agent so web services can block us if they want ... :/
    python_version = sys.version.split()[0]
    agent("Python-urllib/%s (twill; inmembrane)" % python_version)
    out = 'tmbhunt.out'
    log_stderr("# TMB-HUNT(web) %s > %s" % (params['fasta'], out))
    # Cache hit: parse the previously saved raw output instead.
    if not force and os.path.isfile(out):
        log_stderr("# -> skipped: %s already exists" % out)
        return parse_tmbhunt(proteins, out)
    # dump extraneous output into this blackhole so we don't see it
    if not __DEBUG__: twill.set_output(StringIO.StringIO())
    go("http://bmbpcu36.leeds.ac.uk/~andy/betaBarrel/AACompPred/aaTMB_Hunt.cgi"
       )
    if __DEBUG__: showforms()
    # read up the FASTA format seqs
    fh = open(params['fasta'], 'r')
    fasta_seqs = fh.read()
    fh.close()
    # fill out the form
    fv("1", "sequences", fasta_seqs)
    submit()
    if __DEBUG__: showlinks()
    # small jobs will lead us straight to the results, big jobs
    # go via a 'waiting' page which we skip past if we get it
    job_id = None
    try:
        # we see this with big jobs
        result_table_url = follow(
            "http://www.bioinformatics.leeds.ac.uk/~andy/betaBarrel/AACompPred/tmp/tmp_output.*.html"
        )
        job_id = result_table_url.split('tmp_output')[-1:][0].split('.')[0]
    except:
        # small jobs take us straight to the html results table
        pass
    # parse the job_id from the url, since due to a bug in
    # TMB-HUNT the link on the results page from large jobs is wrong
    if not job_id:
        job_id = \
            follow("Full results").split('/')[-1:][0].split('.')[0]
    log_stderr(
        "# TMB-HUNT(web) job_id is: %s <http://www.bioinformatics.leeds.ac.uk/~andy/betaBarrel/AACompPred/tmp/tmp_output%s.html>" % (job_id, job_id))
    # polling until TMB-HUNT finishes
    # TMB-HUNT advises that 4000 sequences take ~10 mins
    # we poll a little faster than that
    polltime = (len(proteins) * 0.1) + 2
    while True:
        log_stderr("# TMB-HUNT(web): waiting another %i sec ..." % (polltime))
        time.sleep(polltime)
        try:
            # The plain-text result file appears only once the job is done.
            go("http://bmbpcu36.leeds.ac.uk/~andy/betaBarrel/AACompPred/tmp/%s.txt" % (job_id))
            break
        except:
            # Exponential backoff, capped at 2 hours total delay.
            polltime = polltime * 2
            if polltime >= 7200: # 2 hours
                log_stderr("# TMB-HUNT error: Taking too long.")
                return
    txt_out = show()
    # write raw TMB-HUNT results
    fh = open(out, 'w')
    fh.write(txt_out)
    fh.close()
    return parse_tmbhunt(proteins, out)
def vote_story(hn_id):
    """Vote a Hacker News story up or down on behalf of the HTTP-auth user.

    Expects a POST field 'vote' ("up"/"down") and HTTP basic-auth
    credentials, which are replayed against HN via twill (login, open
    the story, follow the vote link, log out). Returns a JSON-able dict
    on success; aborts with 400/401 on bad input or a failed vote.
    """
    vote = request.POST.get('vote', '').strip()
    # we need a voting direction "up" or "down"
    # (equivalent to the old empty-string + != checks)
    if vote not in ('up', 'down'):
        return abort(code=400, text="Invalid voting direction, " +
                     "needs to be 'up' or 'down'?")
    # check for the http auth
    if not request.auth:
        return abort(code=401, text="We need your username and " +
                     "password for this operation.")
    # i can haz your hn password
    username, password = request.auth
    # we start web scraping
    go(HN_URL)
    # lets find the login url
    login_url = ''
    for link in showlinks():
        if link.text == "login":
            login_url = link.url
    # Fix: previously a missing login link led to go('') and an
    # obscure failure further down.
    if login_url == '':
        return abort(code=400, text="Could not find the login link.")
    go(login_url)
    # we login (#1 is the login form, #2 is register)
    formvalue('1', 'u', username)
    formvalue('1', 'p', password)
    # 4 is the position of the submit button
    submit('4')
    # now we go to the story
    go('/item?id=%s' % hn_id)
    # and vote for it: find the voting link
    voting_url = ''
    for link in showlinks():
        if link.url.startswith('vote?for=%s&dir=%s&by=%s'
                               % (hn_id, vote, username)):
            voting_url = '/' + link.url
    if voting_url == '':
        return abort(code=400, text="Something's wrong at voting, " +
                     "Could not find the voting url. " +
                     "Most likely you already voted or " +
                     "the username or password are wrong.")
    go(voting_url)
    # lets find the logout url.
    # Fix: logout_url was previously unbound when no "logout" link was
    # present, raising UnboundLocalError; now we skip logout instead.
    logout_url = ''
    for link in showlinks():
        if link.text == "logout":
            logout_url = link.url
    go(HN_URL)
    if logout_url:
        go(logout_url)
    # and we're done!
    reset_browser()
    # success! response is always in JSON
    return {'status': "ok",
            'message': "voted successfully for %s" % hn_id}