def resolver():
    """Resolve the direct stream URL for the video page at ``shared.URL``.

    The page is fetched through ``shared.r``.  ``shared.filename`` is set to
    the page ``<title>`` text with ``.flv`` appended.  The ``flashvars`` of
    the embedded ``<object>`` are then searched for the
    ``url_encoded_fmt_stream_map`` entry; the first URL it lists is stored
    in ``shared.URL``.  When no stream URL can be extracted, ``shared.URL``
    is set to ``''``.
    """
    import shared
    r = shared.r
    r.GET(shared.URL)
    from htmlparser import between
    try:
        from urllib import unquote  # Python 2
    except ImportError:
        from urllib.parse import unquote  # Python 3

    shared.filename = between(r.Page, '<title>', '</title>') + '.flv'
    watch = between(r.Page, '<object', '</object>', include=True).replace('>', '>\n')
    flashvars = between(watch, 'name="flashvars" value="', '"')

    def fmt_stream_map(var):
        # Keep only the first parameter, discarding "quality=medium" etc.
        first = var.split('&')[0]
        # Drop the leading "url=" from the decoded parameter.
        return unquote(first)[4:]

    values = []
    # Entries are separated by '&' or by a single quote.
    for var in flashvars.replace('&', "'").split("'"):
        parts = var.split('=')
        if parts[0] != 'url_encoded_fmt_stream_map':
            continue
        if len(parts) < 2:
            # Key present without a value: nothing to decode.
            continue
        # That's the interesting part: a comma-separated list of streams.
        values = [fmt_stream_map(item) for item in unquote(parts[1]).split(',')]
        print(str(len(values)) + ' URLs:\n' + str(values))

    # Use the first URL in the list; an empty string signals "not found".
    shared.URL = values[0] if values else ''
def extractSeasons(self):
    """Collect the option values of the page's season selector.

    Looks inside the ``<SELECT name="season"`` ... ``</SELECT>`` part of
    ``self.page`` and gathers every ``<OPTION value="...">`` value that
    ``between`` yields, stripped of surrounding whitespace.  The list is
    stored in ``self.seasons`` and also returned.
    """
    option_start = '<OPTION value="'
    dropdown = between(self.page, '<SELECT name="season"', '</SELECT>')
    self.seasons = []
    value = between(dropdown, option_start, '"')
    # An empty result from between() ends the scan.
    while value != '':
        self.seasons.append(value.strip())
        # One option has been consumed per stored season, so the number of
        # stored seasons is the skip count for the next lookup.
        value = between(dropdown, option_start, '"', skip=len(self.seasons))
    return self.seasons
def extractEpisodes(self, season):
    """Map the episode labels of one season to their page URLs.

    Args:
        season: season identifier, concatenated into the form name
            ``episodeform<season>`` that is looked up in ``self.page``.

    Returns:
        ``self.episodes``: a dict keyed by the text found after
        ``>Episode `` with the ``completeURL``-processed option value as
        value.  Entries whose option value is empty are left out.
    """
    form = between(self.page, '<FORM name="episodeform' + season + '">', '</FORM>')
    position = 0
    found = {}
    self.episodes = found
    label = between(form, '>Episode ', '</OPTION')
    # An empty label from between() ends the scan.
    while label != '':
        # The option value is looked up with the same running index as the label.
        target = between(form, '<OPTION value="', '"', skip=position)
        position += 1
        if target != '':
            found[label] = completeURL(target)
        label = between(form, '>Episode ', '</OPTION', skip=position)
    return self.episodes
def extractVideoLink(self, page=None):
    """Extract the video link(s) from a movie page.

    Args:
        page: HTML text to scan; ``self.page`` is used when omitted.

    Returns:
        The link as a string.  When the page contains the
        ``teil1_aktiv.png`` marker (movie split into several video files),
        a list ``[first_part, second_part]`` is returned instead, where
        ``second_part`` is the result of running this method on the page
        of the second part.
    """
    if page is None:
        page = self.page

    info_box = between(page, 'question.png', 'IMDB Rating')
    url = between(info_box, '<a target="_blank" href="', '"')
    if url == '':
        # No direct link: fall back to the embedded iframe and turn its
        # embed address into the corresponding file address.
        url = between(page, '<div id="emptydiv"><iframe src="', '"')
        url = url.replace('.com/embed/', '.com/file/')

    # Movie split into several video files.
    if 'teil1_aktiv.png' in page:
        print('part 2 ...')
        part_client = HttpClient()
        switcher = between(page, 'teil1_aktiv.png', 'teil2_inaktiv.png')
        part_client.GET(between(switcher, '<a href="', '"'))
        # Must go through self: this is a method, so a bare
        # extractVideoLink(...) call would not resolve to it.
        url = [url, self.extractVideoLink(part_client.Page)]
    return url
def parseXML(xml):
    """Build a BibTeX ``@article`` entry from an article XML record.

    Author last names are read from the ``<LastName>`` elements; names
    listed in ``unrecognized_authors`` are replaced (with a printed
    warning), and spaces and hyphens are removed from every name.  Title,
    journal abbreviation, publication year and DOI are read from their
    respective elements.  The entry's URL is built from the module-level
    ``ID``.

    Args:
        xml: the XML text to parse.

    Returns:
        The BibTeX entry as a string ending in a blank line.  The citation
        key is the first author name followed by the year.
    """
    authors = []
    index = 1
    name = between(xml, '<LastName>', '</LastName>', index)
    # An empty result from between() ends the author scan.
    while name != '':
        if name in unrecognized_authors:
            print('Warning: changing problematic author name ' + name + ' to ' + unrecognized_authors[name])
            name = unrecognized_authors[name]
        authors.append(name.replace(' ', '').replace('-', ''))
        index += 1
        name = between(xml, '<LastName>', '</LastName>', index)

    title = between(xml, '<ArticleTitle>', '</ArticleTitle>').strip('\t .')
    journal = between(xml, '<ISOAbbreviation>', '</ISOAbbreviation>')
    pub_date = between(xml, '<PubDate>', '</PubDate>')
    year = between(pub_date, '<Year>', '</Year>')
    doi = between(xml, '<ArticleId IdType="doi">', '</ArticleId>')
    url = 'http://www.ncbi.nlm.nih.gov/pubmed/' + ID

    entry_lines = [
        '@article{' + authors[0] + year + ',',
        '\ttitle = "' + title + '",',
        '\tauthor = "{' + '} and {'.join(authors) + '}",',
        '\tjournal = "' + journal + '",',
        '\tyear = ' + year + ',',
        '\tdoi = {' + doi + '},',
        '\turl = {' + url + '}',
        '}',
        # Two trailing empty items produce the closing "}\n\n".
        '',
        '',
    ]
    return '\n'.join(entry_lines)
def resolver():
    """Rewrite ``shared.URL`` from a video page address to its ``.flv`` address.

    The video id is taken from the second-to-last ``/``-separated segment
    of ``shared.URL``.  The page is fetched through ``shared.r`` and the
    ``src`` of the ``<img id='i<video id>'`` element is looked up; path
    segments 1 to 3 of that address (after removing ``http://``) become
    the folder of the resulting ``http://is3.myvideo.de/...`` URL.
    """
    import shared
    client = shared.r
    client.GET(shared.URL)
    from htmlparser import between

    segments = shared.URL.split("/")
    video_id = segments[len(segments) - 2]

    img_marker = "<img id='i" + video_id + "' src='"
    thumbnail = between(client.Page, img_marker, "'")
    thumbnail_path = thumbnail.replace("http://", "").split("/")
    folder = "/".join(thumbnail_path[1:4])

    shared.URL = "http://is3.myvideo.de/%s/%s.flv" % (folder, video_id)
def extractHosters(self, URL=None):
    """Collect the hosters listed on a movie page together with their links.

    Args:
        URL: optional address to load first; when given, it is fetched
            with the module-level ``client`` and ``self.page`` is replaced
            by the fetched page.

    Returns:
        ``self.hosters``: a dict mapping each hoster name to the video
        link extracted from that hoster's page (or to the empty link when
        the table row has none).
    """
    global client
    if URL is not None:
        client.GET(URL)
        self.page = str(client.Page)

    row_start = '<tr id="tablemoviesindex2"'
    self.hosters = {}
    row_index = 0
    row = between(self.page, row_start, "</tr>")
    # An empty row from between() ends the scan.
    while row != '':
        href = between(row, '<a href="', '"')
        link = completeURL(href)
        if link != '':
            # Follow the row's link and pull the actual video link from it.
            link = Movie2kPage(link).extractVideoLink()
        name_cell = between(row, '<td ', '</td>', skip=1)
        self.hosters[between(name_cell, ' ', '</a>')] = link
        row_index += 1
        row = between(self.page, row_start, "</tr>", skip=row_index)
    return self.hosters
def downloadCitation(ID):
    """Download the XML record of one PubMed entry.

    The entry page is opened first, then the site's display form is posted
    with the presentation switched to ``xml``.  The record is taken from
    the ``<pre>`` element of the response.

    Args:
        ID: the PubMed identifier as a string (it is concatenated into the
            request URL and the form data).

    Returns:
        The content of the ``<pre>`` element with ``&lt;`` / ``&gt;``
        decoded to angle brackets and surrounding whitespace removed.
    """
    browser = HttpClient()
    browser.GET('http://www.ncbi.nlm.nih.gov/pubmed/' + ID)

    # Common prefixes of the form field names.
    pubmed = 'EntrezSystem2.PEntrez.Pubmed.'
    search_bar = pubmed + 'Pubmed_SearchBar.'
    results_panel = pubmed + 'Pubmed_ResultsPanel.'
    display_bar = results_panel + 'Pubmed_DisplayBar.'
    results_controller = results_panel + 'Pubmed_ResultsController.'
    email_tab = results_panel + 'EmailTab.'
    db_connector = 'EntrezSystem2.PEntrez.DbConnector.'

    q = {}
    q[search_bar + 'SearchResourceList'] = 'pubmed'
    q[search_bar + 'Term'] = ''
    q[search_bar + 'CurrDb'] = 'pubmed'
    q[pubmed + 'Pubmed_PageController.PreviousPageName'] = 'results'
    q[display_bar + 'sPresentation'] = 'xml'
    q[display_bar + 'FFormat'] = 'abstract'
    q['email_format'] = 'abstract'
    q['email_address'] = ''
    q['email_subj'] = '1+selected+item%3A+' + ID + '+-+PubMed'
    q['email_add_text'] = ''
    q[display_bar + 'FileFormat'] = 'abstract'
    q[display_bar + 'LastPresentation'] = 'abstract'
    q[display_bar + 'Presentation'] = 'xml'
    q[display_bar + 'PageSize'] = '20'
    q[display_bar + 'LastPageSize'] = '20'
    q[display_bar + 'Sort'] = ''
    q[display_bar + 'LastSort'] = ''
    q[display_bar + 'FileSort'] = ''
    q[display_bar + 'Format'] = 'text'
    q[display_bar + 'LastFormat'] = ''
    q['CitationManagerStartIndex'] = '1'
    q['CitationManagerCustomRange'] = 'false'
    q[results_controller + 'ResultCount'] = '1'
    q[results_controller + 'RunLastQuery'] = ''
    q[results_panel + 'HistoryDisplay.Cmd'] = 'displaychanged'
    for field in ('EmailReport', 'EmailFormat', 'EmailCount', 'EmailStart',
                  'EmailSort', 'Email', 'EmailSubject', 'EmailText',
                  'EmailQueryKey', 'QueryDescription'):
        q[email_tab + field] = ''
    q[db_connector + 'Db'] = 'pubmed'
    q[db_connector + 'LastDb'] = 'pubmed'
    q[db_connector + 'Term'] = ''
    q[db_connector + 'LastTabCmd'] = ''
    q[db_connector + 'LastQueryKey'] = '1'
    q[db_connector + 'IdsFromResult'] = ''
    q[db_connector + 'LastIdsFromResult'] = ''
    q[db_connector + 'LinkName'] = ''
    q[db_connector + 'LinkReadableName'] = ''
    q[db_connector + 'LinkSrcDb'] = ''
    q[db_connector + 'Cmd'] = 'displaychanged'
    q[db_connector + 'TabCmd'] = ''
    q[db_connector + 'QueryKey'] = ''
    q['p%24a'] = display_bar + 'SetDisplay'
    q['p%24l'] = 'EntrezSystem2'
    q['p%24st'] = 'pubmed'

    browser.POST('http://www.ncbi.nlm.nih.gov/pubmed', q, {'Accept': 'application/xml'})

    escaped = between(browser.Page, '<pre>', '</pre>')
    # The previous '<' -> '<' / '>' -> '>' replacements were no-ops; the
    # markup inside <pre> is presumably entity-escaped, so decode the angle
    # brackets to get real XML tags.
    xml = escaped.replace('&lt;', '<').replace('&gt;', '>').strip()
    return xml
def External_IP():
    """Return the text of the ``<strong>`` element on wieistmeineip.de.

    The page is fetched with a fresh ``HttpClient``; whatever ``between``
    finds between ``<strong>`` and ``</strong>`` is returned as-is
    (presumably the caller's external IP address).
    """
    client = HttpClient()
    client.GET("http://www.wieistmeineip.de/")
    return between(client.Page, '<strong>', '</strong>')
def extractTitle(self):
    """Return the title text found in ``self.page``.

    The text is whatever ``between`` finds after the
    ``style="color:#000000;">`` marker up to the next ``<``, with
    surrounding whitespace removed and newlines turned into spaces.
    """
    raw_title = between(self.page, 'style="color:#000000;">', '<')
    trimmed = raw_title.strip()
    return trimmed.replace('\n', ' ')