def search(self):
    """Takes values from the text fields and runs the wiki algorithm."""
    self.text.delete(0.0, END)
    x = self.start.get()
    y = self.end.get()
    # Assign start to a random article if the field is empty
    if x == '':
        while True:
            try:
                x = wikipedia.random(1)
                self.text.insert(0.0, 'Random start is %s.\n' % x)
                break
            except UnicodeEncodeError:
                pass
    # Assign end to a random article if the field is empty
    if y == '':
        while True:
            try:
                y = wikipedia.random(1)
                self.text.insert(0.0, 'Random end is %s.\n' % y)
                break
            except UnicodeEncodeError:
                pass
    self.text.insert(0.0, 'Start is %s.\nTarget is %s.\n' % (x, y))

def get_random_articles_v2():
    """Retrieves random articles until the user types 'stop'."""
    ans = input('Press enter to continue or stop to stop: ')
    while ans != 'stop':
        try:
            print(wikipedia.summary(wikipedia.random()))
            print()
        except wikipedia.exceptions.DisambiguationError:
            # The random title was ambiguous; loop round and draw another.
            # (The original repeated the summary call, unguarded, inside
            # this handler, so a second ambiguous title would crash.)
            continue
        ans = input('Press enter to continue or stop to stop: ')

def getPage():
    rpage = wikipedia.page(wikipedia.random(1))
    while len(rpage.content) <= minlength:
        # TODO: exception for list pages
        try:
            rpage = wikipedia.page(wikipedia.random(1))
        except wikipedia.exceptions.DisambiguationError as e:
            print 'ERROR'
            rpage = wikipedia.page(e.options[0])
        else:
            # Fetch succeeded but the previous page was too short; the loop
            # re-checks. (The original refetched here, discarding the page
            # it had just loaded.)
            print 'MISS'
    return rpage

def get_random_page():
    # https://wikipedia.readthedocs.org/en/latest/quickstart.html
    random_title = wikipedia.random(pages=1)
    random_page = None
    while not random_page:
        try:
            random_page = wikipedia.page(title=random_title)
        except wikipedia.PageError:
            random_title = wikipedia.random(pages=1)
            random_page = None
        except wikipedia.DisambiguationError as e:
            random_title = random.choice(e.options)
            random_page = None
    return random_page

def wikipedia_random(self, n):
    print "Downloading pages from Wikipedia. This may take a moment..."
    f = open(self.inf, "w")
    pages = []
    # Get a list of random Wikipedia pages.
    # wikipedia.random can only get 10 at a time, so call it repeatedly.
    for x in range(n / 10):
        pages += wikipedia.random(10)
    if n % 10 > 0:
        pages += wikipedia.random(n % 10)
    log.info("Found {} random wikipedia pages".format(len(pages)))
    # Get the summary info from all the pages.
    # This is a lot of pages, so it can take some time.
    for page in pages:
        info = None
        try:
            log.debug("Getting page " + page)
            info = wikipedia.page(page)
        # If the request threw a disambiguation error, try the first suggestion.
        # If that fails, just give up on that request; we didn't want it that
        # much anyway.
        except wikipedia.exceptions.DisambiguationError as e:
            try:
                log.debug("Getting page " + page)
                info = wikipedia.page(e.options[0])
            except wikipedia.exceptions.DisambiguationError:
                continue
            except (wikipedia.exceptions.HTTPTimeoutError,
                    wikipedia.exceptions.PageError):
                continue
        except (wikipedia.exceptions.HTTPTimeoutError,
                wikipedia.exceptions.PageError):
            continue
        # Save the summary. If it exceeds 127 characters, try to truncate at
        # the last sentence end; if that fails, write it anyway, but it will
        # give a warning when it generates the config.
        s = info.summary.encode("ascii", "ignore").replace("\n", " ")
        if len(s) < 127:
            log.info("Wrote line with no edits")
            f.write(s + "\n")
        elif s[:127].rfind(". ") != -1:
            f.write(s[:s[:127].rfind(". ") + 1] + "\n")
            log.info("Summary too long, but was truncated")
        else:
            f.write(s + "\n")
            log.info("Summary too long and couldn't truncate")
    f.close()

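# The 10-at-a-time cap that wikipedia_random() works around above is a limit
# of wikipedia.random() itself. A minimal standalone sketch of the same
# chunking (the helper name `random_titles` is illustrative, not from any
# snippet in this section):
import wikipedia

def random_titles(n):
    """Collect n random titles, requesting at most 10 per API call."""
    titles = []
    while len(titles) < n:
        batch = wikipedia.random(pages=min(10, n - len(titles)))
        if isinstance(batch, str):
            batch = [batch]  # pages=1 returns a bare string, not a list
        titles.extend(batch)
    return titles
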
def __init__(self):
    try:
        self.page = wikipedia.page(wikipedia.random(pages=1))
        self.name = self.page.title
        self.summary = self.page.summary
    except wikipedia.DisambiguationError:
        # Retry on this instance. The original constructed a new Wiki_page()
        # and discarded it, leaving self uninitialised.
        self.__init__()

def get_random_wiki():
    random_wiki = wikipedia.random()
    random_wiki_page = wikipedia.page(random_wiki)
    summary = wikipedia.summary(random_wiki, sentences=1)
    return random_wiki_page.title, random_wiki_page.url, summary

def run(self, articles_number, words_number, result_file):
    result = []
    most_common_words = load_most_commons_words()
    most_common_words = most_common_words[:words_number]
    if words_number > len(most_common_words):
        raise ValueError("most_common_words database too small: {} > {}".format(
            words_number, len(most_common_words)))
    bar = progressbar.ProgressBar()
    for n in bar(range(articles_number)):
        article_words_occurrence = [0] * words_number
        # Keep drawing random pages until one loads without an exception
        while True:
            try:
                page = wikipedia.page(wikipedia.random(), auto_suggest=True)
                break
            except Exception:
                pass
        for word in simplify_article_and_return_words(page.content):
            if word in most_common_words:
                article_words_occurrence[most_common_words.index(word)] += 1
        result.append(article_words_occurrence)
    with open(result_file, "w") as f:
        f.write("\n".join([",".join(str(number) for number in row)
                           for row in result]))

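# For reference, run() writes a bare CSV of counts: one row per article, one
# column per common word. For example, with words_number=3 and two articles
# the file might contain (illustrative values, not real output):
#   4,0,1
#   2,3,0
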
async def wiki(self, c: Command):
    wikipedia.set_lang(self.settings["language"])
    sentences = self.settings["sentences"]
    query = c.message
    # Expected response for choosing a topic from `self.searches`
    if len(query) == 1 and query.isdigit():
        query = self.searches[int(query)]
    elif len(query) == 0:
        query = wikipedia.random(pages=1)
    try:
        page = wikipedia.page(query)
        summary = wikipedia.summary(query, sentences=sentences)
    except wikipedia.exceptions.PageError as err:
        await self.send_message("PageError, FIXME")
    except wikipedia.exceptions.DisambiguationError as err:
        self.searches = err.options[:10]
        # Iterate over self.searches to create a formatted string,
        # i.e. "0) List Index 0\n1) List Index 1" etc.
        fmtd = "\n".join([str(i) + ") " + m for i, m in enumerate(self.searches)])
        await self.send_message("Select one with \"wiki #\"\n{}".format(fmtd),
                                clean=False)
    else:
        url = page.url
        # The wikipedia lib leaves "== Heading ==" markers in summaries? yuck.
        summary = re.sub(r"==.+==", " ", summary)
        summary = re.sub(r"\n", "", summary)
        # TODO: maybe pull this limit from the settings
        await self.send_message("{}\n{}".format(url, summary),
                                clean=False, limit=1)

def random_calling():
    question = wikipedia.random(1)
    print(question, len(str(question).split()))
    if len(str(question).split()) == 1:
        return str(question)
    else:
        return random_calling()

def getArt():
    randArt = wikipedia.random(pages=1)
    userInput = input("Would you like to read about %s? " % randArt)
    if userInput.lower() in ("yes", "y"):
        launchArt(randArt)
    elif userInput.lower() in ("no", "n"):
        getArt()

def get_wikipedia_random_pages(size_mbytes):
    print 'Starting download random pages from wikipedia, writing to file %s' % WIKI_OUTPUT_FILE
    output_file = open(WIKI_OUTPUT_FILE, 'a')
    curr_progress = 0
    curr_size = 0
    iters = 0
    data_size_iter = 0
    size_bytes = MBytesToBytes(size_mbytes)
    start = time.time()
    while curr_size < size_bytes:
        try:
            print 'Starting iteration %d ' % iters
            itemset = wikipedia.page(wikipedia.random(
                pages=1)).content.encode('utf-8').lower().split(" ")
        except Exception:
            # Random page failed to load (disambiguation, timeout, ...); retry
            continue
        itemset_hashes = [str(hash(word)) for word in itemset]
        itemset_line = " ".join(itemset_hashes) + NEWLINE
        data_size_iter += len(itemset_line)
        curr_size += len(itemset_line)
        iters += 1
        output_file.write(itemset_line)
        if curr_size > curr_progress + PROGRESS:
            print 'Already written %d bytes' % curr_size
        if iters % 10 == 0:
            end = time.time()
            print 'Current 10 iterations took %d seconds and %s bytes collected' % (
                (end - start), data_size_iter)
            data_size_iter = 0
            start = time.time()
    output_file.close()

def randomWiki():
    '''
    This function gives you a list of n random articles.
    Choose any article.
    '''
    # input() returns a string; the original compared and indexed with it
    # directly, which raises TypeError, so convert to int throughout.
    number = int(input("No: of Random Pages : "))
    lst = wk.random(number)
    for i in enumerate(lst):
        print(i)
    try:
        key = int(input("Enter the number : "))
        assert key >= 0 and key < number
    except (AssertionError, ValueError):
        key = int(input("Please enter corresponding article number : "))
    page = wk.page(lst[key])
    url = page.url
    # originalTitle = page.original_title
    pageId = page.pageid
    # references = page.references
    title = page.title
    # soup = BeautifulSoup(page.content, 'lxml')
    pageLength = int(input('''Wiki Page Type :
    1. Full
    2. Summary : '''))
    if pageLength == 1:
        soup = fullPage(page)
        print(soup)
    else:
        print(title)
        print("Page Id = ", pageId)
        print(page.summary)
        print("Page Link = ", url)
        # print("References : ", references)

def get_decoys(self, target):
    import wikipedia
    decoy1_search = []
    while len(decoy1_search) < target:
        try:
            decoy1 = wikipedia.page(wikipedia.random())
            # Search on the page title; the original passed the page object
            # itself, but wikipedia.search() expects a query string.
            decoy1_query = wikipedia.search(decoy1.title)
            for i in range(len(decoy1_query)):
                if len(decoy1_search) >= target:
                    break
                try:
                    member0 = wikipedia.page(decoy1_query[i])
                    member0_list = member0.references
                    for j in range(len(member0_list)):
                        decoy1_search += [['00', member0_list[j]]]
                        if len(decoy1_search) >= target:
                            break
                except Exception:
                    continue
        except Exception:
            continue
    return decoy1_search

def getRandArticle():
    # Getting directory for easy use
    myPath = os.path.expanduser("~/documents/contexter/")
    # How many articles to get
    for x in xrange(0, 1):
        # Get an article; on a DisambiguationError or PageError, try again.
        # Note it is wikipedia.page(), not wikipedia.random(), that raises
        # these, so the page fetch must sit inside the retry loop (the
        # original guarded only the random() call).
        while True:
            try:
                article = wikipedia.random()
                pageObj = wikipedia.page(article)
                break
            except wikipedia.exceptions.DisambiguationError:
                print "Disambiguation, retrying..."
            except wikipedia.exceptions.PageError:
                print "PageError, retrying..."
        text = pageObj.content
        pageUrl = pageObj.url
        # Write the text to the created file, encoded utf8
        f = open(article + ".txt", "w")
        f.write(text.encode('utf-8'))
        f.close()
        # Read back all lines
        f = open(article + ".txt", "r")
        tempText = f.readlines()
        f.close()
        # Decode all lines for cleanup of the file
        for i, v in enumerate(tempText):
            tempText[i] = tempText[i].decode('utf-8')
        # Clean up the file; '==' characters denote section headings
        f = open(article + ".txt", "w")
        for line in tempText:
            if '==' not in line:
                f.write(line.encode('utf-8'))
        f.close()
        # Move file into the raw text folder
        shutil.move(myPath + "scripts/" + article + ".txt",
                    myPath + "/raw text/" + article + ".txt")
        articleLoc = myPath + "/raw text/" + article + ".txt"
    return [articleLoc, pageUrl]

def makePage():
    # Get a new random page
    global page
    text.delete(1.0, END)
    try:
        page = wikipedia.page(wikipedia.random(pages=1))
    except wikipedia.DisambiguationError:
        makePage()

def read_random_from_topic(topic=None):
    """Reads from a topic (or a random topic if not specified), and returns
    an internal content type that includes a random sentence."""
    if not topic:
        topic = wikipedia.random()
    content = wikipedia.page(topic)
    return to_system_format(content)

def summary(article):
    summary_list = []
    if article is None:
        article = wikipedia.random()
    if article is not None:
        summary = wikipedia.summary(article)
        summary_list.append(summary)
    return summary_list

def insert_random_loop(self):
    i = 1
    while True:
        ra = wikipedia.random()
        article = wikipedia.page(ra)
        self.rs.add_document(f'doc{i}', title=article.title, body=article.content)
        print(f'>>> Inserted {article.title}')
        i += 1

def get_wp_pages():
    # Returns a list of Wikipedia page titles, to be used as concepts
    pagenames = []
    for item in wikipedia.random(pages=8):
        if len(item) < 18:
            pagenames.append(item)
    return pagenames

def get_title(progress_count):
    """Obtain the title of a random wikipedia article, checking it's not a
    disambiguation page."""
    # Print progress
    sys.stdout.write("%d " % progress_count)
    sys.stdout.flush()
    # Get a title and try to load the page; pick another if there's a
    # disambiguation or page error.
    while True:
        try:
            title = wikipedia.random()
            wikipedia.page(title)
            break
        except (wikipedia.exceptions.DisambiguationError,
                wikipedia.exceptions.PageError):
            # The loop draws a fresh title on the next iteration (the
            # original's reassignment here was dead code).
            pass
    return title

def runArticleDownloader():
    for i in range(1, 2501):
        query = wikipedia.random(1)
        try:
            article = wikipedia.page(query)
        except Exception:
            continue
        save_to_file(article.url, article.summary)

def GetArticle():
    """Generates an article."""
    try:
        return wikipedia.page(wikipedia.random())
    except Exception:
        # An error occurred, probably landed on a disambiguation page; try again
        return GetArticle()

def __init__(self):
    self.page = None
    while self.page is None:
        try:
            title = wikipedia.random()
            self.page = wikipedia.page(title=title)
        except Exception:
            pass

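# getPage(), get_random_page() and the two constructors above all hand-roll
# the same draw-and-retry loop. A minimal, self-contained sketch of that
# shared pattern (the function name and the attempt cap are assumptions,
# not taken from any snippet here):
import wikipedia

def fetch_random_page(max_attempts=10):
    """Draw random titles until one resolves to a real, unambiguous page."""
    for _ in range(max_attempts):
        title = wikipedia.random(pages=1)
        try:
            return wikipedia.page(title)
        except (wikipedia.exceptions.DisambiguationError,
                wikipedia.exceptions.PageError):
            continue  # ambiguous or missing page: draw a fresh title
    raise RuntimeError("no valid random page in %d attempts" % max_attempts)
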
def index(request):
    wikipedia.set_lang("fr")
    titreArticle = []
    for i in range(1, 100):
        titreArticle.append(wikipedia.random(pages=1))
    context = {"titre": titreArticle}
    return render(request, 'home.html', context)

def fact():
    pageTitle = wikipedia.random()
    page = wikipedia.summary(pageTitle, sentences=1)
    if ("is" in page or "was" in page) and page[0] != '<':
        # Strip parenthesised and bracketed asides
        page = re.sub(r"[\(\[].*?[\)\]]", "", page)
        return page
    else:
        return fact()

def find_pages(lang_wiki='de', to_extract=1000, max_time_s=4 * 60 * 60,
               verbose=False):
    """
    Extract paragraphs from random wikipedia pages of a given language.

    Parameters
    ----------
    lang_wiki : str, optional (default: de)
    to_extract : int, optional (default: 1000)
        Number of paragraphs to be extracted
    max_time_s : int, optional (default: 4h)
        Maximum time in seconds to run the paragraph extraction.
    verbose : boolean, optional (default: False)

    Returns
    -------
    tuple : extracted_paragraphs, list of source pages
    """
    wikipedia.set_lang(lang_wiki)
    extracted_paragraphs = []
    used_pages = set()
    queried = []
    bar = progressbar.ProgressBar(redirect_stdout=True, max_value=to_extract)
    t0 = time.time()
    while (len(extracted_paragraphs) < to_extract
           and (time.time() - t0) < max_time_s):
        random_pages = wikipedia.random(pages=10)
        for random_page in random_pages:
            if '/' in random_page:
                # see https://to.wikipedia.org/wiki/Tuʻi_Tonga_Fefine/en
                continue
            parse_page(random_page, extracted_paragraphs, queried, used_pages,
                       bar, verbose, to_extract)
    # print("Start extracting pages")
    # Fallback path, deliberately disabled by the `False` guard: walk all
    # page titles instead of sampling random ones.
    if False and len(extracted_paragraphs) < to_extract:
        apcontinue = ''
        max_reached = False
        while not max_reached and len(extracted_paragraphs) < to_extract:
            out = get_all_page_titles(lang_wiki, apcontinue=apcontinue,
                                      max_pages=10)
            page_titles_queue = out['page_titles']
            max_reached = out['max_reached']
            apcontinue = out['apcontinue']
            random.shuffle(page_titles_queue)
            print("Loaded {} pages".format(len(page_titles_queue)))
            while (len(extracted_paragraphs) < to_extract
                   and len(page_titles_queue) > 0):
                print(len(page_titles_queue))
                page_title, revision_id = page_titles_queue.pop()
                parse_page(page_title, extracted_paragraphs, queried,
                           used_pages, bar, verbose)
    bar.update(to_extract)
    bar.finish()
    return extracted_paragraphs, list(used_pages)

def get_random_articles_v1(number_of_articles_wanted):
    """Given the wanted number of articles returned, get random wikipedia articles"""
    if number_of_articles_wanted == 1:
        print(wikipedia.summary(wikipedia.random()))
    else:
        list_of_articles = wikipedia.random(number_of_articles_wanted)
        # Iterate over a copy: disambiguation pages are removed from the
        # original list and replaced with a fresh random title. (The original
        # mutated the list while iterating over it and passed a title string
        # to wikipedia.random(), which expects a page count.)
        for a in list(list_of_articles):
            try:
                title = wikipedia.page(a).title
                if 'disambiguation' in title or 'it may refer to' in title:
                    list_of_articles.remove(a)
                    list_of_articles.append(wikipedia.random())
                    continue
                print(list_of_articles.index(a) + 1, " - " + wikipedia.summary(a))
                print()
            except wikipedia.exceptions.DisambiguationError:
                list_of_articles.remove(a)
                list_of_articles.append(wikipedia.random())

def random_page():
    wikipedia.set_lang("fy")
    random = wikipedia.random(1)
    try:
        result = wikipedia.page(random).summary
    except wikipedia.exceptions.DisambiguationError as e:
        result = random_page()
    return result

def random_wikipedia_article():
    title = wikipedia.random(pages=1)
    try:
        return wikipedia.page(title).content
    except wikipedia.exceptions.DisambiguationError:
        # Catching the error as `e` and returning
        # wikipedia.page(e.options[0]).content can raise DisambiguationError
        # again, which is unfortunate (see for example the wikipedia page
        # 'Shadi'), so simply retry with a fresh random title instead.
        return random_wikipedia_article()

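# As the comment in random_wikipedia_article() notes, retrying e.options[0]
# can itself raise DisambiguationError. An iterative variant of the same
# retry (a sketch; the function name is mine) avoids unbounded recursion on
# an unlucky streak of ambiguous titles:
def random_wikipedia_article_iter():
    while True:
        title = wikipedia.random(pages=1)
        try:
            return wikipedia.page(title).content
        except wikipedia.exceptions.DisambiguationError:
            continue  # draw a fresh random title instead of recursing
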
def wiki():
    page = wikipedia.random(1)
    var = "Do you wanna read about " + page + " ?"
    label = tk.Label(root, text=var, fg='red', font=('calibri', 10))
    label.place(relx=0.4, rely=0.3)
    yes = tk.Button(root, text='Yes', command=lambda: yes1(page),
                    font=('calibri', 10))
    yes.place(relx=0.4, rely=0.5, relheight=0.2, relwidth=0.2)
    no = tk.Button(root, text='No', command=wiki, font=('calibri', 10))
    no.place(relx=0.4, rely=0.7, relheight=0.2, relwidth=0.2)

def random(message):
    # noinspection PyBroadException
    try:
        random_title = str(wikipedia.random(pages=1))
        random_page = wikipedia.page(random_title)
        random_result = str(random_page.url)
        bot.send_message(chat_id=message.chat.id, text=random_result,
                         reply_markup=main_keyboard())
    except Exception:
        bot.send_message(chat_id=message.chat.id, text=error,
                         reply_markup=main_keyboard())

def random_page():
    random = wikipedia.random(1)
    try:
        result = wikipedia.page(random)
    except wikipedia.exceptions.DisambiguationError:
        # The recursive call prints its own page; return here so we don't
        # fall through and print attributes of an unset result (the original
        # assigned the recursion's None return value to result).
        random_page()
        return
    print('\033[1m' + result.title + '\033[0m \n')
    print(result.summary + '\n')
    print(result.url)

def on_click_random(self, num):
    random_text = wikipedia.random()
    while len(random_text) >= 17:
        random_text = wikipedia.random()
    if num == 1:  # `is` on ints is unreliable; compare with ==
        self.ids.search_one.text = random_text
        self.ids.compare_button.disabled = True
        self.ids.detect_one.background_normal = 'red_bar2.png'
    else:
        self.ids.search_two.text = random_text
        self.ids.compare_button.disabled = True
        self.ids.detect_two.background_normal = 'red_bar2.png'

def test_random_pages(self):
    print("------ random pages test ------------")
    pages = wikipedia.random(2)
    print(pages)
    article1 = Article(pages[0], repo=DataDict())
    article2 = Article(pages[1], op=op_backlinks, repo=DataDict())
    run_test(article1, article2)
    print('=========================================')

def handle_random_intent(self, message):
    """Get a random wiki page.

    Uses the Special:Random page of wikipedia.
    """
    # Talk to the user, as this can take a little time...
    search = wiki.random(pages=1)
    self.speak_dialog("searching", {"query": search})
    self._lookup(search)

def main(out: str):
    outpath = pathlib.Path(out)
    random_pages = map(lambda title: wikipedia.page(title), wikipedia.random(10))
    for page in random_pages:
        filename = outpath.joinpath(page.title).with_suffix(".txt")
        with filename.open("w") as f:
            f.write(page.content)

def newAnswers():
    alternates = wikipedia.random(ALTERNATE_OPTIONS)
    choices = []
    choices.extend(alternates)
    choices.append(correctAnswer)
    shuffle(choices)
    prefixes = [i for i in range(len(choices))]
    answers = dict(zip(prefixes, choices))
    return answers

def get_random_wiki():
    page_name = wikipedia.random(1)
    try:
        page_link = wikipedia.page(page_name).url
        return page_link, page_name
    except (wikipedia.exceptions.DisambiguationError,
            wikipedia.exceptions.PageError):
        return None, None

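# Callers of get_random_wiki() must handle the (None, None) failure case; a
# short usage sketch (the messages are illustrative):
link, name = get_random_wiki()
if link is None:
    print("Random draw hit a disambiguation or missing page; try again.")
else:
    print("%s -> %s" % (name, link))
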
async def get_random_wikipedia_page(self, ctx):
    """Fetch a single random article from Wikipedia."""
    await ctx.send('Sis: "I will go grab a random article from Wikipedia!"')
    await ctx.send('She raises a hand into the air, as if grasping at something.')
    # Pick one random title from the Japanese Wikipedia and fetch its page
    self.wikipedia_page = wikipedia.page(wikipedia.random())
    await ctx.send('Sis: "Here you go, one random article!"')

def index_wikipedia(num_pages):
    for _ in range(0, num_pages):
        p = wikipedia.random()
        try:
            wiki_page = wikipedia.page(p)
            Page.objects.update_or_create(
                title=wiki_page.title,
                defaults={"content": wiki_page.content}
            )
        except Exception:
            logger.exception("Failed to index %s", p)

def gethering_pages(no, pages_no):
    if no == 1:
        print("Fetching 1st 20 pages.")
    else:
        print("Fetching next 20 pages.")
    page_names = [wikipedia.random(1) for i in range(pages_no)]
    return page_names

def wikirandom(sents, boole, client, message, lang="it"):
    wikipedia.set_lang(lang)
    wikipedia.set_rate_limiting(rate_limit=True)
    random = wikipedia.random()
    result = wikipedia.summary(random, sentences=sents)
    if boole:
        return result
    else:
        result += "\n" + create_link(random, lang)
        return utils.get_config.sendMessage(client, message, result)

def random_page():
    # Fetches a random page from wikipedia
    try:
        page = wikipedia.page(wikipedia.random())
        print(page.links)
        show_links(page)
    # Deals with the wiki disambiguation error by retrying
    except wikipedia.exceptions.DisambiguationError:
        random_page()

def wiki():
    global page
    random_article = wikipedia.random()
    page = wikipedia.page(random_article)
    contents = page.summary
    sentences = contents.split(". ")
    os.system("clear")
    return sentences

def random():
    try:
        query = wikipedia.random(pages=1)
        # Build the page object once; the original constructed three separate
        # WikipediaPage objects, and also bound the summary to `input`,
        # shadowing the builtin.
        wiki_page = wikipedia.WikipediaPage(title=query)
        summary_text = wiki_page.summary
        title = wiki_page.title
        image = wiki_page.images[0]
        client = Algorithmia.client('Simple simR+{}'.format(api_key))
        algo = client.algo('nlp/Summarizer/0.1.2')
        contents = {
            'image': image,
            'title': title,
            'summary': algo.pipe(summary_text),
            # Link the page that was actually summarized; the original drew a
            # second, unrelated random title here.
            'link': 'https://en.wikipedia.org/wiki/{}'.format(query)
        }
    except Exception:
        return json.dumps({
            'msg': "Sorry, we couldn't find a Wikipedia article matching your search."
        })
    return json.dumps(contents)

def do_til(self, arg):
    if check_core('DISCIPLINE') is not True:
        req = urllib2.Request(
            "http://hipsteripsum.me/?paras=1&type=hipster-centric",
            headers={'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) '
                                   'AppleWebKit/537.36 (KHTML, like Gecko) '
                                   'Chrome/32.0.1664.3 Safari/537.36',
                     "Accept": "text/html"})
        contents = urllib2.urlopen(req).read()
        soup = BeautifulSoup(contents, "html5lib")
        for div in soup.find_all(id='content'):
            text = div.contents[1].string
    elif check_core("CURIOSITY") is True and check_core('EMPATHY') is True:
        wikipedia.set_lang("en")
        print "english"
        text = wikipedia.summary(wikipedia.random(pages=1))
    elif check_core("CURIOSITY") is not True:
        wikipedia.set_lang("simple")
        print "simple"
        text = wikipedia.summary(wikipedia.random(pages=1))
    elif check_core("CURIOSITY") is True and check_core('EMPATHY') is not True:
        req = urllib2.Request("http://www.elsewhere.org/pomo/",
                              headers={"Accept": "text/html"})
        contents = urllib2.urlopen(req).read()
        soup = BeautifulSoup(contents, "html5lib")
        h3counter = 0
        text = ''
        for div in soup.find_all('div', class_='storycontent'):
            for element in div.contents:
                if element.name == 'h1':
                    text = text + element.string + '\n'
                elif element.name == 'h3' and h3counter < 1:
                    text = text + element.string + '\n'
                    h3counter = h3counter + 1
                elif element.name == 'h3' and h3counter == 1:
                    break
                elif element.name == 'p' and element.string is not None:
                    text = text + element.string + '\n'
    question = "Is that interesting " + dev_names[random.randint(0, len(dev_names) - 1)] + "?"
    say(text, agnes_core.voice)
    if check_core("CURIOSITY") is not True:
        say(question, agnes_core.voice)

def get_random_page():
    """Return a random wikipedia page as a wikipedia object."""
    ambig_error = True
    while ambig_error:
        ambig_error = False
        try:
            page = wikipedia.page(wikipedia.random())
        except wikipedia.exceptions.DisambiguationError:
            ambig_error = True
    return page

def random(self, c, e, args):
    while True:
        try:
            p = wikipedia.page(wikipedia.random())
            if p:
                c.privmsg(get_target(c, e),
                          '\x02{}\x0f - {} [ {} ]'.format(
                              p.title,
                              smart_truncate(p.summary.replace('\n', ' ')),
                              p.url))
                break
        except DisambiguationError:
            pass

def get_random_page(nsent):
    """Get sentences from a random page (up to nsent sentences)."""
    page = wiki.page(wiki.random())
    sentences = split_content(page.content)
    sentences = filter_noise(sentences)
    ct = len(sentences)
    takenct = min(nsent, ct)
    takensent = sentences[:takenct]
    return (takensent, takenct)

def main(begin, end):
    # TODO: fix disambiguation errors
    try:
        start = wikipedia.page(begin) if begin != "" else wikipedia.page(wikipedia.random())
    except wikipedia.exceptions.PageError:
        return "\nSorry the start page %s doesn't exist" % begin
    try:
        dest = wikipedia.page(end) if end != "" else wikipedia.page(wikipedia.random())
    except wikipedia.exceptions.PageError:
        return "\nSorry the ending page %s doesn't exist" % end
    previousList = []
    listOfConnections = []
    currentList = getLinks(start.title, masterList)
    result = 1
    for y in range(len(currentList)):
        associateTo(currentList[y], start.title)
    if findLink(currentList, dest) >= 0:
        listOfConnections = acquireAllAssociations(dest.title, result, listOfConnections)
    else:
        previousList = currentList
        for i in previousList:
            if i not in masterList:
                masterList.append(i)
        result = layerSearch(previousList, result, dest)
        listOfConnections = acquireAllAssociations(dest.title, result, listOfConnections)
    return list(reversed([dest.title] + listOfConnections))

def readRandom():
    randomArticles = wikipedia.random(pages=3)
    try:
        for topic in randomArticles:  # string case
            crawlAndLearn(topic)
        print('Words:' + str(len(words)))
        print('POS:' + str(len(partsOfSpeech)))
    except AttributeError:
        for topic in randomArticles:
            crawlAndLearn(topic)
        print('Words:' + str(len(words)))
        print('POS:' + str(len(partsOfSpeech)))

def handleRandomCommand(self):
    """Gets a random page from Wikipedia and prints the summary paragraph."""
    returnString = ""
    pg = wikipedia.random(1)
    try:
        returnString = wikipedia.summary(pg)
    except wikipedia.exceptions.DisambiguationError:
        returnString = self.handleRandomCommand()
    return returnString

def wikilookup(choices):
    lookup = choices[0] + " " + choices[1]
    if debug:
        print "lookup:", lookup
    res = wikipedia.random(pages=5)
    for k in res:
        print "search", k
        p = wikipedia.page(k)
        print "title:", p.title
        images = p.images
        for i in images:
            print "image:", i
    return []

def simple_giver(num, min_links=5, min_length=3000, max_word=1,
                 exclude_titles=exclude_t):
    all_pages = []
    pages_title = []
    # Loop against the `num` parameter; the original compared against an
    # undefined name, num_of_pages.
    while len(all_pages) < num:
        try:
            temp_title = wp.random(pages=1)
            temp_content = wp.WikipediaPage(temp_title).content
            pages_title.append(temp_title)
            all_pages.append(temp_content)
            print("page loaded... ", end=" ")
        except (wp.exceptions.WikipediaException, KeyError, ValueError):
            pass
    return (all_pages, pages_title)

def crawl(self):
    pagelist = []
    for document in wikipedia.random(self.crawlCount):
        try:
            doc = wikipedia.page(document)
            # Create document
            page = WikiPage(doc.title, doc.content)
            print("page title : %s \n" % page.getTitle())
            pagelist.append(page)
        except Exception, e:
            self.error_file.write('Search failed: {%s} { %s } \n' % (document, e))
            print 'Search failed: %s' % e

def loadRandWikiPage(self):
    """Grabs a random wiki page for the start and end of a course.
    Returns a tuple: links in the first position, title in the second."""
    try:
        Random1 = W.random()
        self.randomPage = W.page(Random1)
    # If the first random page throws an ambiguity error, try again
    except:
        self.loadRandWikiPage()
    if self.randomPage.title:
        return self.randomPage.links, self.randomPage.title
    else:
        print "Not valid"

def run_script():
    if opts.random:
        page = wiki.page(str(wiki.random()))
        message = colored('Random Wikipedia Page', 'yellow')
    elif not isinstance(opts.search, type(None)):
        results = wiki.search(opts.search)
        page = show_list(results)
        message = colored(str(page.title) + ' Wikipedia Page', 'yellow')
    print message
    print ''
    print_page(page)