def hello_world():
    league_id = request.args.get('league_id')
    if league_id:
        return render_template('index.html', data=scrape(league_id))
        # return render_template('index.html', data=['test', 'test2', 'test3'])
    else:
        return render_template('formTemplate.html')
def similarity_score(self, query, url):
    """Count how many words in the query appear in the page's keywords."""
    page = scrape(Site(url, True))
    keywords = get_keywords(page.text)
    score = 0
    for word in query:
        if word in keywords:
            score += 1
    return score
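The scoring above simply counts query words that appear in the page's keyword list. A minimal, self-contained sketch of that idea, with made-up data standing in for the Site/scrape/get_keywords helpers (which are not shown in this snippet):

# Hypothetical stand-ins for the query and for what get_keywords() would return.
query = ["python", "web", "scraper"]
keywords = ["python", "scraper", "tutorial", "requests"]

# One point for each query word found among the page's keywords.
score = sum(1 for word in query if word in keywords)
print(score)  # 2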
def main():
    # Program execution starts here.
    url = "http://sites.cs.queensu.ca/courses/cisc121/a2/logons.txt"
    print(f"Retrieving data from {url}.")
    data = web_scraper.scrape(url)
    n = len(data)
    print(f"{n} records read.")

    # This loop converts the data from a list of strings to a list of lists.
    for i in range(n):
        data[i] = line_to_list(data[i])

    # CSV file creation.
    # Open a file with the requested name, join the values in each interior
    # list with commas, and write one comma-separated line per list to the CSV.
    csv_filename = "logons.csv"
    csv_file = open(csv_filename, "w")
    for i in data:
        csv_file.write(",".join([str(x) for x in i]) + "\n")
    csv_file.close()

    # JSON file creation.
    # Note: Once your dict_maker.make_dict() function is working,
    # this code will create a JSON file for you. You don't need
    # to do anything in this section.
    json_filename = "logons.json"
    dict_keys = ["user_id", "os", "pc_id", "datetime"]
    dict_list = []
    for i in range(n):
        dict_list.append(dict_maker.make_dict(dict_keys, data[i]))
    if json_writer.write_json_file(json_filename, dict_list):
        print(f"Write to {json_filename} complete.")
    else:
        print(f"Write to {json_filename} failed.")

    while True:
        # Check every entry in data for the entered PC id; for each match,
        # check whether the operating system is Linux. When both conditions
        # hold, add 1 to the Linux logon count, then print the count after
        # the whole list has been checked.
        print("Checking the number of Linux logons for a specific PC.")
        pc_id = input("Enter the PC's ID number or just press Enter to quit: ")
        logon_count = 0
        if pc_id == "":
            break
        for i in range(n):
            if data[i][2] == pc_id:
                if data[i][1] == "Linux":
                    logon_count = logon_count + 1
        print(f"PC {pc_id} was logged onto in Linux {logon_count} times.")

    print("Done.")
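The CSV section above builds each row by hand with str.join. A minimal sketch of the same step using the standard csv module and a with-block; the sample rows here are made up, in the same list-of-lists shape that line_to_list() produces:

import csv

# Hypothetical rows in the same shape as the parsed logon data above.
data = [["user01", "Linux", "pc42", "2021-09-01 09:30"],
        ["user02", "Windows", "pc07", "2021-09-01 09:45"]]

with open("logons.csv", "w", newline="") as csv_file:
    writer = csv.writer(csv_file)
    writer.writerows(data)  # One comma-separated line per interior list.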
def populate_web(self):
    """
    Populates the web and the inverted index keyword dictionary
    with the urls provided.
    """
    occurdic = {}
    for url in self.urls:
        page = scrape(url)
        keywords = get_keywords(page.text)
        index = len(self.web)
        self.web.append(Index(index, page.title, page.url, page.links_to))
        for word in keywords:
            value = OccurrenceList()
            value.append(index)
            occurdic[word.lower()] = value
            self.keywords.add(word.lower(), value)
    self.rank_page(occurdic, len(self.web))
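A minimal sketch of the inverted-index idea used in populate_web(), with plain dicts and lists standing in for OccurrenceList and the keyword dictionary (the pages here are made up):

# page index -> keywords found on that (hypothetical) page
pages = {0: ["python", "web"], 1: ["web", "index"]}

inverted = {}
for index, keywords in pages.items():
    for word in keywords:
        # Each keyword maps to the list of page indices it occurs in.
        inverted.setdefault(word.lower(), []).append(index)

print(inverted)  # {'python': [0], 'web': [0, 1], 'index': [1]}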
        update, frames=num_of_iterations, repeat=False)
    canvas.draw()


# Only run if script is called directly.
if __name__ == '__main__':
    ## Read parameters from command line
    ## (if none set, will set defaults as defined in read_cmd.py).
    parameters = read_cmd.parameters(argv)
    num_of_rabbits, lifespan, neighbourhood, num_of_iterations, animate = parameters

    ## Scrape initial x- and y-values from webpage.
    url = 'http://www.geog.leeds.ac.uk/courses/computing/practicals/python/agent-framework/part9/data.html'
    scraped_coordinates = web_scraper.scrape(url)

    ## Create environment from CSV file.
    environment = Environment("in.txt").env

    ## Create rabbits.
    rabbits = create_rabbits(environment, num_of_rabbits, scraped_coordinates, lifespan)

    # Configure Tkinter.
    root = tkinter.Tk()
    root.wm_title("Model")
    fig = matplotlib.pyplot.figure(figsize=(7, 7))
    canvas = matplotlib.backends.backend_tkagg.FigureCanvasTkAgg(fig, master=root)
    canvas._tkcanvas.pack(side=tkinter.TOP, fill=tkinter.BOTH, expand=1)
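The truncated call at the top of this snippet looks like the tail of a matplotlib animation setup. A minimal, self-contained sketch of how such an animation is typically wired to a Tk-embedded figure; the model_animation name and the update function are illustrative, not taken from the snippet:

import tkinter
import matplotlib.pyplot
import matplotlib.animation
import matplotlib.backends.backend_tkagg

num_of_iterations = 10

def update(frame_number):
    # Stand-in for the model's per-frame drawing code.
    matplotlib.pyplot.title(f"Iteration {frame_number}")

root = tkinter.Tk()
root.wm_title("Model")
fig = matplotlib.pyplot.figure(figsize=(7, 7))
canvas = matplotlib.backends.backend_tkagg.FigureCanvasTkAgg(fig, master=root)
canvas._tkcanvas.pack(side=tkinter.TOP, fill=tkinter.BOTH, expand=1)

# Keep a reference to the animation so it is not garbage-collected.
model_animation = matplotlib.animation.FuncAnimation(
    fig, update, frames=num_of_iterations, repeat=False)
canvas.draw()
root.mainloop()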
if __name__ == '__main__':
    ## Read parameters from command line
    ## (if none set, will set defaults as defined in read_cmd.py).
    parameters = read_cmd.parameters(argv)
    num_of_rabbits, lifespan, neighbourhood, num_of_iterations, animate = parameters

    report = f"""##### Initial Parameters #####
    Number of rabbits: {num_of_rabbits}
    Lifespan: {lifespan}
    Number of Iterations: {num_of_iterations}
    """
    print(report)

    ## Scrape initial x- and y-values from webpage.
    URL = 'http://www.geog.leeds.ac.uk/courses/computing/practicals/python/agent-framework/part9/data.html'
    scraped_coordinates = web_scraper.scrape(URL)
    print(f"You have successfully scraped coordinates from {URL}.")

    ## Create environment from CSV file.
    file = "in.txt"
    environment = agentframework.Environment(file).env
    print(f"Environment successfully created from {file}.")

    ## Create rabbits.
    rabbits = create_rabbits(environment, num_of_rabbits, scraped_coordinates, lifespan)
    print(f"You have successfully created {num_of_rabbits} rabbits.")

    ## Run model and time it.
    print("We are now going to run the model.")
    time_result = run_model()
def main(): """Program execution starts here.""" url = "http://sites.cs.queensu.ca/courses/cisc121/a2/logons.txt" data = web_scraper.scrape(url) n = len(data) data = parse_logons.logons_to_list_of_dicts(data) # The data list (and the file it came from) are in chronological # order. That means data is currently sorted by timestamp. sort_on_key = "timestamp" main_menu_title = "Select an operation:" main_menu_choices = [ "Sort logons data", "Show a selection of the data", f"Search for a particular {sort_on_key}" ] sort_on_menu_title = "Select a key to sort on:" sort_on_menu_choices = list(data[0].keys()) sort_menu_title = "Select a sort algorithm:" sort_menu_choices = [ "Insertion sort", "Bubble sort", "Bubble sort (optimized)", "Selection sort" ] while True: choice = menu.do_menu(main_menu_title, main_menu_choices) if choice is None: return # Exit main() (and program). if choice == 1: # Sort logons data. while True: sort_on_choice = menu.do_menu(sort_on_menu_title, sort_on_menu_choices) if sort_on_choice is None: break # Return to main menu. sort_on_key = sort_on_menu_choices[sort_on_choice - 1] # Change last choice in main menu to reflect the new # sort_on_choice. main_menu_choices[ -1] = f"Search for a particular {sort_on_key}" sort_choice = menu.do_menu(sort_menu_title, sort_menu_choices) if sort_choice is None: break # Return to main menu. # If we're here, we can proceed with a sort. print() if sort_choice == 1: # Insertion sort sort_function = dict_quad_sorts.insertion_sort elif sort_choice == 2: # Bubble sort sort_function = dict_quad_sorts.bubble_sort elif sort_choice == 3: # Bubble sort (opt) sort_function = dict_quad_sorts.bubble_sort_opt else: # Selection sort sort_function = dict_quad_sorts.selection_sort # Do the sort. print(f"Sorting on key '{sort_on_key}'...") sort_function(data, sort_on_key) print("Done.") # Show it worked. long_list_printer.print_list(data, 5) elif choice == 2: # Show a selection of the data. long_list_printer.print_list(data, 10) elif choice == 3: # Search for a specific value. search_val = input(f"\nSearch for what {sort_on_key}? ") found_at = dict_bin_search.search(data, sort_on_key, search_val) if found_at is None: print(f"{sort_on_key} {search_val} not found.") else: print(f"{sort_on_key} {search_val} found at position "\ f"{found_at}.")
from web_scraper import scrape
from xlsx_writer import save_file

try:
    scrape()
finally:
    save_file()
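The point of the try/finally above is that save_file() runs even if scrape() raises part-way through. A minimal sketch of that behaviour, using hypothetical stubs in place of the real web_scraper and xlsx_writer modules:

def scrape():
    # Hypothetical stand-in for web_scraper.scrape; simulate a mid-run failure.
    raise RuntimeError("network error")

def save_file():
    # Hypothetical stand-in for xlsx_writer.save_file.
    print("partial results saved")

try:
    scrape()
finally:
    save_file()  # Still runs; the RuntimeError then propagates as usual.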
def main():
    if args.mode == "scan":
        if "books.csv" not in os.listdir(root) or "books" not in os.listdir(root):
            sys.exit("\nThere is no data to scan. Run the script with scrape mode first.")
        else:
            try:
                start = time.time()
                # Creating the WordMapper object loads every word on disk into RAM.
                mapper = WordMapper()
                end = time.time()
                logging.info("{} words are loaded into memory in {} seconds.".format(
                    mapper.word_count(), (end - start)))
                mapper.map_words()  # This will create mappings for every word.
                prefixes, suffixes = WordMapper.stats()
                logging.info("{} words are mapped to {} suffixes.".format(prefixes, suffixes))
                os.chdir("..")
                with open(args.output, "wb") as output:
                    # Saving the mappings.
                    pickle.dump(mapper.word_mappings, output, pickle.HIGHEST_PROTOCOL)
                logging.info("Mappings written to {} in {}.".format(args.output, os.getcwd()))
            except KeyboardInterrupt:
                sys.exit(logging.error("Exiting current process."))
    elif args.mode == "scrape":
        if "books.csv" not in os.listdir(root):
            # If books.csv doesn't exist, create it first.
            logging.info("Writing books.csv")
            top_book_urls = web_scraper.scrape()  # Getting the list of urls.
            df = web_scraper.to_df(top_book_urls)  # Adding them to a DataFrame.
            web_scraper.save(df)  # Saving the DataFrame to csv.
        else:
            # If books.csv exists, but the books directory might not.
            df_books = pd.read_csv("books.csv")
            if not os.path.exists(web_scraper.BOOKS_DIR):
                # Create the directory if it doesn't exist.
                os.makedirs(web_scraper.BOOKS_DIR)
            count = 0
            # Iterate over the .csv file and write books to disk.
            for i in df_books.index:
                if df_books.iloc[i, 0] + ".txt" in os.listdir(web_scraper.BOOKS_DIR):
                    logging.error("{} is already written to the disk.".format(df_books.iloc[i, 0]))
                else:
                    try:
                        try:
                            web_scraper.write_book(df_books.iloc[i, 1], df_books.iloc[i, 0])
                            count += 1
                        except TypeError:
                            logging.error("Unable to write {} to the disk.\n".format(df_books.iloc[i, 0]))
                    except KeyboardInterrupt:
                        logging.info("{} books written to the disk in the current process.\n".format(count))
                        sys.exit(logging.error("There are {} books in the disk.\n".format(
                            sum(1 for _ in os.listdir(web_scraper.BOOKS_DIR)))))
    elif args.mode == "chain":
        try:
            with open(args.input, "rb") as input_file:
                mapping = pickle.load(input_file)
                chain = word_chainer.chain_words(mapping, args.word)
                print(chain)
        except FileNotFoundError:
            sys.exit(logging.error("File {} is not found in {}.".format(args.input, os.getcwd())))
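The function above reads args.mode, args.output, args.input, and args.word, but the parser itself is not shown. A plausible argparse setup covering those attributes; the flag names, defaults, and help strings are assumptions, not the project's actual CLI:

import argparse

parser = argparse.ArgumentParser(
    description="Scrape books, scan them into word mappings, or chain words.")
parser.add_argument("mode", choices=["scrape", "scan", "chain"],
                    help="which stage of the pipeline to run")
parser.add_argument("--output", default="mappings.pickle",
                    help="file that 'scan' writes the pickled mappings to (assumed default)")
parser.add_argument("--input", default="mappings.pickle",
                    help="pickled mappings file read by 'chain' (assumed default)")
parser.add_argument("--word", default=None,
                    help="starting word for 'chain' mode")
args = parser.parse_args()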