def login_hackerrank(driver: object, home_dir) -> object:
    """Returns driver after login to hackerrank.com.

    Args:
        driver: Selenium WebDriver instance (Firefox in this project).
        home_dir (Path): Project home dir; forwarded to
            ``WebPageInfo.read_account_info`` to locate the credentials file.

    Returns:
        The same driver, now logged in.
    """
    logging.debug("WebPageInfo - logging into Hackerrrank.")
    driver.get(WebPageInfo.login_url)
    account_name, account_pw = WebPageInfo.read_account_info(home_dir)
    driver.find_element_by_id('input-1').send_keys(account_name)  ## Username
    driver.find_element_by_id('input-2').send_keys(account_pw)  ## Password
    driver.find_element_by_class_name('checkbox-input').click()  ## Remember
    ## Press login button: try the xpath locator first, then fall back to the
    ## class-name locator (page markup has been seen in both variants).
    try:
        print("logging in:")
        print("xpath element...")
        driver.find_element_by_xpath(
            WebPageInfo.loginbutton_xml).click()  ## Login button
    except Exception:
        ## Previously the caught exception was silently discarded; record it
        ## so locator breakage is diagnosable from the debug log.
        logging.debug("xpath login button lookup failed; trying class name.")
        print("html class...")
        driver.find_element_by_class_name(
            WebPageInfo.loginbutton_html_class).click()
    time.sleep(0.5)  # brief pause so the post-login redirect can start
    return driver
def get_challenge_url(self, challenge_info: dict, saved_info) -> str:
    """Returns challenge's url using google library.

    Args:
        challenge_info (dict): Cached challenge information (kept for
            interface stability; not consulted here).
        saved_info: SavedInfo instance; its dataframe is flushed to disk
            before the exception propagates if the search fails.

    Returns:
        str: URL of the challenge's problem page.

    NOTE: Running into HTTP 429 errors. 3 second pause has found success
    (2 is recommended). May like to investigate Google-api library:
    https://towardsdatascience.com/current-google-search-packages-using-python-3-7-a-simple-tutorial-3606e459e0d4
    """
    progress_statement = f"Collecting url for {self.name}"
    logging.debug(progress_statement)
    print(progress_statement)
    search_items = "site: hackerrank.com Challenges {}".format(self.name)
    ## Occasional extra back-off to reduce the chance of HTTP 429s.
    if random.random() < 0.2:
        time.sleep(5)  # arb wait
    try:
        urls = search(query=search_items, num=1, start=0, stop=1, pause=3)
        url = next(urls)
    except Exception:
        ## Persist progress before propagating. Bare ``raise`` keeps the
        ## original exception type and traceback (``raise Exception(e)``
        ## previously flattened both).
        saved_info.save_df()
        raise
    logging.info(url)
    ## Search sometimes lands on the discussion page; normalize to problem.
    url = url.replace("/forum", "/problem")
    logging.debug(f"URL: {url}")
    return url
def get_score(self) -> str:
    """Returns score of problem from text.

    The score is the substring of ``self.text`` that lies between the
    ``"Score: "`` marker and the ``"Success"`` marker.
    """
    score_marker = "Score: "
    begin = self.text.find(score_marker) + len(score_marker)
    end = self.text.find("Success")
    score = self.text[begin:end]
    logging.debug(f"Score: {score}")
    return score
def add_entry(
        self,
        parsed_info: object,  # InfoParser - circular import error
        completed: int = 0,
        todo: int = 1) -> None:
    """Adds entry to the dataframe to save to scs

    Args:
        parsed_info (InfoParser): Information about the challenge
        completed (int, optional): If challenge is completed. Defaults to 0.
        todo (int, optional): If challenge is to be done. Defaults to 1.

    NOTE:
        Makes a full copy of the data & thus slower performance.
        Later on, may like to implement so identify all problems that
        need to be added, and then add them all at once.
    """
    logging.debug(f"- Adding {parsed_info.num} {parsed_info.name} to df")
    data = {
        self.DF_HEADERS[0]: parsed_info.num,
        self.DF_HEADERS[1]: parsed_info.name,
        self.DF_HEADERS[2]: parsed_info.score,
        self.DF_HEADERS[3]: parsed_info.difficulty,
        self.DF_HEADERS[4]: parsed_info.rate,
        self.DF_HEADERS[5]: parsed_info.url,
        self.DF_HEADERS[6]: parsed_info.solution_url,
        self.DF_HEADERS[7]: completed,
        self.DF_HEADERS[8]: todo
    }
    ## DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
    ## concat with a one-row frame is the supported equivalent.
    self.df = pd.concat([self.df, pd.DataFrame([data])], ignore_index=True)
def save_df(self) -> None:
    """Saves df to csv.

    Skips the write entirely when the row count is unchanged since load.
    """
    unchanged = self.start_rows == len(self.df)
    if unchanged:
        logging.debug("DF - no changes.")
        return
    logging.debug("DF - saving changes.")
    self.df.to_csv(self.CSV_FILENAME, index=False)
def __init__(self, url: str, scrape: bool = True):
    """Init webpage object.

    Args:
        url (str): Page to open via the class-level driver.
        scrape (bool, optional): If True, immediately collect the challenge
            elements from the opened page into ``self.challenge_info``.
            Defaults to True.
    """
    logging.debug(f"WebPageInfo - opening {url}")
    self.url = url
    self.open_url()
    # NOTE: when scrape is False, ``challenge_info`` is never set.
    if scrape:
        self.challenge_info = self.get_challenge_info()
def __init__( self, domain: str, subdomain: str, subdir: str, challenge_num: int, num_challenges: int, filetype: str, text: str, challenge_info: dict, saved_info: SavedInfo, ): """Initializes class with information to write to markdown table. Args: domain (str): Challenges domain, e.g, statistics or SQL subdomain(str): Sub domain, e.g., Basic Select subdir (str): Sub-directory, e.g., 1_basic_select challenge_num (int): Number Challenges num_challenges (int): Total number of challenges filetype (str): file extension, e.g., .py text (str): Text to parse challenge_info (dict): Contains information about challenges. Used for urls to avoid excessive searches. saved_info (SavedInfo): Class containing saved challenge information. """ ## Log logging.info( f"Parsing information for {domain} - {subdir}, {challenge_num}") logging.debug(f"Parsing text:\n\n{text}\n\n") ## Directory self.domain = domain self.subdomain = subdomain self.subdir = subdir ## Used to create filename self.total = str(num_challenges) self.num = self.get_challenge_num(challenge_num) self.filetype = filetype ## Text to parse self.text = text ## Table information self.name = self.get_name() ## Check if challenge already located in dataframe. self.chall_saved = saved_info.check_name_in_df(self.name) if self.chall_saved: logging.debug(f"NOTE: {self.name} already saved\n\n") return self.filename = self.get_filename() self.difficulty = self.get_difficulty() self.score = self.get_score() self.rate = self.get_rate() self.url = self.get_challenge_url(challenge_info, saved_info) self.solution_url = self.get_solution_url()
def remove_special_chars(name: str) -> str:
    """Aux func to remove special characters from file name."""
    ## Single-pass removal with str.translate -- equivalent to calling
    ## replace() once per character in the removal set.
    removal = ' <>:"\'/\\|?*,!#'
    name = name.translate(str.maketrans('', '', removal))
    logging.debug(f'No special chars: {name}')
    return name
def read_account_info(home_dir) -> Tuple[str, str]:
    """Returns Tuple representing email & account password.

    Args:
        home_dir (Path): Project home dir containing the ``automation``
            folder with the credentials file.

    Returns:
        Tuple[str, str]: Email & password
    """
    logging.debug("WebPageInfo - reading account information.")
    filename = home_dir / 'automation' / WebPageInfo.filename_accountinfo
    with open(filename) as infile:
        ## readlines() keeps trailing "\n" characters, which would be typed
        ## into the login form; splitlines() strips them, and slicing to two
        ## entries makes the result match the declared Tuple[str, str].
        account_name, account_pw = infile.read().splitlines()[:2]
        return account_name, account_pw
def get_challenge_info(self) -> list:
    """Gets all challenge problem information from the URLs"""
    logging.debug("WebPageInfo - getting challenge info.")
    ## Guard: only the class-name locating strategy is implemented.
    if WebPageInfo.LOCATE_BY != "class":
        raise Exception("Must indicate how to locate HTML elements.")
    challenge_info = WebPageInfo.driver.find_elements_by_class_name(
        WebPageInfo.CHALLENGE_CLASSNAME)
    if not challenge_info:
        raise Exception("Could not locate elements!")
    return challenge_info
def load_df(self) -> "dataframe":
    """Load dataframe into memory.

    Checks if .csv exists, otherwise creates an empty frame with the
    expected headers.
    """
    if not os.path.exists(self.CSV_FILENAME):
        logging.debug("DF - creating")
        return pd.DataFrame({header: [] for header in self.DF_HEADERS})
    logging.debug("DF - loading")
    return pd.read_csv(self.CSV_FILENAME)
def start_driver(cls, home_dir):
    """Sets Firefox driver object.

    Args:
        home_dir (Path): Project home dir, forwarded to the login helper
            so it can locate the stored credentials.

    NOTE: If running the remote driver, need to create standalone server.
    Run the following command on cmd
    >>> java -jar selenium-server-standalone-3.141.0.jar -port 4446 s

    ## Remote server
    # driver = webdriver.Remote(
    #     command_executor='http://127.0.0.1:4446/wd/hub',
    #     desired_capabilities=DesiredCapabilities.FIREFOX)
    """
    logging.debug("WebPageInfo: Opening Driver.")
    # Log in immediately so the class-level driver is authenticated for
    # every subsequent page load.
    cls.driver = WebPageInfo.login_hackerrank(webdriver.Firefox(), home_dir)
def check_name_in_df(self, challenge_name: str) -> bool:
    """Checks if the challenge name is in the dataframe.

    Args:
        challenge_name (str): Challenge name

    Returns:
        bool: If name in dataframe.

    NOTE:
        - Use name because more likely to not change.
        - hackerrank may change order of numbers.
    """
    logging.debug(f"- Locating... {challenge_name}")
    if len(self.df.name) == 0:  # can't check empty df
        return False
    ## regex=False: challenge names can contain regex metacharacters
    ## (parentheses, '?', '+', ...); the default regex match would then
    ## mismatch or raise. Match as a literal substring instead.
    return self.df.name.str.contains(challenge_name, regex=False).any()
def get_filename(self) -> str:
    """Returns file name for hackerrank problem.

    Returns:
        str: e.g. ``01_plusminus.py`` -- zero-padded number, name stripped
        of special characters and lowercased, plus the domain's extension.
    """

    def remove_special_chars(name: str) -> str:
        """Aux func to remove special characters from file name."""
        special_chars = [
            ' ', '<', '>', ':', '"', "'", '/', '\\', '|', '?', '*', ',',
            '!', '#'
        ]
        for char in special_chars:
            name = name.replace(char, '')
        logging.debug(f'No special chars: {name}')
        return name

    ## NOTE: spaces are already stripped above (space is in special_chars),
    ## so the replace(' ', '_') below is effectively a no-op kept for safety.
    name = remove_special_chars(self.name)
    filename = ''.join([
        self.num, "_",
        name.replace(' ', '_').lower(), '.', self.filetype
    ])
    ## Was a hard-coded placeholder string; log the actual filename.
    logging.debug(f"Filename: {filename}")
    return filename
def get_challenge_num(self, num) -> int:
    """Sets challenge number.

    Zero-pads ``num`` to the width of ``self.total`` so filenames sort
    lexicographically.
    """
    num = str(num).zfill(len(self.total))
    logging.debug(f"Challenge num {num}")
    return num
def get_name(self) -> str:
    """Returns name of problem from text.

    The name is everything before the first newline of ``self.text``.
    """
    newline_at = self.text.find("\n")
    name = self.text[:newline_at]
    logging.debug(f"Name {name}")
    return name
def get_difficulty(self) -> str:
    """Returns difficulty of problem from text.

    Slices between the first newline and the "Max" marker; locked
    problems are reported as 0.
    """
    begin = self.text.find("\n") + 1
    end = self.text.find("Max")
    difficulty = self.text[begin:end]
    if difficulty == 'The challenge is not available yet':
        difficulty = 0  # for locked problems
    logging.debug(f"Difficulty: {difficulty}")
    return difficulty
def get_solution_url(self) -> str:
    """Returns url of the github solution.

    Built from the class-level ``solution_url_BASE`` template using the
    domain, sub-directory, and generated filename (not parsed from text).
    """
    solution_url = InfoParser.solution_url_BASE.format(
        self.domain, self.subdir, self.filename)
    logging.debug(f"solution_url: {solution_url}")
    return solution_url
def get_rate(self) -> str: """Returns success rate of problem from text.""" rate_str = "Rate: " rate = self.text[self.text.find(rate_str) + len(rate_str):] logging.debug(f"Rate: {rate}") return rate
def scrape_info(): """ For each domain: For each subdomain: - load subdomain info - scrape challenge info - parse challenge info - save challenge info - create relevant file - save subdomain info """ ## Home dir home_dir = Path(__file__).resolve().parents[1] logging.info(f"Directory - Home: {home_dir}") ## Driver WebPageInfo.start_driver(home_dir) ## Change to domain directory for domain in problem_domains: domain_dir = home_dir / domain.name aux_funcs.change_dir(domain_dir) ## Make readme file_funcs.make_readme_setup(domain.total_url, domain.name, heading=1) ## For each subdomain in Domain subdomain_num = 0 for subdomain, url in domain.info.items(): ## Change subdomain subdomain_num += 1 subdir = Path( aux_funcs.get_subdomain_dirname(subdomain_num, len(domain.info.items()), subdomain)) aux_funcs.change_dir(subdir) ## Make pre-readme file file_funcs.make_readme_setup(url, domain.name, subdomain, heading=2) ## Load Challenge information saved_info = SavedInfo(subdomain) ## Scrape challenge information challenge_page = WebPageInfo(url=url) ## Parse information challenge_num = 0 for challenge in challenge_page.challenge_info: is_chall, text = logic.is_challenge(challenge) if not is_chall: continue ## Parse information challenge_num += 1 logging.info(f"Parsing - {challenge_num}") logging.debug(text) parsed_info = InfoParser( domain=domain.name, subdomain=subdomain, subdir=subdir, challenge_num=challenge_num, num_challenges=len(challenge_page.challenge_info), filetype=domain.filetype, text=text, challenge_info={}, saved_info=saved_info, ) ## Make new file & add entry if not parsed_info.chall_saved: file_funcs.make_file(parsed_info.filename, parsed_info.name, parsed_info.url) saved_info.add_entry(parsed_info) ## Save subdir information saved_info.save_df() ## Domain Dir logging.info(f"DIR - {domain_dir}") os.chdir(domain_dir) ## Update repo aux_funcs.update_github( home_dir, commit_msg=f"Added challenges for: {domain.name}") ## Home dir logging.info(f"DIR - {home_dir}") 
os.chdir(home_dir)
def __post_init__(self):
    """Promote identifying fields out of the raw ``info`` mapping.

    Pops ``name``, ``filetype`` and ``total_url`` from ``self.info`` onto
    dedicated attributes, leaving only subdomain entries in ``info``.
    """
    # Name (read before popping so the debug line can reference it)
    logging.debug(f"Creating {self.info['name']} DomainInfo")
    for attr in ("name", "filetype", "total_url"):
        setattr(self, attr, self.info.pop(attr))