def spider_current_level(page):
    # Walk one level of a GitHub repository listing: collect raw-file links and recurse
    # into any subdirectories found on the page.
    dirnames = []
    levelsoup = BeautifulSoup(page.text, 'html.parser')
    try:
        spans = levelsoup.findAll('span', {'class': "css-truncate css-truncate-target"})
        for s in spans:
            subtags = s.findAll('a', {'class': "js-navigation-open"}, href=True)
            for st in subtags:
                if '/blob/' in st['href']:
                    lnk = st['href'].replace('blob/', '')
                    if verbosity == 'on':
                        lib.PrintStatus(f"file: {lnk}")
                    full = baseraw + lnk
                    fileaddrs.append(full)
                else:
                    if verbosity == 'on':
                        lib.PrintStatus(f"dir: {st['href']}")
                    dirnames.append(st['href'])
        if len(dirnames) == 0:
            if verbosity == 'on':
                lib.PrintStatus("Branch exhausted")
        else:
            for subdir in dirnames:
                subdir_addr = baselink + subdir
                subdir_page = connect(subdir_addr)
                spider_current_level(subdir_page)
    except AttributeError:  # TODO: find and fix
        lib.PrintFailure("Unusual file behavior detected, ending spidering with current resources...")
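# The connect() helper called above is defined elsewhere in the project; as an assumption
# (not the original implementation), a minimal requests-based stand-in that matches the
# 'connection failed' sentinel checks used throughout this file could look like this:
import requests

def connect(url):
    """Illustrative sketch only: return the Response on HTTP 200, else a sentinel string."""
    try:
        response = requests.get(url, timeout=10)
        if response.status_code == 200:
            return response
    except requests.exceptions.RequestException:
        pass
    return 'connection failed'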
def load_config():
    # Locate (or create) the KRconfig directory, let the user pick a config file, then
    # parse and return the scraping settings.
    if isdir(f'{curdir}/KRconfig') is False:
        lib.PrintError(f"Config directory not detected in {curdir}...")
        lib.PrintStatus(f"Making config directory in {curdir}...")
        mkdir(f'{curdir}/KRconfig')
    config_files = {}
    count = 0
    onlyfiles = [f for f in listdir(f'{curdir}/KRconfig') if isfile(join(f'{curdir}/KRconfig', f))]
    for file in onlyfiles:
        if file.endswith('.ini'):
            count += 1
            config_files[file] = count
    if count == 0:
        lib.PrintStatus("No config files detected, making default...")
        with codecs.open(f'{curdir}/KRconfig/defaultconfig.ini', 'w', 'utf-8') as dconf:
            dconf.write(
                '''[initial_vars]
displaymode = b
[scraping_vars]
scrape_input_method = m
limiter = 5
repo_crawl = False
link_type = regular
directory_filtering = False
blacklisted_directories = []
verbosity = off''')
        config_files['Default Configuration'] = 1
        count += 1
    for k in config_files.keys():
        print(f"[{config_files[k]}]: {k}")
    while True:
        try:
            load_choice = int(input("Select which config file to load: "))
            # An empty or non-numeric entry already raises ValueError inside int(), so only
            # the numeric range needs checking here.
            if load_choice > count or load_choice < 1:
                raise ValueError
            break
        except ValueError:
            lib.PrintFailure("Invalid Input. Please enter the integer that corresponds with the desired config file.")
            continue
    for k in config_files.keys():
        if load_choice == config_files[k]:
            selected_file = k
    parser.read(f"{curdir}/KRconfig/{selected_file}", encoding='utf-8')
    # Initial Variables
    displaymode = parser.get('initial_vars', 'displaymode')
    # Scraping Variables
    scrape_input_method = parser.get('scraping_vars', 'scrape_input_method')
    limiter = int(parser.get('scraping_vars', 'limiter'))
    repo_crawl = parser.getboolean('scraping_vars', 'repo_crawl')
    link_type = parser.get('scraping_vars', 'link_type')
    directory_filtering = parser.getboolean('scraping_vars', 'directory_filtering')
    blacklisted_directories = parser.get('scraping_vars', 'blacklisted_directories')
    verbosity = parser.get('scraping_vars', 'verbosity')
    return displaymode, scrape_input_method, limiter, repo_crawl, link_type, directory_filtering, blacklisted_directories, verbosity
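# Note that blacklisted_directories comes back from load_config() as the raw INI string
# (e.g. "[]"); a minimal sketch, using only the standard library, of turning it into an
# actual list if a caller needs one (the helper name below is hypothetical):
import ast

def parse_blacklist(raw):
    """Illustrative sketch only: safely evaluate an INI value such as "['docs', 'tests']"."""
    try:
        value = ast.literal_eval(raw)
    except (ValueError, SyntaxError):
        return []
    return value if isinstance(value, list) else []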
def shodan_search(displaymode, page, repo_crawl, verbosity):
    if repo_crawl is False:
        lib.PrintStatus("Searching for Shodan keys...")
    elif repo_crawl is True and verbosity == 'on':
        lib.PrintStatus("Searching for Shodan keys...")
    shodan_pattern = r'\b[a-zA-Z0-9]{32}\b'
    pagetext = page.text
    keyset = []
    for k in re.findall(shodan_pattern, pagetext):
        keyset.append(k)
    if not keyset:
        if repo_crawl is False:
            lib.PrintFailure("No valid shodan keys found in set.")
        elif repo_crawl is True and verbosity == 'on':
            lib.PrintFailure("No valid shodan keys found in set.")
    else:
        valid_paid_keys = {}
        valid_unpaid_keys = []
        for key in set(keyset):
            api = shodan.Shodan(key)
            try:
                keydata = api.info()
                usage_limits = keydata['usage_limits']
                if keydata['plan'] == 'dev' or keydata['plan'] == 'edu':
                    credits_tuple = (usage_limits['scan_credits'], usage_limits['query_credits'])
                    valid_paid_keys[key] = credits_tuple
                elif keydata['plan'] == 'oss':
                    valid_unpaid_keys.append(key)
            except Exception:
                # Invalid or rate-limited keys are simply skipped.
                pass
        if displaymode == 's' or displaymode == 'b':
            shodan_output = f'{curdir}/Output/ShodanKeys.txt'
            if not exists(dirname(shodan_output)):
                try:
                    makedirs(dirname(shodan_output))
                except OSError as racecondition:
                    if racecondition.errno != errno.EEXIST:
                        raise
            with open(shodan_output, 'a') as sofile:
                sofile.write('----------VALID KEYS----------\n')
                for pkey in valid_paid_keys.keys():
                    sofile.write(f"Key: {pkey}\nCredits (scan, query): {valid_paid_keys[pkey][0]}, {valid_paid_keys[pkey][1]}\n\n")
                sofile.write('----------UNPAID KEYS----------\n')
                for upkeys in set(valid_unpaid_keys):
                    sofile.write(f'Key: {upkeys}\n')
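# A minimal sketch of the per-key check shodan_search() performs, assuming the official
# shodan package: Shodan(key).info() returns the plan and usage limits for a valid key
# and raises shodan.APIError otherwise (the helper name below is hypothetical):
import shodan

def check_shodan_key(key):
    """Illustrative sketch only: return (plan, usage_limits) for a valid key, else None."""
    try:
        keydata = shodan.Shodan(key).info()
    except shodan.APIError:
        return None
    return keydata.get('plan'), keydata.get('usage_limits', {})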
def scrape(scrape_input_method, displaymode, limiter, repo_crawl, verbosity):
    if scrape_input_method.lower() == 'm':
        url = input("Enter the URL: ")
        urlpage = connect(url)
        if urlpage == 'connection failed':
            lib.PrintError("Connection to specified URL could not be established.")
            exit()
        else:
            lib.PrintStatus('Status: [200], Searching for API Keys...')
            if repo_crawl is False:
                search_execute(displaymode, urlpage)
            else:
                repository_list = get_repos(url)
                file_addresses = traverse_repos(repository_list, verbosity)
                executor = ThreadPoolExecutor(max_workers=len(file_addresses))
                for addr in set(file_addresses):
                    urlpage = connect(addr)
                    # Pass the callable and its arguments separately so the search actually
                    # runs on the pool's worker threads.
                    executor.submit(search_execute, displaymode, urlpage)
                    sleep(limiter)
            lib.PrintSuccess("Scanning complete.")
    else:
        while True:
            url_file = input("Enter the full path to the input file: ")
            if isfile(url_file) is True:
                break
            elif str(url_file) == "":
                lib.DoNothing()
            else:
                lib.PrintError("No Such File Found.")
                continue
        with open(url_file) as ufile:
            count = 0
            for line in ufile.readlines():
                if repo_crawl is False:
                    count += 1
                    urlpage = connect(line.rstrip())
                    if urlpage == 'connection failed':
                        lib.PrintFailure(f"[Line: {count}] Connection failed on host {line}")
                    else:
                        search_execute(displaymode, urlpage)
                        sleep(limiter)
                else:
                    repository_list = get_repos(line)
                    file_addresses = traverse_repos(repository_list, verbosity)
                    executor = ThreadPoolExecutor(max_workers=len(file_addresses))
                    for addr in set(file_addresses):
                        urlpage = connect(addr)
                        executor.submit(search_execute, displaymode, urlpage)
                        sleep(limiter)
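# A short, self-contained illustration (not from the original source) of the submit pattern
# used above: the callable and its arguments are handed to executor.submit() separately, so
# the call runs on a worker thread instead of being executed eagerly by the caller.
from concurrent.futures import ThreadPoolExecutor

def _submit_demo():
    """Hypothetical helper, for illustration only."""
    def double(n):
        return n * 2
    with ThreadPoolExecutor(max_workers=4) as pool:
        futures = [pool.submit(double, n) for n in range(4)]
        return [f.result() for f in futures]  # -> [0, 2, 4, 6]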
def load_config():
    while True:
        if isdir(f'{curdir}/KRconfig') is False:
            lib.PrintError(f"Config directory not detected in {curdir}...")
            lib.PrintError(f"Please move KRconfig directory into {curdir}")
            cont = input('Continue? [y/n]: ')
            if cont.lower() == 'y':
                continue
            elif cont.lower() == 'n':
                exit()
            elif cont == "":
                lib.DoNothing()
            else:
                lib.PrintFailure("Invalid Input")
                continue
        else:
            break
    config_files = {}
    count = 0
    onlyfiles = [f for f in listdir(f'{curdir}/KRconfig') if isfile(join(f'{curdir}/KRconfig', f))]
    for file in onlyfiles:
        if file.endswith('.ini'):
            count += 1
            config_files[file] = count
    if count == 0:
        lib.PrintStatus("No config files detected, making default...")
        with codecs.open(f'{curdir}/KRconfig/defaultconfig.ini', 'w', 'utf-8') as dconf:
            dconf.write(
                '''[initial_vars]
displaymode = b
[scraping_vars]
scrape_input_method = m
limiter = 5
repo_crawl = False
verbosity = off''')
        config_files['Default Configuration'] = 1
        count += 1
    for k in config_files.keys():
        print(f"[{config_files[k]}]: {k}")
    while True:
        try:
            load_choice = int(input("Select which config file to load: "))
            if load_choice > count or load_choice < 1:
                raise ValueError
            break
        except ValueError:
            lib.PrintFailure("Invalid Input. Please enter the integer that corresponds with the desired config file.")
            continue
    for k in config_files.keys():
        if load_choice == config_files[k]:
            selected_file = k
    parser.read(f"{curdir}/KRconfig/{selected_file}", encoding='utf-8')
    # Initial Variables
    displaymode = parser.get('initial_vars', 'displaymode')
    # Scraping Variables
    scrape_input_method = parser.get('scraping_vars', 'scrape_input_method')
    limiter = int(parser.get('scraping_vars', 'limiter'))
    repo_crawl = parser.get('scraping_vars', 'repo_crawl') == 'True'
    verbosity = parser.get('scraping_vars', 'verbosity')
    return displaymode, scrape_input_method, limiter, repo_crawl, verbosity
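# For comparison with the manual 'True' check above, a minimal standard-library sketch
# showing that ConfigParser.getboolean() performs the same string-to-bool conversion:
from configparser import ConfigParser

def _getboolean_demo():
    """Illustrative sketch only."""
    demo = ConfigParser()
    demo.read_string('[scraping_vars]\nrepo_crawl = False\nverbosity = off')
    return demo.getboolean('scraping_vars', 'repo_crawl')  # -> False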
def scrape(scrape_input_method, displaymode, limiter, repo_crawl, link_type, directory_filtering, blacklisted_directories, verbosity):
    if scrape_input_method.lower() == 'm':
        # Strip any trailing whitespace from the pasted URL before connecting.
        url = input("Enter the URL: ").rstrip()
        urlpage = connect(url)
        if urlpage == 'connection failed':
            lib.PrintError("Connection to specified URL could not be established.")
            exit()
        else:
            lib.PrintStatus('Status: [200], Searching for API Keys...')
            if repo_crawl is False:
                better_search_execute(displaymode, urlpage, repo_crawl, verbosity)
            else:
                if link_type == 'profile':
                    resources = get_repos(url)
                    file_addresses = traverse_repos(resources, link_type, directory_filtering, blacklisted_directories, verbosity)
                elif link_type == 'repo':
                    file_addresses = traverse_repos(url, link_type, directory_filtering, blacklisted_directories, verbosity)
                if len(file_addresses) > 0:
                    executor = ThreadPoolExecutor(max_workers=len(file_addresses))
                else:
                    lib.PrintError("Fatal Error: No File Addresses Were Returned")
                    lib.PrintError("This is likely a mistyped, but valid, URL in the input.")
                    lib.PrintError("This also occurs if a github repo link is provided when the profile option is enabled, or vice versa.")
                    exit()
                for addr in set(file_addresses):
                    urlpage = connect(addr)
                    # Pass the callable and its arguments separately so the search actually
                    # runs on the pool's worker threads.
                    executor.submit(better_search_execute, displaymode, urlpage, repo_crawl, verbosity)
                    sleep(limiter)
            lib.PrintSuccess("Scanning complete.")
    else:
        while True:
            url_file = input("Enter the full path to the input file: ")
            if isfile(url_file) is True:
                break
            elif str(url_file) == "":
                pass
            else:
                lib.PrintError("No Such File Found.")
                continue
        with open(url_file) as ufile:
            count = 0
            for line in ufile.readlines():
                if repo_crawl is False:
                    count += 1
                    urlpage = connect(line.rstrip())
                    if urlpage == 'connection failed':
                        lib.PrintFailure(f"[Line: {count}] Connection failed on host {line}")
                    else:
                        better_search_execute(displaymode, urlpage, repo_crawl, verbosity)
                        sleep(limiter)
                else:
                    if link_type == 'profile':
                        resources = get_repos(line)
                    elif link_type == 'repo':
                        resources = line
                    file_addresses = traverse_repos(resources, link_type, directory_filtering, blacklisted_directories, verbosity)
                    executor = ThreadPoolExecutor(max_workers=len(file_addresses))
                    for addr in set(file_addresses):
                        urlpage = connect(addr)
                        executor.submit(better_search_execute, displaymode, urlpage, repo_crawl, verbosity)
                        sleep(limiter)
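# A minimal sketch (an assumed entry point, not shown in the original source) of how the
# eight values returned by the fuller load_config() variant above could be unpacked and
# passed straight into this scrape() signature:
if __name__ == '__main__':
    (displaymode, scrape_input_method, limiter, repo_crawl,
     link_type, directory_filtering, blacklisted_directories, verbosity) = load_config()
    scrape(scrape_input_method, displaymode, limiter, repo_crawl,
           link_type, directory_filtering, blacklisted_directories, verbosity)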