def __execute_query_test(self, dirindex, chunks_info_in_memory):
    '''Run the query benchmark against the index stored in `dirindex`.

    Every query in `self.__queries` is executed `self.__iterations + 1`
    times. The first pass (pass 0) is a warm-up: it is executed and
    reported on stdout, but its timings are not stored.

    Args:
        dirindex (string): index directory.
        chunks_info_in_memory (bool): whether the chunk information must be
            loaded into RAM.

    Returns:
        index (Index): index the statistics belong to.
        min_times (double list): minimum query time of each measured pass.
        avg_times (double list): average query time of each measured pass.
        max_times (double list): maximum query time of each measured pass.

        All times are the browser benchmark value multiplied by 1000
        (presumably seconds converted to milliseconds -- confirm against
        `Browser.get_benchmark`).
    '''
    index = Index(dirindex)
    self.__check_queries_load()
    browser = Browser(index, BrowseType.Boolean)

    # Per-pass statistics.
    min_times = []
    avg_times = []
    max_times = []

    index.load(chunks_info_in_memory)

    index_name = path.basename(dirindex)
    evtype = "inmemory" if chunks_info_in_memory else "indisk"
    queries_count = len(self.__queries)
    # One extra pass: pass 0 is the warm-up.
    iterations = self.__iterations + 1

    for i in range(0, iterations):
        info = "{0} ({1}) - evaluación nro. {2}/{3}"
        print(info.format(index_name, evtype, i, iterations))

        min_time = None
        sum_time = 0
        max_time = 0

        for query in self.__queries:
            # Run the search, then read the benchmark of that last search.
            browser.browse(query)
            elapsed = browser.get_benchmark()

            sum_time += elapsed
            if elapsed > max_time:
                max_time = elapsed
            # Compare against None explicitly: a measured minimum of 0.0 is
            # falsy and would otherwise be replaced by a larger timing.
            if min_time is None or elapsed < min_time:
                min_time = elapsed

        avg_time = sum_time / queries_count

        # Store the statistics unless this was the warm-up pass.
        if i > 0:
            min_times.append(min_time * 1000)
            avg_times.append(avg_time * 1000)
            max_times.append(max_time * 1000)

    return index, min_times, avg_times, max_times
def main():
    """Capture one Hacker News page and dump its DOM index to JSON.

    Opens a page through ``Browser``, collects the data for the front page
    of 2020-03-20, builds the R-tree and DOM indexes with ``LayoutParser``,
    writes the DOM index to ``dom-build.json`` and finally calls
    ``create_report1`` (its result is not used here).
    """
    session = Browser()
    with session.new_page() as page:
        capture = page.collect_data(
            "https://news.ycombinator.com/front?day=2020-03-20")

        snapshot = capture["snapshot"]
        tree = capture["tree"]
        screenshot = capture["screenshot"]
        parser = LayoutParser(snapshot, tree, screenshot)

        parser.build_rtree_index()
        parser.build_dom_index()
        write_json(parser.dom_index, "dom-build.json")
        # NOTE: dumping the screenshot to "page.png" via write_image is
        # currently disabled in this entry point.
        parser.create_report1()
def index():
    """Build a layout report for the Hacker News home page and render it.

    Progress messages are printed to stdout at each stage. The page
    screenshot is written to ``page.png`` and the report produced by
    ``create_report1`` is passed to the ``index.html`` template.

    Returns:
        Whatever ``render_template("index.html", report=...)`` returns.
    """
    session = Browser()
    # Stays None if the page context exits without producing a report.
    report = None

    with session.new_page() as page:
        print("Loading page")
        capture = page.collect_data("https://news.ycombinator.com/")

        snapshot = capture["snapshot"]
        tree = capture["tree"]
        screenshot = capture["screenshot"]
        parser = LayoutParser(snapshot, tree, screenshot)

        print("Building rtree")
        parser.build_rtree_index()

        print("Building dom tree")
        parser.build_dom_index()

        write_image(parser.screenshot["data"], "page.png")

        print("Creating report")
        report = parser.create_report1()

    return render_template("index.html", report=report)
def parse_resp(self, browser: Browser, proxy_sig: str) -> None:
    '''Record the outcome of a browser exactly once and update the counters.

    Does nothing when this object is no longer alive or the browser has
    already been processed. Otherwise the browser is flagged as processed
    and then:

    * an authenticated browser stores its password in `account_password`;
    * a browser that attempted its password without a proxy failure
      increments `attempts`;
    * a browser whose proxy failed has that proxy reported through
      `self.proxies.proxy_expired`, the matching `proxies_browsers` entry
      flagged as expired, its password handed to `self.add_password`, and
      `proxy_sig` put on `expired_proxies`.

    In every processed case `active_browsers` is decremented by one.

    Args:
        browser: the browser whose result is being recorded.
        proxy_sig: key of the browser's proxy in `self.proxies_browsers`.
    '''
    # Same evaluation order as a plain "not alive or processed" check:
    # `self.is_alive` is read first, the browser flag only when alive.
    if not (self.is_alive and not browser.is_processed):
        return

    browser.is_processed = True

    if not browser.is_authenticated:
        if browser.password_attempted and not browser.proxy_failed:
            self.attempts += 1
    else:
        self.account_password = browser.password

    if browser.proxy_failed:
        expired_entry = self.proxies_browsers[proxy_sig]
        self.proxies.proxy_expired(browser.proxy)
        expired_entry.is_expired = True

        self.add_password(browser.password)

        with self.lock_expired_proxies:
            self.expired_proxies.put(proxy_sig)

    self.active_browsers -= 1
def create_browsers(self, password: str, proxy_sig: str, csrftoken: str) -> None:
    '''Create a browser for `password` and try to start it on a thread.

    Steps, each with its own failure handling:

    1. Look up the proxy registered under `proxy_sig` and build the
       browser. On `KeyError` the password is handed back through
       `self.add_password` and the method returns.
    2. Start a daemon thread running `browser.login`, count it in
       `active_browsers` and, in verbose mode while alive, print a
       progress line. Any ordinary exception here ends the method.
    3. Register the browser on its `proxies_browsers` entry. If that fails
       the password is handed back and `active_browsers` is decremented.

    Only `Exception` subclasses are absorbed in steps 2 and 3, so
    `KeyboardInterrupt` and `SystemExit` still propagate.

    Args:
        password: password the new browser will use.
        proxy_sig: key of the proxy entry in `self.proxies_browsers`.
        csrftoken: token passed through to the `Browser` constructor.
    '''
    try:
        proxy = self.proxies_browsers[proxy_sig].proxy
        browser = Browser(self.username, password, proxy, csrftoken)
    except KeyError:
        self.add_password(password)
        return

    try:
        Thread(target=browser.login, daemon=True).start()
        self.active_browsers += 1

        if self.verbose and self.is_alive:
            with self.lock_display:
                print('[-] Trying: {} ...'.format(
                    password
                ))
    except Exception:
        return

    try:
        with self.lock_proxies_browsers:
            self.proxies_browsers[proxy_sig].add_browser(
                browser
            )
    except Exception:
        self.add_password(password)
        self.active_browsers -= 1
def main(args):
    """App entry point: interactive search that cross-checks two indexes.

    Loads the indexes stored in ``../output/test/index-1/`` and
    ``../output/test/index-2/`` and then loops forever: it reads a query
    from stdin, runs it against both indexes, prints the first 25 matching
    documents and the benchmark of each search, and pauses with a warning
    when the two result lists differ.

    Args:
        args: command-line arguments (not used by this function).
    """
    utils.clearscreen()

    index1 = Index("../output/test/index-1/")
    index2 = Index("../output/test/index-2/")

    print("Cargando índice 1...")
    index1.load()
    print("Cargando índice 2...")
    index2.load()

    # One browser per index. The second browser must wrap index2: built on
    # index1, both searches return the same postings and the mismatch
    # warning below can never trigger.
    index1_browser = Browser(index1, BrowseType.Boolean)
    index2_browser = Browser(index2, BrowseType.Boolean)
    searches = ((index1, index1_browser), (index2, index2_browser))

    while True:
        text = input("\nBúsqueda: ").lower().strip()

        for i, (index, browser) in enumerate(searches):
            doc_ids = browser.browse(text)
            if i == 0:
                print("Búsqueda en índice 1 (total {0}):".format(len(doc_ids)))
                # Kept to compare against the result of the second index.
                doc_ids1 = doc_ids
            else:
                print("")
                print("Búsqueda en índice 2 (total {0}):".format(len(doc_ids)))
                if doc_ids != doc_ids1:
                    input("ATENCIÓN: Las postings son distintas...")

            if not doc_ids:
                msg = "Ningún documento coincide con la búsqueda."
                print(msg)
            else:
                # Print the first 25 documents only.
                for doc_number, doc_id in enumerate(doc_ids[0:25], start=1):
                    doc = index.get_doc_by_id(int(doc_id))
                    # The document name is passed as a format argument, not
                    # spliced into the template, so braces in it are safe.
                    print("{0}. {1} ({2})".format(doc_number, doc, doc_id))

            print("")
            print("Benchmark:", browser.get_benchmark(), "segundos")

        input("\nPresione una tecla para continuar...")
        utils.clearscreen()
def attack(self):
    """Scheduling loop: queue browsers per password, then start them.

    While `self.is_alive` is true, each round has two phases:

    1. Walk `self.password_manager.passlist` and, for every password not
       already in `self.active_passwords`, append up to `proxy_per_pwd`
       `Browser` objects (one per proxy obtained) to
       `self.unstarted_browsers`, never letting that list reach
       `self.total_threads` entries or more.
    2. Move queued browsers into `self.browsers` while it holds fewer than
       `self.total_threads` entries, starting a daemon thread that runs
       `br.attempt` for each one moved.

    NOTE(review): the nesting below was reconstructed from a copy whose
    indentation was lost -- confirm the loop and lock scopes against the
    original file.
    """
    attack_started = False
    # Maximum number of browsers queued for a single password.
    proxy_per_pwd = 3

    while self.is_alive:
        for pwd in self.password_manager.passlist:
            if not self.is_alive:
                break

            # Queue already full: stop scanning passwords for this round.
            with self.lock_unstarted_browsers:
                if len(self.unstarted_browsers) >= self.total_threads:
                    break

            # Skip passwords that already have browsers assigned.
            with self.lock_active_passwords:
                if pwd in self.active_passwords:
                    continue

            is_added = False

            for _ in range(proxy_per_pwd):
                with self.lock_unstarted_browsers:
                    if len(self.unstarted_browsers) >= self.total_threads:
                        break

                proxy = self.proxy_manager.get_proxy()

                # No proxy returned this time; use up one try and go on.
                if not proxy:
                    continue

                with self.lock_unstarted_browsers:
                    self.unstarted_browsers.append(
                        Browser(self.username, pwd, proxy))
                    is_added = True

            # Nothing could be queued for this password (queue full or no
            # proxy obtained): leave the password scan for this round.
            if not is_added:
                break

            with self.lock_active_passwords:
                self.active_passwords.append(pwd)

            # Announce the start only once, after the first queued password.
            if not attack_started:
                self.display.info("Starting attack...")
                attack_started = True

        with self.lock_unstarted_browsers:
            # Iterate over a copy because entries are removed in the loop.
            for br in list(self.unstarted_browsers):
                with self.lock_browsers:
                    if len(self.browsers) >= self.total_threads:
                        break
                    else:
                        self.browsers.append(br)
                        self.unstarted_browsers.remove(br)
                        threading.Thread(target=br.attempt, daemon=True).start()
def main(args):
    """App entry point: interactive search over a single index.

    Loads the index stored in ``../output/test/index-1/`` with
    ``chunks_info_in_memory=False`` and then loops forever: it reads a
    query from stdin, runs it, prints the first 25 matching documents and
    the benchmark of the search, and waits for a key press.

    Args:
        args: command-line arguments (not used by this function).
    """
    utils.clearscreen()

    index = Index("../output/test/index-1/")
    print("Cargando índice...")
    index.load(chunks_info_in_memory=False)

    browser = Browser(index, BrowseType.Boolean)

    while True:
        text = input("\nBúsqueda: ").lower().strip()
        print("")

        doc_ids = browser.browse(text)
        print("Búsqueda en índice (total {0}):".format(len(doc_ids)))

        if not doc_ids:
            msg = "Ningún documento coincide con la búsqueda."
            print(msg)
        else:
            # Print the first 25 documents only.
            for doc_number, doc_id in enumerate(doc_ids[0:25], start=1):
                doc = index.get_doc_by_id(int(doc_id))
                # The document name is passed as a format argument, not
                # spliced into the template, so braces in it are safe.
                print("{0}. {1} ({2})".format(doc_number, doc, doc_id))

        print("")
        print("Benchmark:", browser.get_benchmark(), "segundos")
        input("\nPresione una tecla para continuar...")
        utils.clearscreen()
def attack(self):
    """Scheduling loop: create a browser per pending password and start it.

    While `self.is_alive` is true, each round walks
    `self.password_manager.passlist`, creates a `Browser` for every
    password not yet in `self.active_passwords`, registers it in
    `self.browsers`, and afterwards starts one daemon thread per new
    browser running `browser.attempt`.

    One proxy is shared by consecutive browsers until
    `self.bots_per_proxy` reaches the module-level `max_bots_per_proxy`.

    NOTE(review): the nesting below was reconstructed from a copy whose
    indentation was lost -- confirm the loop and lock scopes against the
    original file.
    """
    # Proxy currently being shared; None means "fetch a new one".
    proxy = None
    is_attack_started = False

    while self.is_alive:
        # Browsers created during this round, started after the scan.
        browsers = []

        for password in self.password_manager.passlist:
            if not self.is_alive:
                break

            if not proxy:
                proxy = self.proxy_manager.get_proxy()
                self.bots_per_proxy = 0

            # Proxy quota used up: drop it; the `continue` below skips this
            # password and a new proxy is requested on the next iteration.
            if self.bots_per_proxy >= max_bots_per_proxy:
                proxy = None

            if not proxy:
                continue

            # The second membership test re-checks the list being iterated,
            # presumably because it can change concurrently -- confirm.
            if not password in self.active_passwords and password in self.password_manager.passlist:
                browser = Browser(self.username, password, proxy)
                browsers.append(browser)
                self.bots_per_proxy += 1

                # Announce the start only once.
                if not is_attack_started:
                    self.display.info('Starting attack ...')
                    is_attack_started = True

                with self.lock:
                    self.browsers.append(browser)
                    self.active_passwords.append(password)

        for browser in browsers:
            thread = Thread(target=browser.attempt)
            thread.daemon = True
            try:
                thread.start()
            # NOTE(review): bare except also catches KeyboardInterrupt and
            # SystemExit; consider narrowing it.
            except:
                self.remove_browser(browser)
import logging
import os
import sys
import time

from lib.browser import Browser
from lib.flow import Flow

# Log INFO and above to stdout with timestamp, source location and level.
logging.basicConfig(
    level=logging.INFO,
    format='[%(asctime)s][%(filename)s:%(lineno)d][%(levelname)s] %(message)s',
    handlers=[logging.StreamHandler(sys.stdout)])
log = logging.getLogger('Olsanka App')
log.setLevel(logging.INFO)

if __name__ == '__main__':
    browser = Browser(display_visible=False, display_size=(1000, 900))
    try:
        flow = Flow(browser)
        flow.login_and_find_next_free_and_book()
        # Fixed 5 second pause before shutting down (presumably to let the
        # last page action settle -- confirm).
        time.sleep(5)
    finally:
        # Always release the browser, even when the flow raises.
        browser.close()
def main(args):
    """App entry point: build two indexes and cross-check their searches.

    Creates index 1 with the indexer's initial settings, then changes
    ``doc_encode`` and ``freq_encode`` on the same indexer and creates
    index 2. If either creation reports ``Collection_Non_Existent`` the
    function prints a message and returns. Otherwise both indexes are
    loaded and the function loops forever: it reads a query from stdin,
    runs it against both indexes, lists every matching document with the
    benchmark of each search, and pauses with a warning (after printing
    both result lists) when the results differ.

    Args:
        args: command-line arguments (not used by this function).
    """
    utils.clearscreen()

    indexer = Indexer(DIRIN, CorpusTypes.Text, reuse_tmp=True)

    # Build index 1.
    index1, index1_error = indexer.create_index(INDEX1_DIROUT, False, 64)

    # Build index 2 with a different set of encodings.
    indexer.doc_encode = [
        EncodeTypes.Unary,
        EncodeTypes.VariableByte,
        EncodeTypes.VariableByte,
        EncodeTypes.EliasFano,
        EncodeTypes.Simple16,
        EncodeTypes.BitPacking,
        EncodeTypes.PForDelta
    ]
    indexer.freq_encode = [EncodeTypes.Unary, EncodeTypes.VariableByte]
    index2, index2_error = indexer.create_index(INDEX2_DIROUT, False, 128)

    if index1_error == IndexerStatusTypes.Already_Indexed:
        print("Indice 1 ya indexado.")
    elif index1_error == IndexerStatusTypes.Collection_Non_Existent:
        print("Directorio de colección de índice 1 inexistente.")
        return

    if index2_error == IndexerStatusTypes.Already_Indexed:
        print("Indice 2 ya indexado.")
    elif index2_error == IndexerStatusTypes.Collection_Non_Existent:
        print("Directorio de colección de índice 2 inexistente.")
        return

    print("Cargando índice 1...")
    index1.load(chunks_info_in_memory=True)
    print("Cargando índice 2...")
    index2.load()

    # One browser per index.
    index1_browser = Browser(index1, BrowseType.Boolean)
    index2_browser = Browser(index2, BrowseType.Boolean)
    searches = ((index1, index1_browser), (index2, index2_browser))

    while True:
        text = input("\nBúsqueda: ").lower().strip()

        for i, (index, browser) in enumerate(searches):
            if i == 0:
                print("Búsqueda en índice 1:")
                doc_ids = browser.browse(text)
                # Kept to compare against the result of the second index.
                doc_ids1 = doc_ids
            else:
                print("")
                print("Búsqueda en índice 2:")
                doc_ids = browser.browse(text)
                if doc_ids != doc_ids1:
                    print("")
                    print(doc_ids)
                    print(doc_ids1)
                    input(
                        "ATENCIÓN: Las postings son distintas ({0})...".format(
                            text))

            if not doc_ids:
                msg = "Ningún documento coincide con la búsqueda."
                print(msg)

            for doc_number, doc_id in enumerate(doc_ids, start=1):
                doc = index.get_doc_by_id(int(doc_id))
                # The document name is passed as a format argument, not
                # spliced into the template, so braces in it are safe.
                print("{0}. {1} ({2})".format(doc_number, doc, doc_id))

            print("")
            print("Benchmark:", browser.get_benchmark(), "segundos")

        input("\nPresione una tecla para continuar...")
        utils.clearscreen()
def before_all(context):
    """Announce the hook on stdout and attach a new ``Browser`` to `context`.

    NOTE(review): the name and signature look like a test-framework
    "before all" hook -- confirm which runner calls it.

    Args:
        context: object that receives the browser as ``context.browser``.
    """
    print("==> Executing before_all")
    shared_browser = Browser()
    context.browser = shared_browser