def __init__(self, **kwargs):
    """Configure crawler defaults: headers, timeout, attribute whitelists, logging.

    Every option is pushed into ``kwargs`` via ``setdefault`` so the caller's
    dict ends up reflecting the effective configuration.
    """
    ua_kind = kwargs.setdefault('agent_type', None)
    ref = kwargs.setdefault('referer', None)
    # the generated header is built eagerly even when 'headers' is supplied
    # (setdefault evaluates its second argument unconditionally)
    self.headers = kwargs.setdefault(
        'headers',
        HeaderGenerator().header(agent_type=ua_kind, referer=ref))
    self.timeout = kwargs.setdefault('timeout', 10)
    # attributes that get() is allowed to read from a response object
    self.resp_attributes = kwargs.setdefault('resp_attributes', [
        'content', 'encoding', 'headers', 'history', 'html', 'json', 'ok',
        'reason', 'status_code', 'text', 'url'
    ])
    # attributes that get() is allowed to read from an HTML/Element object
    self.elem_attributes = kwargs.setdefault('elem_attributes', [
        'absolute_links', 'base_url', 'encoding', 'full_text', 'html',
        'links', 'raw_html', 'text', 'url'
    ])
    self._logging = kwargs.setdefault('logging', True)
    if self._logging:
        log_path = kwargs.setdefault('log_path', '.')
        log_file = kwargs.setdefault('log_file', 'out.log')
        log_name = kwargs.setdefault('log_name', __name__)
        self._logger = Logger(name=log_name,
                              log_path=log_path,
                              log_file=log_file)
    self.session = requests_html.HTMLSession()
    # accumulated error records; flushed by write_errors()
    self._err_recs = []
def __init__(self, path, attributes, name='table', verbosity=0):
    """Create (or open) a SQLite-backed progress table via SQLAlchemy.

    :param path: directory in which search_progress.db lives/is created.
    :param attributes: mapping of column name -> type key into
        ``self.SQLITE_COLUMNS`` (defined on the class elsewhere).
    :param name: table name inside the database.
    :param verbosity: forwarded to the Logger base class.
    """
    # pending request buffers consumed by _process_requests()
    self.WRITING_REQUESTS = []
    self.READING_REQUESTS = {}
    self.UPDATE_REQUESTS = []
    Logger.__init__(self, 'SQLite interface', verbosity=verbosity)
    self.db_path = 'sqlite:///%s/search_progress.db' % path
    self.attributes = attributes
    self.name = name
    self.log('creating database %s at %s' % (self.name, self.db_path), 'DEBUG')
    # create database
    self.db = sql.create_engine(self.db_path)
    self.db.echo = False
    self.metadata = sql.MetaData(self.db)
    # create table in database
    # NOTE: the loop variable below shadows the `name` parameter (already
    # stored in self.name, so no behavioral harm)
    self.table = sql.Table(self.name, self.metadata)
    for name, att_type in self.attributes.items():
        self.table.append_column(
            sql.Column(name, self.SQLITE_COLUMNS[att_type]))
    # no-op if the table already exists
    self.table.create(checkfirst=True)
    # start request processor
    self._process_requests()
def handleRequest(command, args):
    """Handle a single control command ('scanWifi' or 'connectToWifi').

    Builds a response message of the form
    ``{'type': 'response', 'command': ..., 'data': ...}`` and publishes it
    as JSON via publishMessage().
    """
    respMsg = {'type': 'response', 'command': command, 'data': None}
    if command == 'scanWifi':
        println("Command recevied to scan wifi")
        apInfo = Wifi.findMhaDevices()
        if len(apInfo) < 1:
            Logger.log(Logger.LogLevel.error,
                       "Unable to find any MHA Devices in accespoint mode")
        else:
            data = []
            for ap in apInfo:
                data.append({'macaddr': Wifi.getMacAddr(ap),
                             'ssid': Wifi.getSsid(ap)})
            respMsg['data'] = data
            Logger.log(Logger.LogLevel.info, "New APs:" + str(data))
        # publish the scan response ('data' stays None when nothing was found)
        publishMessage(json.dumps(respMsg))
    if command == 'connectToWifi':
        println('Command Received to connec to Wifi')
        # silently ignored when 'ssid' is absent -- no response is published
        if 'ssid' in args:
            ssid = args['ssid']
            macaddr = ''
            if 'macaddr' in args:
                macaddr = args['macaddr']
            status = {'connectedToWifi': 'True'}
            # connectToWifi() returning non-zero is treated as failure
            if connectToWifi(ssid, macaddr) != 0:
                status['connectedToWifi'] = 'False'
            respMsg['data'] = status
            publishMessage(json.dumps(respMsg))
def __init__(self, mongo_address, model_name, total_indexes, current_index,
             n_neighbors=5, algorithm="brute", metric="euclidean"):
    """Load this shard's feature records from MongoDB and fit a k-NN index.

    :param mongo_address: MongoDB connection address.
    :param model_name: collection name inside the ``features`` database.
    :param total_indexes: total number of shards the data is split into.
    :param current_index: zero-based shard handled by this instance.
    :param n_neighbors: neighbours per query, capped at the record count.
    :param algorithm: sklearn NearestNeighbors algorithm.
    :param metric: sklearn distance metric.
    """
    self.logger = Logger()
    client = MongoClient(mongo_address)
    self.db = client.features
    self.records = self.load_data(model_name, total_indexes, current_index)
    # assumes each document carries "feature" and "url" keys -- TODO confirm
    features = list(map(lambda x: x["feature"], self.records))
    urls = list(map(lambda x: x["url"], self.records))
    self.logger.info(
        f"Indexing {len(urls)} records for model {model_name} using algorithm {algorithm}"
    )
    # stays None when the shard is empty; callers must guard before querying
    self.neighbors = None
    if len(self.records):
        self.neighbors = NearestNeighbors(n_neighbors=min(
            n_neighbors, len(self.records)),
                                          algorithm=algorithm,
                                          metric=metric).fit(features)
    self.logger.info(f"Done indexing {len(self.records)} records")
def __init__(self, config):
    """Initialize the acquisition module: logging, random sampler, CPU count.

    :param config: project config object (provides get(), general,
        parameters, feature_names).
    """
    self.config = config
    Logger.__init__(self, 'Acquisition', self.config.get('verbosity'))
    self.random_sampler = RandomSampler(self.config.general,
                                        self.config.parameters)
    self.total_num_vars = len(self.config.feature_names)
    # created lazily elsewhere -- presumably on first proposal; TODO confirm
    self.local_optimizers = None
    self.num_cpus = multiprocessing.cpu_count()
def __init__(self, attributes, entries=None, verbosity=0):
    """Create an in-memory cache with one list per attribute name.

    :param attributes: iterable of attribute names to cache.
    :param entries: optional iterable of initial entries, each forwarded to
        ``self.add()``.  The original default was the mutable ``[]``
        (shared across calls); replaced with ``None`` -- behavior for all
        callers is unchanged.
    :param verbosity: forwarded to the Logger base class.
    """
    Logger.__init__(self, 'DB_Cache', verbosity=verbosity)
    self.attributes = attributes
    self.cache = {attr: [] for attr in self.attributes}
    self.num_items = 0
    for entry in entries or ():
        self.add(entry)
def __init__(self, config):
    """Set up observation processing: Chimera scalarizer, logging, and the
    soft feature boundaries (10% inset from the configured range)."""
    self.config = config
    self.chimera = Chimera(self.config.obj_tolerances,
                           self.config.get('softness'))
    Logger.__init__(self, 'ObservationProcessor',
                    verbosity=self.config.get('verbosity'))
    # compute some boundaries
    self.feature_lowers = self.config.feature_lowers
    self.feature_uppers = self.config.feature_uppers
    span = self.feature_uppers - self.feature_lowers
    # soft bounds sit 10% inside the hard feature range on each side
    self.soft_lower = self.feature_lowers + 0.1 * span
    self.soft_upper = self.feature_uppers - 0.1 * span
def __init__(self, config):
    """Set up the parameter optimizer and select the continuous backend.

    :param config: project config (provides get(), num_features,
        feature_types).
    """
    self.config = config
    Logger.__init__(self, 'ParamOptimizer', verbosity = self.config.get('verbosity'))
    # parse positions
    self.pos_continuous = np.full(self.config.num_features, False, dtype = bool)
    for feature_index, feature_type in enumerate(self.config.feature_types):
        # NOTE(review): every feature is flagged continuous regardless of
        # feature_type -- if non-continuous types exist this needs a condition.
        self.pos_continuous[feature_index] = True
    # set up continuous optimization algorithms
    cont_opt_name = self.config.get('continuous_optimizer')
    if cont_opt_name == 'adam':
        from Acquisition.NumpyOptimizers import AdamOptimizer
        self.opt_con = AdamOptimizer()
    else:
        # NOTE(review): the error object is constructed but never raised, and
        # the name is spelled "Unkown" while sibling modules use "Unknown" --
        # verify against the project's error definitions.
        PhoenicsUnkownSettingsError('did not understand continuous optimizer "%s".\n\tPlease choose from "adam"' % cont_opt_name)
def __init__(self, config_general, config_params):
    """Pick the continuous sampler backend ('sobol' or 'uniform').

    :param config_general: general config (provides verbosity, sampler).
    :param config_params: parameter config, stored for later use.
    """
    self.config_general = config_general
    self.config_params = config_params
    verbosity = self.config_general.verbosity
    # allow a per-module verbosity override when verbosity is a mapping
    # (presumably a dict keyed by module name -- TODO confirm)
    if 'random_sampler' in self.config_general.verbosity:
        verbosity = self.config_general.verbosity['random_sampler']
    Logger.__init__(self, 'RandomSampler', verbosity)
    if self.config_general.sampler == 'sobol':
        from RandomSampler.sobol import SobolContinuous
        self.continuous_sampler = SobolContinuous()
    elif self.config_general.sampler == 'uniform':
        from RandomSampler.uniform import UniformContinuous
        self.continuous_sampler = UniformContinuous()
    else:
        # NOTE(review): error object is constructed but not raised -- verify
        # whether this type self-reports/aborts on construction.
        PhoenicsUnknownSettingsError('did not understanding sampler setting: "%s".\n\tChoose from "uniform" or "sobol"' % self.config_general.sampler)
class WebDriver:
    """Factory for the Chrome WebDriver used by the test suite."""

    log = Log.func_logger()

    def init_driver(self, driver_path="/Users/edgarnav/Documents/chromedriver"):
        """Start and return a Chrome WebDriver.

        :param driver_path: path to the chromedriver binary.  Generalized from
            a hard-coded machine-specific path; the default preserves the
            original behavior for existing callers.
        """
        self.log.info("Opening in Chrome")
        return webdriver.Chrome(driver_path)
def __init__(self, config, model_details=None):
    """Configure the BNN wrapper: backend executable, domain volume and
    per-strategy sampling parameter values.

    :param config: project config object (provides get(), feature_lengths,
        feature_ranges, feature_types).
    :param model_details: optional model-detail object; defaults to the
        package-level ``model_details``.
    """
    self.COUNTER = 0
    self.has_sampled = False
    self.config = config
    verbosity = self.config.get('verbosity')
    # allow a per-module verbosity override when verbosity is a mapping
    if 'bayesian_network' in verbosity:
        verbosity = verbosity['bayesian_network']
    Logger.__init__(self, 'BayesianNetwork', verbosity=verbosity)
    # placeholder kernel contribution until a network has been trained
    self.kernel_contribution = lambda x: (np.sum(x), 1.)

    # get bnn model details (fixed `== None` -> `is None`; the attribute is
    # now also assigned when the caller supplies model_details explicitly)
    if model_details is None:
        from BayesianNetwork.model_details import model_details
    self.model_details = model_details

    # set up bnn backend
    backend = self.config.get('backend')
    if backend == 'tfprob':
        from BayesianNetwork.TfprobInterface import TfprobNetwork
        self.network_executable = '{}/BayesianNetwork/TfprobInterface/tfprob_interface.py'.format(
            self.config.get('home'))
    elif backend == 'edward':
        from BayesianNetwork.EdwardInterface import EdwardNetwork
        self.network_executable = '%s/BayesianNetwork/EdwardInterface/edward_interface.py' % self.config.get(
            'home')
    else:
        # BUG FIX: the message originally interpolated
        # self.config_general.backend -- an attribute this class never sets,
        # so the error path itself raised AttributeError.
        PhoenicsUnknownSettingsError(
            'did not understand backend: "%s".\n\tChoose from "tfprob" or "edward"'
            % backend)

    # get domain volume: product of per-feature ranges
    self.volume = 1.
    feature_lengths = self.config.feature_lengths
    feature_ranges = self.config.feature_ranges
    for feature_index, feature_type in enumerate(
            self.config.feature_types):
        self.volume *= feature_ranges[feature_index]
    self.inverse_volume = 1 / self.volume

    # compute sampling parameter values, scaled by the inverse volume;
    # multiple strategies span [-1, 1] in reverse order
    if self.config.get('sampling_strategies') == 1:
        self.sampling_param_values = np.zeros(1)
    else:
        self.sampling_param_values = np.linspace(
            -1.0, 1.0, self.config.get('sampling_strategies'))
        self.sampling_param_values = self.sampling_param_values[::-1]
    self.sampling_param_values *= self.inverse_volume
class Index:
    """k-nearest-neighbour search index over feature vectors stored in MongoDB.

    Each instance loads one shard (``current_index`` of ``total_indexes``)
    of a model's feature collection and serves nearest-neighbour queries.
    """

    def __init__(self, mongo_address, model_name, total_indexes, current_index,
                 n_neighbors=5, algorithm="brute", metric="euclidean"):
        """Load this shard's records and fit sklearn NearestNeighbors."""
        self.logger = Logger()
        client = MongoClient(mongo_address)
        self.db = client.features
        self.records = self.load_data(model_name, total_indexes, current_index)
        # assumes each document carries "feature" and "url" keys
        features = list(map(lambda x: x["feature"], self.records))
        urls = list(map(lambda x: x["url"], self.records))
        self.logger.info(
            f"Indexing {len(urls)} records for model {model_name} using algorithm {algorithm}"
        )
        # stays None for an empty shard; query() returns [] in that case
        self.neighbors = None
        if len(self.records):
            self.neighbors = NearestNeighbors(n_neighbors=min(
                n_neighbors, len(self.records)),
                                              algorithm=algorithm,
                                              metric=metric).fit(features)
        self.logger.info(f"Done indexing {len(self.records)} records")

    def load_data(self, model_name, total_indexes, current_index):
        """Return this shard's contiguous slice of the collection.

        Splits the collection into ``total_indexes`` equal ranges and loads
        the ``current_index``-th range via skip/limit.
        """
        # NOTE(review): collection.count() is deprecated in newer pymongo
        # (count_documents({}) is the replacement) -- verify driver version.
        total_num_of_records = self.db[model_name].count()
        num_of_records_per_index = int(total_num_of_records /
                                       max(total_indexes, 1))
        start_index = current_index * num_of_records_per_index
        self.logger.info(
            f"Load {num_of_records_per_index} / {total_num_of_records} records starting from index {start_index}"
        )
        return list(self.db[model_name].find(
            {}).skip(start_index).limit(num_of_records_per_index))

    def query(self, vector, size=10):
        """Return up to ``size`` nearest records as {'distance', 'url'} dicts.

        Returns [] when the index is empty (self.neighbors is None).
        """
        if not self.neighbors:
            return []
        distances, indices = self.neighbors.kneighbors([vector], min(
            len(self.records), size), return_distance=True)
        # kneighbors returns 2-D arrays (one row per query vector)
        distances = distances[0]
        indices = indices[0]
        results = []
        for i in range(len(indices)):
            distance = distances[i]
            index = indices[i]
            url = self.records[index]["url"]
            results.append({"distance": distance, "url": url})
        return results

    def test(self):
        # NOTE(review): hard-coded second record; raises IndexError when the
        # shard holds fewer than two records.
        return self.records[1]["feature"]
def __init__(self, params):
    """Connect to RabbitMQ (retrying forever), the storage bucket, the model
    and MongoDB, then block consuming messages from the queue.

    :param params: dict with required keys queue_name, model_url,
        bucket_name, deduplicate_model, deduplicate_threshold and optional
        rabbitmq_hostname / mongo_address.
    """
    queue_name = params["queue_name"]
    model_url = params["model_url"]
    host_name = params.get("rabbitmq_hostname", "localhost")
    mongo_address = params.get("mongo_address", "localhost:27017")
    self.bucket_name = params["bucket_name"]
    self.deduplicate_model = params["deduplicate_model"]
    self.deduplicate_threshold = params["deduplicate_threshold"]
    self.logger = Logger()
    # retry until the broker is reachable
    while True:
        try:
            if self.set_up_rabbitmq_connection(host_name, queue_name):
                break
        except Exception as e:
            self.logger.error(
                f"Failed to connect to rabbitmq queue {queue_name} at {host_name}. Reason: {e}"
            )
            time.sleep(3)
            continue
    self.num_threads = 4
    self.model_name = queue_name
    # BUG FIX: was Bucket(bucket_name) -- `bucket_name` is not a local name
    # in this scope (the value lives in self.bucket_name) and raised NameError.
    self.bucket_handler = Bucket(self.bucket_name)
    self.logger.info(f"Extract worker for model: {queue_name}")
    self.model = model_picker(queue_name, model_url)
    self.logger.info(f"Connecting to mongodb at {mongo_address}")
    client = MongoClient(mongo_address)
    self.db = client.features
    # start consuming (blocks until the channel stops)
    self.channel.start_consuming()
    self.connection.close()
def scanWifi():
    """Scan for nearby Wi-Fi networks via ``sudo iwlist <iface> scan``.

    Returns a list of WifiAp(macAddr, ssid); empty when the scan fails.
    May raise subprocess.TimeoutExpired after 30 seconds.
    """
    availableWifiNetworks = []
    shcmd = 'sudo iwlist ' + WIFI_INTERFACE + ' scan'
    response = subprocess.run(shcmd.split(' '),
                              capture_output=True,
                              timeout=30)
    if response.returncode == 0:
        output = response.stdout.decode("utf-8").split('\n')
        macAddr = ''
        ssid = ''
        for line in output:
            if 'Address' in line:
                # NOTE(review): [1:-1] strips the leading space but also the
                # final character of the address string -- confirm against
                # real iwlist output before relying on the MAC value.
                macAddr = line.split('Address:')[-1][1:-1]
            if 'ESSID' in line:
                # strips the surrounding quotes from ESSID:"name"
                ssid = line.split(':')[-1][1:-1]
            # emit one entry once both fields of a cell have been collected,
            # then reset for the next cell
            if macAddr != '' and ssid != '':
                availableWifiNetworks.append(WifiAp(macAddr, ssid))
                macAddr = ''
                ssid = ''
    else:
        Logger.log(Logger.LogLevel.error, "Wifi Scaning failed")
    return availableWifiNetworks
class ResultsPage(ElementsInteractions):
    """Page object for the TVmaze search-results page."""

    def __init__(self, driver):
        super().__init__(driver)
        self.driver = driver

    log = Logger.func_logger()
    # target link and the CSS classes used as locators
    url_batman = "https://www.tvmaze.com/shows/975/batman"
    class_url = "white-text"
    class_back_btn = "btn-primary"

    def find_url_batman(self):
        """Click the result anchor whose href is the Batman show URL.

        Raises ValueError when no anchor carries that href.
        """
        anchors = self.get_all_elements(self.class_url, "class")
        hrefs = [anchor.get_attribute('href') for anchor in anchors]
        anchors[hrefs.index(self.url_batman)].click()

    def return_page(self):
        """Navigate the browser back to the previous page."""
        self.back_page()

    def click_back_btn(self):
        """Click the primary back button."""
        self.click_element(self.class_back_btn, "class")
class ElementsInteractions:
    """Logging wrappers around common Appium/Selenium driver interactions."""

    log = Log.func_logger()

    def __init__(self, driver):
        self.driver = driver

    def locator(self, locator_type):
        """Translate a short locator-type name into a selenium By strategy.

        Returns False (after logging an error) for unknown types.
        """
        if locator_type == "id":
            return By.ID
        elif locator_type == "name":
            return By.NAME
        elif locator_type == "class":
            return By.CLASS_NAME
        elif locator_type == "xpath":
            return By.XPATH
        elif locator_type == "tag":
            return By.TAG_NAME
        else:
            self.log.error("Locator Type : " + locator_type +
                           " entered is not found")
            return False

    def verify_activity(self, activity_name):
        """Poll once per second (up to 10 tries) until the current activity
        matches; fail the test with a screenshot when it never matches.

        BUG FIX: the loop guard was ``while cont_max_time > 10`` which is
        False on entry, so the verification never ran at all.
        """
        cont_max_time = 0
        while cont_max_time < 10:
            time.sleep(1)
            cont_max_time += 1
            if activity_name == self.driver.current_activity:
                self.log.info("Activity name match with: " + activity_name)
                break
            elif activity_name != self.driver.current_activity and cont_max_time == 10:
                # BUG FIX: was self.driver.activity_name, which is not a
                # driver attribute and raised AttributeError on failure.
                self.take_screenshot(self.driver.current_activity)
                self.log.info("Activity name expected: " + activity_name)
                assert False

    def explicit_wait(self, locator_value, locator_type, max_time):
        """Wait up to max_time for the element(s); return True/False."""
        try:
            locator_by_type = self.locator(locator_type)
            WebDriverWait(self.driver, max_time).until(
                ec.presence_of_all_elements_located(
                    (locator_by_type, locator_value)))
            self.log.info("Element found with locator " + locator_value +
                          " using locatorType " + locator_by_type)
            return True
        except Exception:
            self.log.error("Element not found with locator " + locator_value +
                           " using locatorType " + locator_type)
            return False

    def get_element(self, locator_value, locator_type):
        """Find and return a single element, or None when not found."""
        element = None
        try:
            locator_by_type = self.locator(locator_type)
            element = self.driver.find_element(locator_by_type, locator_value)
            self.log.info("Element found with locator " + locator_value +
                          " using locatorType " + locator_by_type)
        except Exception:
            self.log.error("Element not found with locator " + locator_value +
                           " using locatorType " + locator_type)
            print_stack()
        return element

    def wait_element(self, locator_value, locator_type):
        """Wait (25s, 1s polling) for an element; screenshot and fail the
        test (assert False) when it never appears."""
        try:
            locator_by_type = self.locator(locator_type)
            wait = WebDriverWait(self.driver, 25,
                                 poll_frequency=1,
                                 ignored_exceptions=[
                                     ElementNotVisibleException,
                                     NoSuchElementException
                                 ])
            element = wait.until(
                ec.presence_of_element_located(
                    (locator_by_type, locator_value)))
            self.log.info("Element found with locator value " + locator_value +
                          " using locatorType " + locator_type)
        except Exception:
            self.log.error("Element not found with locator value " +
                           locator_value + " using locatorType " +
                           locator_type)
            print_stack()
            self.take_screenshot(locator_type)
            assert False
        return element

    def press_element(self, locator_value, locator_type):
        """Click an element located via wait_element; fail the test on error."""
        try:
            element = self.wait_element(locator_value, locator_type)
            element.click()
            self.log.info("Clicked on element with locator value " +
                          locator_value + " using locatorType " + locator_type)
        except Exception:
            self.log.error("Unable to Click on element with locator value " +
                           locator_value + " using locatorType " +
                           locator_type)
            print_stack()
            assert False

    def send_text(self, locator_value, locator_type, text):
        """Type text into an element; screenshot and fail the test on error."""
        try:
            element = self.wait_element(locator_value, locator_type)
            element.send_keys(text)
            self.log.info("Sent the text " + text +
                          " in element with locator value " + locator_value +
                          " using locatorType " + locator_type)
        except Exception:
            self.log.error("Unable to Sent the text " + text +
                           " in element with locator value " + locator_value +
                           "using locatorType " + locator_type)
            print_stack()
            self.take_screenshot(locator_type)
            assert False

    def get_text(self, locator_value, locator_type):
        """Return an element's text, or None when it cannot be read."""
        element_text = None
        try:
            element = self.wait_element(locator_value, locator_type)
            element_text = element.text
            self.log.info("Got the text " + element_text +
                          " from element with locator value " + locator_value +
                          " using locatorType " + locator_type)
        except Exception:
            self.log.error(
                "Unable to get the text from element with locator value " +
                locator_value + "using locatorType " + locator_type)
            print_stack()
        return element_text

    def get_attribute(self, locator_value, locator_type, attribute_name):
        """Return the named attribute of an element, or None on failure."""
        attribute = None
        try:
            element = self.wait_element(locator_value, locator_type)
            attribute = element.get_attribute(attribute_name)
            self.log.info("Got the attribute " + attribute_name + " -> " +
                          attribute + " from element with locator value " +
                          locator_value + " using locatorType " +
                          locator_type)
        except Exception:
            self.log.error("Unable to get the attribute " + attribute_name +
                           " from element with locator value " +
                           locator_value + "using locatorType " +
                           locator_type)
            print_stack()
        return attribute

    def is_element_displayed(self, locator_value, locator_type):
        """Return element.is_displayed(), or None when the element is missing."""
        element_displayed = None
        try:
            element = self.wait_element(locator_value, locator_type)
            element_displayed = element.is_displayed()
            self.log.info(
                "Element is Displayed on web page with locator value " +
                locator_value + " using locatorType " + locator_type)
        except Exception:
            self.log.error(
                "Element is not Displayed on web page with locator value " +
                locator_value + " using locatorType " + locator_type)
            print_stack()
        return element_displayed

    def take_screenshot(self, text):
        # intentional no-op in this variant; subclasses/siblings attach
        # screenshots to the report
        pass
class Worker:
    """RabbitMQ consumer that extracts image features and stores them in MongoDB.

    Consumes file names from a queue, downloads each file from the bucket,
    extracts features with the configured model, optionally deduplicates via
    a serving endpoint, and persists {url, feature} documents.
    """

    def __init__(self, params):
        """Connect to RabbitMQ (retrying forever), bucket, model and MongoDB,
        then block consuming the queue.

        :param params: dict with queue_name, model_url, bucket_name,
            deduplicate_model, deduplicate_threshold and optional
            rabbitmq_hostname / mongo_address.
        """
        queue_name = params["queue_name"]
        model_url = params["model_url"]
        host_name = params.get("rabbitmq_hostname", "localhost")
        mongo_address = params.get("mongo_address", "localhost:27017")
        self.bucket_name = params["bucket_name"]
        self.deduplicate_model = params["deduplicate_model"]
        self.deduplicate_threshold = params["deduplicate_threshold"]
        self.logger = Logger()
        # retry until the broker is reachable
        while True:
            try:
                if self.set_up_rabbitmq_connection(host_name, queue_name):
                    break
            except Exception as e:
                self.logger.error(
                    f"Failed to connect to rabbitmq queue {queue_name} at {host_name}. Reason: {e}"
                )
                time.sleep(3)
                continue
        self.num_threads = 4
        self.model_name = queue_name
        # BUG FIX: was Bucket(bucket_name) -- `bucket_name` is not defined in
        # this scope (the value lives in self.bucket_name) and raised NameError.
        self.bucket_handler = Bucket(self.bucket_name)
        self.logger.info(f"Extract worker for model: {queue_name}")
        self.model = model_picker(queue_name, model_url)
        self.logger.info(f"Connecting to mongodb at {mongo_address}")
        client = MongoClient(mongo_address)
        self.db = client.features
        # start consuming (blocks)
        self.channel.start_consuming()
        self.connection.close()

    def set_up_rabbitmq_connection(self, host_name, queue_name):
        """Declare the queue and fanout exchange, bind them, and register
        ``process`` as the consumer callback.  Returns True on success."""
        credentials = pika.PlainCredentials('admin', 'admin')
        self.connection = pika.BlockingConnection(
            pika.ConnectionParameters(host_name, credentials=credentials))
        self.channel = self.connection.channel()
        self.channel.queue_declare(queue=queue_name, durable=True)
        self.channel_name = "features"
        self.channel.exchange_declare(exchange=self.channel_name,
                                      exchange_type="fanout",
                                      durable=True)
        self.channel.queue_bind(exchange=self.channel_name, queue=queue_name)
        # cap unacked deliveries so slow extraction doesn't hoard messages
        self.channel.basic_qos(prefetch_count=20)
        # set up subscription on the queue
        self.channel.basic_consume(queue_name, self.process)
        return True

    def get_public_url(self, file_name):
        """Public storage URL for an uploaded object."""
        return f"https://storage.googleapis.com/{self.bucket_name}/{file_name}"

    def check_duplication(self, img_name, feature):
        """Ask the dedup serving endpoint whether ``feature`` already exists.

        Publishes the file name to the `duplicated_files` queue when the best
        match falls within the configured threshold.  Returns True when the
        image is a duplicate, False otherwise (including on request failure).
        """
        response = requests.post(
            f"http://serving-{self.deduplicate_model}:5000/search?json=true",
            json=feature.tolist())
        if response.status_code != 200:
            print(f"Deduplicate request fails for image {img_name}")
            return False
        result = response.json()
        if len(result) == 0:
            return False
        best_match = result[0]["distance"]
        is_duplicated = best_match <= self.deduplicate_threshold
        if is_duplicated:
            print(f"Image {img_name} already exists")
            self.channel.basic_publish(exchange="",
                                       routing_key="duplicated_files",
                                       body=img_name)
        return is_duplicated

    @FAILURE_COUNTER.count_exceptions()
    @REQUEST_TIME.time()
    def process(self, ch, method, properties, file_name):
        """Consume one message: download, extract, dedup-check, persist, ack."""
        file_name = file_name.decode()
        print(f"Processing file {file_name}")
        downloaded_dir = "./tmp"
        local_file_path = self.bucket_handler.download(file_name,
                                                       downloaded_dir)
        feature = extract_features(local_file_path, self.model)
        if self.deduplicate_model:
            is_duplicated = self.check_duplication(file_name, feature)
            if is_duplicated:
                # ack so the duplicate is not redelivered
                self.channel.basic_ack(delivery_tag=method.delivery_tag)
                return
        self.db[self.model_name].insert_one({
            "url": self.get_public_url(file_name),
            "feature": feature.tolist()
        })
        self.channel.basic_ack(delivery_tag=method.delivery_tag)
class Crawler(object):
    """Provides utilities for retrieving website content.

    Wraps a requests_html.HTMLSession with header generation, http/https
    scheme fallback, attribute whitelisting and error-record bookkeeping.
    The ``@error_trap``-decorated helpers return ``(value, error)`` pairs.
    """

    def __init__(self, **kwargs):
        """Configure defaults; every option is pushed into kwargs via
        setdefault so the caller's dict reflects the effective config."""
        agent_type = kwargs.setdefault('agent_type', None)
        referer = kwargs.setdefault('referer', None)
        self.headers = kwargs.setdefault(
            'headers',
            HeaderGenerator().header(agent_type=agent_type, referer=referer))
        self.timeout = kwargs.setdefault('timeout', 10)
        # attributes get() may read from a response object
        self.resp_attributes = kwargs.setdefault('resp_attributes', [
            'content', 'encoding', 'headers', 'history', 'html', 'json', 'ok',
            'reason', 'status_code', 'text', 'url'
        ])
        # attributes get() may read from an HTML/Element object
        self.elem_attributes = kwargs.setdefault('elem_attributes', [
            'absolute_links', 'base_url', 'encoding', 'full_text', 'html',
            'links', 'raw_html', 'text', 'url'
        ])
        self._logging = kwargs.setdefault('logging', True)
        if self._logging:
            log_path = kwargs.setdefault('log_path', '.')
            log_file = kwargs.setdefault('log_file', 'out.log')
            log_name = kwargs.setdefault('log_name', __name__)
            self._logger = Logger(name=log_name,
                                  log_path=log_path,
                                  log_file=log_file)
        self.session = requests_html.HTMLSession()
        # accumulated error records; flushed by write_errors()
        self._err_recs = []

    def _push_error(self, error, url, comp_id=None, attr=None):
        """Log (when enabled) and record one error occurrence."""
        c_id = str(comp_id) if comp_id else comp_id
        if self._logging:
            if c_id:
                msg = ('\nRequest for response from {} for company {} ' +
                       'threw exception: {}\n').format(url, c_id, error)
            elif attr:
                msg = (
                    '\nRequest for "{}" from {} threw exception: {}\n'.format(
                        attr, url, error))
            else:
                msg = ('\nRequest for response from {} threw exception: {}\n'.
                       format(url, error))
            self._logger.error(msg)
        self._err_recs.append({
            'time': strftime('%Y-%m-%d %H:%M:%S'),
            'company_profile_id': c_id,
            'attribute': attr,
            'url': url,
            'exception': error
        })

    @error_trap
    def _get_response(self, url, headers, timeout, cookies):
        """GET ``url``; return (response, failure_reason_or_None, final_url)."""
        r = self.session.get(url,
                             headers=headers,
                             timeout=timeout,
                             cookies=cookies)
        if r is None:
            return None, None, url
        else:
            if r.ok:
                if self._logging:
                    self._logger.info(('\nOrig_URL: {}; Ret_URL: {}; ' +
                                       'status: {}, reason: {}\n').format(
                                           url, r.url, r.status_code,
                                           r.reason))
                return r, None, r.url
            else:
                if self._logging:
                    self._logger.warning(('\nOrig_URL: {}; Ret_URL: {}; ' +
                                          'status: {}, reason: {}\n').format(
                                              url, r.url, r.status_code,
                                              r.reason))
                return r, r.reason, r.url

    def response(self, url, headers=None, timeout=None, cookies=None,
                 c_id=None):
        """Fetch ``url``; on failure retry once with the http/https scheme
        flipped.  Returns the response object or None."""
        headers = headers or self.headers
        timeout = timeout or self.timeout

        def flip_scheme():
            u = furl(url)
            u.scheme = 'https' if u.scheme == 'http' else 'http'
            return u.url

        f_val, err = self._get_response(url, headers, timeout, cookies)
        if err or f_val[1] or f_val[0] is None:
            flipped_url = flip_scheme()
            f_val, err = self._get_response(flipped_url, headers, timeout,
                                            cookies)
            if err:
                self._push_error(err, flipped_url, comp_id=c_id)
                return None
            else:
                if f_val[0] is None:
                    self._push_error('Response is NULL', flipped_url,
                                     comp_id=c_id)
                if f_val[1]:
                    self._push_error(f_val[1], flipped_url, comp_id=c_id)
                return f_val[0]
        else:
            return f_val[0]

    @error_trap
    def _check_valid_get(self, obj, a):
        """Validate that attribute ``a`` is whitelisted for ``obj``'s type."""
        obj_type = type(obj)
        if obj_type == requests_html.HTMLResponse:
            assert a in self.resp_attributes, \
                ('Second parameter must be one of: {}'.
                 format(', '.join(self.resp_attributes)))
        elif ((obj_type == requests_html.HTML)
              or (obj_type == requests_html.Element)):
            assert a in self.elem_attributes, \
                ('Second parameter must be one of: {}'.
                 format(', '.join(self.elem_attributes)))
        else:
            raise TypeError('First parameter must be one of type ' +
                            'requests_html.HTMLResponse, ' +
                            'requests_html.HTML, or ' +
                            'requests_html.Element')
        return

    @error_trap
    def _get(self, obj, a):
        """Read attribute ``a`` from ``obj`` after validation; records errors."""
        _, err = self._check_valid_get(obj, a)
        if err:
            if type(err) == AssertionError:
                u = self.get(obj, 'url')
            else:
                u = None
            self._push_error(err, u, attr=a)
            return None
        else:
            # 'json' is the only callable in the whitelist
            attr = getattr(obj, a) if a != 'json' else getattr(obj, a)()
            if attr is None:
                u = self.get(obj, 'url') if a != 'url' else None
                self._push_error('NULL attribute', u, attr=a)
            return attr

    def get(self, obj, a):
        """Public accessor: return attribute ``a`` of ``obj`` (or None),
        recording any error encountered."""
        attr, err = self._get(obj, a)
        if err:
            # BUG FIX: original read
            #   u, e = self._get(obj, 'url') if a != 'url' else None, None
            # which parses as `u, e = (X if cond else None), None`, so `e`
            # was always None and `u` received the whole (value, err) tuple.
            if a != 'url':
                u, e = self._get(obj, 'url')
            else:
                u, e = None, None
            if e:
                self._push_error(e, u, attr='url')
            self._push_error(err, u, attr=a)
        return attr

    @error_trap
    def _write_errors(self, outfile):
        """Write accumulated error records to pickle/xlsx/csv by extension."""
        ft = outfile.split('.')[-1]
        assert (ft in ['pkl', 'xlsx', 'csv']), \
            'Output filename must specify a pickle (.pkl), ' + \
            'excel (.xlsx) or csv (.csv) file.'
        if ft == 'pkl':
            pd.DataFrame(self._err_recs).to_pickle(outfile)
        elif ft == 'xlsx':
            pd.DataFrame(self._err_recs).to_excel(outfile,
                                                  engine='xlsxwriter',
                                                  index=False)
        else:
            pd.DataFrame(self._err_recs).to_csv(outfile, index=False)
        return outfile

    def write_errors(self, out_fn):
        """Persist the error log to ``out_fn``; returns the filename or None."""
        outfile, err = self._write_errors(out_fn)
        if err:
            if self._logging:
                msg = '\nError while writing out error log: {}\n'.format(err)
                self._logger.error(msg)
        return outfile
def __init__(self, config_file=None, config_dict=None):
    """Store the configuration sources for later parsing.

    :param config_file: optional path to a configuration file.
    :param config_dict: optional configuration dictionary.
    """
    Logger.__init__(self, 'ConfigParser', verbosity=0)
    self.config_file = config_file
    self.config_dict = config_dict
import re
import os
import pdb
import sys
import time
import boto3
import xlsxwriter
from argparse import ArgumentParser
import pandas as pd
from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter
from utilities import Logger

# module-level logger and S3 client shared by the helpers below
logger = Logger()
s3 = boto3.client('s3')


def read_input(input_path, type):
    """ Read the input file

    :param input_path: path to the input file.
    :param type: 'csv' or 'xlsx' (shadows the builtin; kept for
        interface compatibility).  Any other value returns None.
    """
    if type == 'csv':
        # NOTE(review): os.path.join with a single argument is a no-op.
        # CSV values are NaN-filled with '' -- the xlsx path is not.
        data = pd.read_csv(os.path.join(input_path)).fillna('')
        return data
    if type == 'xlsx':
        data = pd.ExcelFile(os.path.join(input_path))
        # NOTE(review): looks like leftover debug output -- confirm and remove
        print(data.sheet_names[0])
        # parse only the first sheet
        df = data.parse(str(data.sheet_names[0]))
        return df


# (definition continues beyond this chunk)
def s3_upload(output, s3, bucket, folder):
args = parser.parse_args() # allow gpu to be specified from command line for testing if args.gpu >= 0: chosen_device = 'cuda:%d' % (args.gpu) globals()['device'] = torch.device(chosen_device) print(device) args.device = device # create log folder and log file try: now = '%s' % datetime.datetime.now().time() now = now.replace(':', '_') args.save_dir = os.path.join(args.task, '%d' % args.num_nodes, now) os.makedirs(args.save_dir) except: pass out_file = open(os.path.join(args.save_dir, 'results.txt'), 'w+') printer = Logger(out_file, args.stdout_print) # print the run args for key, value in sorted((vars(args)).items()): printer.print_out("{}: {}".format(key, value)) if args.task == 'tsp': train_tsp(args) elif args.task == 'vrp': train_vrp(args) else: raise ValueError('Task <%s> not understood'%args.task)
from driver_interactions.ElementsInteractions import ElementsInteractions
from driver_interactions.WebDriver import WebDriver
import utilities.Logger as Logger
import config.ConfigFile as ConfigFile
import time

# module-level logger shared by the behave hooks below
log = Logger.func_logger()


def before_all(context):
    """behave hook: start the browser and open the configured URL once,
    before the whole test run."""
    log.info("Script started")
    context.prepare_driver = WebDriver()
    context.driver = context.prepare_driver.init_driver()
    # page-interaction helper shared by all steps via the context
    context.bp = ElementsInteractions(context.driver)
    context.bp.launch_web_page(ConfigFile.url)


def after_all(context):
    """behave hook: tear the browser down after the whole test run."""
    # brief pause so the final page state is visible before shutdown
    time.sleep(2)
    context.driver.quit()
    log.info("Script ended")
from sklearn.preprocessing import OneHotEncoder #from sklearn.externals import joblib import joblib from sklearn.metrics import confusion_matrix import tensorflow as tf import pandas as pd from utilities import Logger from datetime import datetime TOKENIZER_PATH = 'model/tokenizer_vocab' LABEL_ENCODER_PATH = 'model/label_encoder.pkl' MODEL_PATH = 'model/model_weights.h5' LOG_FILE_PATH = 'results/' + datetime.now().strftime('%H_%M_%d_%m_%Y.log') logger = Logger(LOG_FILE_PATH) def prepare_sequences(tokenizer, texts, options): """Tokenizes the textual input and prepares sequences by applying padding""" text = [tokenizer.encode(sample) for sample in texts] text = preprocessing.sequence.pad_sequences(text, maxlen=options.maxlen) return text def train(options): """Loads data, trains the language detection model.""" train_data = pd.read_csv(options.train, sep='\t') train_examples = train_data.text.values.astype(str) train_labels = train_data.language.values.astype(str)
def __init__(self, config):
    """Store the config and initialize the Logger base as 'DB_Writer'.

    :param config: project config object providing get('verbosity').
    """
    self.config = config
    Logger.__init__(self, 'DB_Writer', self.config.get('verbosity'))
def __init__(self, config):
    """Store the config, initialize logging, and record available CPU count.

    :param config: project config object providing get('verbosity').
    """
    self.config = config
    Logger.__init__(self, 'SampleSelector',
                    verbosity=self.config.get('verbosity'))
    self.num_cpus = multiprocessing.cpu_count()
# see https://stackoverflow.com/questions/14132789/relative-imports-for-the-billionth-time?rq=1 # or https://stackoverflow.com/questions/11536764/how-to-fix-attempted-relative-import-in-non-package-even-with-init-py?rq=1 try: from baseattrs import BasicDataAttributes, BaseStats from gis import GisAnalyzer, GisAnalyzerWithClusterLabel from date_and_time import DateByAnalyzer from utilities import Logger from dataframeselector import TypeSelector, NameSelector except ImportError: from .baseattrs import BasicDataAttributes, BaseStats from .gis import GisAnalyzer, GisAnalyzerWithClusterLabel from .date_and_time import DateByAnalyzer from .utilities import Logger from .dataframeselector import TypeSelector, NameSelector log = Logger.initialize_log() class NumericalAnalyzer(BaseStats): """ Provides basic visualization for numerical features """ def __init__(self, data: Union[str, pd.DataFrame]): """ Initializes a NumericalAnalyzer object. Takes data file name or a pandas DataFrame as input :param data: link to a file or a pandas DataFrame: :type data: Union[str, pd.DataFrame] """ super().__init__(data)
class ElementsInteractions:
    """Logging wrappers around common Selenium web-driver interactions."""

    log = Log.func_logger()

    def __init__(self, driver):
        self.driver = driver

    def locator(self, locator_type):
        """Translate a short locator-type name into a selenium By strategy.

        Returns False (after logging an error) for unknown types.
        """
        if locator_type == "id":
            return By.ID
        elif locator_type == "name":
            return By.NAME
        elif locator_type == "class":
            return By.CLASS_NAME
        elif locator_type == "xpath":
            return By.XPATH
        elif locator_type == "css":
            return By.CSS_SELECTOR
        elif locator_type == "tag":
            return By.TAG_NAME
        elif locator_type == "link":
            return By.LINK_TEXT
        elif locator_type == "plink":
            return By.PARTIAL_LINK_TEXT
        else:
            self.log.error("Locator Type : " + locator_type +
                           " entered is not found")
            return False

    def launch_web_page(self, url):
        """Open ``url``; failures are logged but not raised."""
        try:
            self.driver.get(url)
            self.log.info("Web Page Launched with URL : " + url)
        except Exception:
            self.log.info("Web Page not Launched with URL : " + url)

    def go_to_url(self, url):
        """Navigate directly to ``url`` (no logging, no error handling)."""
        self.driver.get(url)

    def verify_page(self, page_name):
        """Fail the test (with screenshot) when the page title differs."""
        if page_name != self.driver.title:
            self.take_screenshot(self.driver.title)
            assert False

    def back_page(self):
        """Navigate the browser back one page."""
        self.driver.back()

    def explicit_wait(self, locator_value, locator_type, time):
        """Wait up to ``time`` seconds for the element(s); screenshot and
        fail the test when they never appear."""
        try:
            locator_by_type = self.locator(locator_type)
            WebDriverWait(self.driver, time).until(
                ec.presence_of_all_elements_located(
                    (locator_by_type, locator_value)))
            self.log.info("Element found with locator " + locator_value +
                          " using locatorType " + locator_by_type)
        except Exception:
            self.log.error("Element not found with locator " + locator_value +
                           " using locatorType " + locator_type)
            print_stack()
            self.take_screenshot(locator_type)
            assert False

    def get_element(self, locator_value, locator_type):
        """Find and return a single element, or None when not found."""
        element = None
        try:
            locator_by_type = self.locator(locator_type)
            element = self.driver.find_element(locator_by_type, locator_value)
            self.log.info("Element found with locator " + locator_value +
                          " using locatorType " + locator_by_type)
        except Exception:
            self.log.error("Element not found with locator " + locator_value +
                           " using locatorType " + locator_type)
            print_stack()
        return element

    def get_all_elements(self, locator_value, locator_type):
        """Find and return all matching elements, or None on failure."""
        elements = None
        try:
            locator_by_type = self.locator(locator_type)
            elements = self.driver.find_elements(locator_by_type,
                                                 locator_value)
            self.log.info("Elements found with locator " + locator_value +
                          " using locatorType " + locator_by_type)
        except Exception:
            self.log.error("Elements not found with locator " +
                           locator_value + " using locatorType " +
                           locator_type)
            print_stack()
        return elements

    def wait_element(self, locator_value, locator_type):
        """Wait (25s, 1s polling) for an element; screenshot and fail the
        test (assert False) when it never appears."""
        try:
            locator_by_type = self.locator(locator_type)
            wait = WebDriverWait(self.driver, 25,
                                 poll_frequency=1,
                                 ignored_exceptions=[
                                     ElementNotVisibleException,
                                     NoSuchElementException
                                 ])
            element = wait.until(
                ec.presence_of_element_located(
                    (locator_by_type, locator_value)))
            self.log.info("WebElement found with locator value " +
                          locator_value + " using locatorType " +
                          locator_type)
        except Exception:
            self.log.error("WebElement not found with locator value " +
                           locator_value + " using locatorType " +
                           locator_type)
            print_stack()
            self.take_screenshot(locator_type)
            assert False
        return element

    def click_element(self, locator_value, locator_type):
        """Click an element located via wait_element; fail the test on error."""
        try:
            element = self.wait_element(locator_value, locator_type)
            element.click()
            self.log.info("Clicked on WebElement with locator value " +
                          locator_value + " using locatorType " +
                          locator_type)
        except Exception:
            self.log.error(
                "Unable to Click on WebElement with locator value " +
                locator_value + " using locatorType " + locator_type)
            print_stack()
            assert False

    def send_text(self, text, locator_value, locator_type):
        """Type text into an element; screenshot and fail the test on error."""
        try:
            element = self.wait_element(locator_value, locator_type)
            element.send_keys(text)
            self.log.info("Sent the text " + text +
                          " in WebElement with locator value " +
                          locator_value + " using locatorType " +
                          locator_type)
        except Exception:
            self.log.error("Unable to Sent the text " + text +
                           " in WebElement with locator value " +
                           locator_value + "using locatorType " +
                           locator_type)
            print_stack()
            self.take_screenshot(locator_type)
            assert False

    def get_text(self, locator_value, locator_type):
        """Return an element's text, or None when it cannot be read."""
        element_text = None
        try:
            element = self.wait_element(locator_value, locator_type)
            element_text = element.text
            self.log.info("Got the text " + element_text +
                          " from WebElement with locator value " +
                          locator_value + " using locatorType " +
                          locator_type)
        except Exception:
            # BUG FIX: element_text is None when reading fails, and the
            # original concatenated it directly ("..." + element_text),
            # raising TypeError inside the handler and masking the real error.
            self.log.error("Unable to get the text " + str(element_text) +
                           " from WebElement with locator value " +
                           locator_value + "using locatorType " +
                           locator_type)
            print_stack()
        return element_text

    def is_element_displayed(self, locator_value, locator_type):
        """Return element.is_displayed(), or None when the element is missing."""
        element_displayed = None
        try:
            element = self.wait_element(locator_value, locator_type)
            element_displayed = element.is_displayed()
            self.log.info(
                "WebElement is Displayed on web page with locator value " +
                locator_value + " using locatorType " + locator_type)
        except Exception:
            self.log.error(
                "WebElement is not Displayed on web page with locator value " +
                locator_value + " using locatorType " + locator_type)
            print_stack()
        return element_displayed

    def scroll(self, locator_value, locator_type):
        """Scroll the viewport to an element; failures are logged only."""
        actions = ActionChains(self.driver)
        try:
            element = self.wait_element(locator_value, locator_type)
            actions.move_to_element(element).perform()
            self.log.info("Scrolled to WebElement with locator value " +
                          locator_value + " using locatorType " +
                          locator_type)
        except Exception:
            self.log.error(
                "Unable to scroll to WebElement with locator value " +
                locator_value + "using locatorType " + locator_type)
            print_stack()

    def take_screenshot(self, text):
        """Attach a PNG screenshot to the allure report under ``text``."""
        allure.attach(self.driver.get_screenshot_as_png(),
                      name=text,
                      attachment_type=AttachmentType.PNG)