def export_dict(config):
    """Pickle a TrieDictionary to config.dictionary_path if not already present.

    :param config: Configuration object providing dictionary_path (and
        whatever TrieDictionary's constructor reads from it).
    """
    # Imported lazily — presumably to avoid a circular import; confirm.
    from models.trie_dictionary import TrieDictionary
    if not os.path.isfile(config.dictionary_path):
        # Build and export only when no cached dictionary file exists.
        dictionary = TrieDictionary(config)
        with open(config.dictionary_path, 'wb') as fl:
            pickle.dump(dictionary, fl)
        logger.log_info('exported dictionary into', config.dictionary_path)
def sort(brand=False, model=False, year=False, price=False):
    """
    Sort cars by a single column of the cars table.

    Only one non-False parameter is honored; they are checked in the
    order brand, model, year, price.

    Args:
        - brand(bool):
        - model(bool):
        - year(bool):
        - price(bool):

    Returns(list | None): Sorted list of rows, or None if no parameter
        was selected.
    """
    if brand:
        sort_parameter = 'brand'
    elif model:
        sort_parameter = 'model'
    elif year:
        sort_parameter = 'year'
    elif price:
        sort_parameter = 'price'
    else:
        return None
    logger.log_info("Sorting by {}".format(sort_parameter))
    db = sqlite3.connect("cars.db")
    try:
        cursor = db.cursor()
        # sort_parameter comes from the fixed whitelist above, so this
        # format() cannot inject SQL.
        query = """SELECT * FROM cars ORDER BY {}""".format(sort_parameter)
        cursor.execute(query)
        res = cursor.fetchall()
    finally:
        # fix: the connection used to leak on every call.
        db.close()
    logger.log_info("Results are: {}".format(res))
    return res
def add_car(brand, url, model, is_new, year, price, miles=0):
    """
    Adds a new car to the cars table.

    Args:
        - brand(str)
        - url(str):
        - model(str):
        - is_new(int): 1 for True, 0 for False.
        - year(int):
        - price(int)
        - miles(int)

    Returns(int): 0 for success, 1 for failure.
    """
    logger.log_info("Adding a new car: ({}, {}, {}, {}, {})".format(
        model, is_new, year, miles, price))
    try:
        db = sqlite3.connect("cars.db")
        try:
            cursor = db.cursor()
            # Parameterized insert — values are never interpolated into SQL.
            query = """INSERT INTO cars
                       (url, brand, model, is_new, year, miles, price)
                       VALUES (?, ?, ?, ?, ?, ?, ?)"""
            cursor.execute(query,
                           (url, brand, model, is_new, year, miles, price))
            db.commit()
            return 0
        finally:
            # fix: the connection used to leak on both success and failure.
            db.close()
    except Exception as e:
        logger.log_error("Could not add car: {}".format(e))
        return 1
def check_car(model, is_new, year, price, miles=0):
    """
    Checks if a car exists.

    Args:
        - model(str)
        - is_new(int): 1 for True, 0 for False.
        - year(int):
        - price(int)
        - miles(int)

    Returns(bool): True if a matching row exists.
    """
    logger.log_info("Checking if car ({}, {}, {}, {}, {}) exists".format(
        model, is_new, year, price, miles))
    db = sqlite3.connect("cars.db")
    try:
        cursor = db.cursor()
        # fix: values were interpolated with format() into quoted SQL,
        # which breaks on quotes in `model` and allows SQL injection.
        # Use placeholders instead.
        query = """SELECT * FROM cars WHERE (
                   model=? AND is_new=? AND year=? AND price=? AND miles=?);"""
        cursor.execute(query, (model, is_new, year, price, miles))
        res = cursor.fetchone()
    finally:
        # fix: the connection used to leak on every call.
        db.close()
    logger.log_info("Results are: {}".format(res))
    if res:
        return True
    return False
def __init__(self, config):
    """
    Construct a Trie-backed dictionary and load its vocabulary.

    :param config: Configuration object providing damping_factor, alpha,
        beta, gamma, zeta and vocab_path.
    """
    # Trie bookkeeping tables; nexts starts with a single root node (key 0).
    self.nexts = {0: {}}
    self.marked = {}
    self.counts = {}
    self.freqs = {}
    self.parent = {}
    self.marked_children = {}
    self.cumulative_freq = {}
    self.size = 0
    self.total = 0
    # Seed the trie with the empty word (frequency 1) before loading.
    self.insert(('', 1))
    # Scoring hyper-parameters copied from the config.
    self.edit_damping_factor = config.damping_factor
    self.alpha = config.alpha
    self.beta = config.beta
    self.gamma = config.gamma
    self.zeta = config.zeta
    self.load_words_from_file(config.vocab_path)
    logger.log_info("Loaded Trie dictionary from:", config.vocab_path)
def compile(self):
    """Compile the wrapped Keras model with an Adam optimizer.

    Uses the instance's configured losses, metrics, learning rate and
    decay rate. Prints a wide model summary unless running in inference
    mode, then logs the compiled metric names.
    """
    from keras.optimizers import Adam  # fix: SGD was imported but never used
    optimizer = Adam(lr=self.learning_rate, decay=self.decay_rate)
    self.model.compile(loss=self.losses, metrics=self.metrics,
                       optimizer=optimizer)
    if not self.inference:
        # line_length=180 so long layer names are not truncated in the log.
        self.model.summary(line_length=180, print_fn=logger.output)
    logger.log_info('compiled', self.model.metrics_names)
def get_url(self, url):
    """Point the managed webdriver at the given url.

    Args:
        - url(str)
    """
    logger.log_info("Navigating to {}".format(url))
    driver = self.driver
    driver.get(url)
def add_task(self, task, index):
    """Append a (task, index) pair to the work queue.

    Args:
        - task(tupple): (cmd to execute, index).
        - index(str): Index on the mongodb to save the result.

    Returns(None):
    """
    logger.log_info("Adding task {} to the queue".format(task))
    queued_item = (task, index)
    self.queue.append(queued_item)
def execute_task(self, cmd, index):
    """
    Executes a given task from the queue and saves the result.

    Args:
        - cmd(str):
        - index(str):

    Returns(None):
    """
    logger.log_info("Executing cmd {}".format(cmd))
    # NOTE(security): shell=True runs cmd through the shell — make sure
    # cmd never contains untrusted input.
    output = subprocess.run(cmd, shell=True, stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)
    res = "{}\n{}".format(output.stdout, output.stderr)
    logger.log_info("Output is {}".format(res))
    # fix: acquire/release without try/finally leaked the lock whenever
    # db.save_output (or the pop) raised; the context manager always
    # releases it.
    with self.lock:
        logger.log_info("Saving result to db")
        db.save_output(res, index)
        logger.log_info("Destroying thread")
        self.thread_pool.pop()
def fit(self, X, Y):
    # Learn per-channel (kernel, bias) tuner weights from labeled examples
    # and persist them to final-weights.pkl. Returns (weights, bias).
    weights_path = os.path.join(self.tuner_dir, 'final-weights.pkl')
    logger.log_debug(X.shape, Y.shape, np.unique(Y, return_counts=True))
    #logger.log_debug("\n", np.round(X.mean(axis=0), 5))
    #logger.log_debug("\n", np.round(X.std(axis=0), 5))
    # Label classes: 0/1 are deletion decisions, 2/3 are addition decisions.
    msk_dels = (Y[:, 0] == 0) | (Y[:, 0] == 1)
    msk_adds = (Y[:, 0] == 2) | (Y[:, 0] == 3)
    # Addition examples use feature channels 3..5; remap labels {2,3}->{1,0}.
    # Boolean-mask indexing copies, so the remap does not mutate Y itself.
    X_adds = X[msk_adds]
    X_adds = X_adds[:, np.array([3, 4, 5]), ...]
    Y_adds = Y[msk_adds]
    Y_adds[Y_adds == 2] = 1
    Y_adds[Y_adds == 3] = 0
    combiner_adds = np.array(
        [[1, 0],
         [1, 0],
         [0, 1]])
    # Deletion examples use feature channels 0..2; remap labels {0,1}->{1,0}.
    X_dels = X[msk_dels]
    X_dels = X_dels[:, np.array([0, 1, 2]), ...]
    Y_dels = Y[msk_dels]
    Y_dels[Y_dels == 0] = 1
    Y_dels[Y_dels == 1] = 0
    combiner_dels = np.array(
        [[1, 0],
         [1, 0],
         [0, 1]])
    kernel_dels, bias_dels = self.create_network(X_dels, Y_dels, combiner_dels)
    kernel_adds, bias_adds = self.create_network(X_adds, Y_adds, combiner_adds)
    # Assemble full-size tables: identity weight / zero bias by default,
    # then overwrite the deletion (0..2) and addition (3..5) channel rows.
    weights = np.ones(X.shape[1:])
    bias = np.zeros(X.shape[1:])
    weights[np.array([0, 1, 2])] = kernel_dels
    bias[np.array([0, 1, 2])] = bias_dels
    weights[np.array([3, 4, 5])] = kernel_adds
    bias[np.array([3, 4, 5])] = bias_adds
    logger.log_debug("\n", np.round(weights, 3), "\n", np.round(bias, 3))
    #weights, bias = sparse.get_weights()
    makedirs(weights_path)
    with open(weights_path, 'wb') as fl:
        pickle.dump((weights, bias), fl)
    logger.log_info("logged weights into", weights_path, highlight=4)
    logger.log_full_report_into_file(weights_path)
    return weights, bias
def __init__(self, name='cmds', collection='tasks'):
    """
    Init a MongoDB database handle. If the db already exists, does nothing.

    Args:
        - name(str): Database name.
        - collection(str): Collection name.

    Returns(None):
    """
    logger.log_info("Creating database {}".format(name))
    # Default MongoClient() — connects to the local default MongoDB instance.
    self.client = MongoClient()
    logger.log_info("Client: {}".format(self.client))
    self.db = self.client[name]
    self.collection = self.db[collection]
def __init__(self, driver_path, headless=True):
    """
    Initialize a Firefox webdriver.

    Args:
        - driver_path(str): geckodriver full path.
        - headless(bool): add --headless option if True.
    """
    firefox_options = Options()
    if headless:
        firefox_options.add_argument("--headless")
    logger.log_info("Init webdriver")
    self.driver = webdriver.Firefox(options=firefox_options,
                                    executable_path=driver_path)
def save_output(self, output, index):
    """
    Store the output of a specific cmd on the database.

    Args:
        - output(str):
        - index(str): Index of the saved task

    Returns(None):
    """
    logger.log_info("Saving output {} with index {}".format(output, index))
    selector = {"_id": ObjectId(index)}
    update = {"$set": {"output": output}}
    self.collection.find_one_and_update(selector, update)
def train_data(self, gen, total=10000):
    # Build (or load cached) training io data for the tuner, then fit it.
    # Two cache layers: final-weights.pkl (finished result) and
    # iodata.pkl (the assembled X/Y arrays).
    weights_path = os.path.join(self.tuner_dir, 'final-weights.pkl')
    if os.path.isfile(weights_path):
        # Fast path: weights already trained — load and return them.
        with open(weights_path, 'rb') as fl:
            res = pickle.load(fl)
        logger.log_info('loaded weights from', weights_path, highlight=4)
        return res
    logger.start()
    iodata_path = os.path.join(self.tuner_dir, 'iodata.pkl')
    if os.path.isfile(iodata_path):
        with open(iodata_path, 'rb') as fl:
            X, Y = pickle.load(fl)
        logger.log_info('loaded io data from', iodata_path, highlight=4)
    else:
        logger.log_info('constructing iodata', highlight=4)
        X = []
        Y = []
        sz = 0
        last = None
        # Cap the generator at `total` samples.
        gen = take_first_n(gen, total)
        # chunksize = (total + 4 * NUM_THREADS - 1) // (4 * NUM_THREADS)
        if NUM_THREADS > 1:
            # Parallel extraction; imap preserves input order.
            with closing(multiprocessing.Pool(NUM_THREADS,
                                              maxtasksperchild=4)) as pool:
                for i, (x, y) in tqdm(enumerate(pool.imap(
                        self.get_data, gen)), total=total):
                    X.append(x)
                    Y.append(y)
                    sz += len(x)
                    # Progress log roughly every 50k accumulated examples.
                    if last is None or sz > last + 50000:
                        logger.log_debug('%d/%d' % (i + 1, total), "with",
                                         sz, "examples so far..")
                        last = sz
                pool.close()
                pool.join()
        else:
            # Serial fallback with the same bookkeeping.
            for i, (x, y) in tqdm(enumerate(map(
                    self.get_data, gen)), total=total):
                X.append(x)
                Y.append(y)
                sz += len(x)
                if last is None or sz > last + 50000:
                    logger.log_debug('%d/%d' % (i + 1, total), "with", sz,
                                     "examples so far..")
                    last = sz
        X = np.concatenate(X, axis=0)
        Y = np.concatenate(Y, axis=0)
        makedirs(iodata_path)
        with open(iodata_path, 'wb') as fl:
            pickle.dump((X, Y), fl)
        logger.log_info('dumping io data into', iodata_path, highlight=4)
    assert self.fix_delimiters_only, "The possibilities are not handled for non delimiters"
    return self.fit(X, Y)
def get_output(self, index):
    """
    Return the output of a specific cmd based on the index.

    Args:
        - index(str):

    Returns(dict| None): {"output": output}
    """
    logger.log_info("Finding element {}".format(index))
    try:
        document = self.collection.find_one({"_id": ObjectId(index)})
    except Exception as e:
        logger.log_error("Error getting output: {}".format(e))
        return None
    return document
def on_epoch_end(_self, epoch, logs={}):
    # Epoch-end callback: log metrics, checkpoint every `freq` epochs and
    # run a sample analysis every `eval_freq` epochs.
    # NOTE(review): `_self` is the callback instance while the bare `self`
    # below must resolve from an enclosing scope (nested class / closure)
    # — confirm before refactoring.
    delimiter = ' - '
    time_delta = datetime.datetime.now() - _self.st
    logger.log_info('\nepoch: %d%s%s%stime taken: %s'
                    % (epoch + 1, delimiter,
                       delimiter.join('%s: %.8f' % i for i in logs.items()),
                       delimiter, time_delta))
    if (epoch + 1) % _self.freq == 0:
        # Periodic checkpoint under an epoch-prefixed file name.
        path, fil, ext = extract_file_name(self.model_save_path)
        save_path = os.path.join(
            path, 'it%.5d-' % (epoch + 1) + fil + '.' + ext)
        self.save_model(save_path)
        logger.log_full_report_into_file(self.model_save_path, keep_log=True)
    if (epoch + 1) % _self.eval_freq == 0:
        self.sample_analysis()
def scrape_page(brand, used):
    # Scrape one results page: every <a> element whose class matches the
    # expected vehicle-card class is parsed into (url, model, year, price,
    # miles) and inserted into the DB if not already known.
    web_driver.implicit_wait(10)
    cars = web_driver.driver.find_elements_by_tag_name('a')
    # Used-car cards carry an extra hash-suffixed class.
    if used:
        car_class = 'linkable card card-1 card-shadow card-shadow-hover '\
                    'vehicle-card _1qd1muk'
    else:
        car_class = 'linkable card card-1 card-shadow card-shadow-hover '\
                    'vehicle-card'
    for car in cars:
        clas = car.get_attribute('class')
        if clas != car_class:
            continue
        url = car.get_attribute('href')
        logger.log_info("url is: {}".format(url))
        card_top = car.find_element_by_class_name('vehicle-card-top')
        spans = card_top.find_elements_by_tag_name('span')
        # Some cards have an extra leading span (presumably a badge/label),
        # shifting year/model by one position — confirm against the site.
        if len(spans) > 2:
            year = spans[1]
            model = spans[2]
        else:
            year = spans[0]
            model = spans[1]
        year = year.text
        model = model.text.replace(" ", "_")
        if used:
            # Mileage and price are separate elements on used-car cards;
            # strip thousands separators and unit/currency markers.
            miles = car.find_elements_by_css_selector('div.font-size-1.'
                                                      'text-truncate')
            miles = miles[2].text
            miles = miles.replace(',', '').replace('miles', '')
            price = car.find_element_by_css_selector(
                'div.heading-3.margin-y-1.'
                'font-weight-bold').text
            price = price.replace(',', '').replace('$', '')
        else:
            # New cars list no mileage; the price sits in the location row,
            # apparently formatted like "label: $1,234" — confirm.
            miles = 0
            selector = 'div.vehicle-card-location.padding-bottom-1'
            price = car.find_element_by_css_selector(selector)
            selector = 'div.d-flex.flex-row.justify-content-between'
            price = price.find_element_by_css_selector(selector).text
            price = price.split(":")[1].replace(",", "").replace("$", "")
        is_new = 1 if not used else 0
        # Insert only cars we have not seen before.
        if not db_helper.check_car(model, is_new, year, price, miles):
            db_helper.add_car(brand, url, model, is_new, year, price, miles)
def __init__(self, config, from_tuner=False):
    # Build forward and backward language models from copies of the config,
    # then load tuner weights (falling back to identity defaults).
    self.forward_model_config = config.copy()
    self.forward_model_config.model = MODELS_ENUM.forward_language_model
    self.forward_model_config = get_language_model_config(
        **self.forward_model_config.__dict__)
    self.forward_language_model = RNNLanguageModel(self.forward_model_config)
    self.backward_model_config = config.copy()
    self.backward_model_config.model = MODELS_ENUM.backward_language_model
    self.backward_model_config = get_language_model_config(
        **self.backward_model_config.__dict__)
    self.backward_language_model = RNNLanguageModel(self.backward_model_config)
    self.bidirectional_weights = config.bidirectional_weights
    self.bidir = config.bidir
    self.lflen = config.lflen
    self.beam_size = config.beam_size
    self.history_length = config.history_length
    self.alphabet = DECODER_DICT
    self.tokenization_delimiters = ' \t'
    self.fix_delimiters_only = config.fix_delimiters_only
    # Six scoring features; with bidirectional weighting each feature
    # presumably carries a (forward, backward) pair — confirm.
    if self.bidirectional_weights:
        self.weights = np.ones((6, 2))
        self.bias = np.zeros((6, 2))
    else:
        self.weights = np.ones((6,))
        self.bias = np.zeros((6,))
    self.fixer_repr = config.fixer_repr
    if not from_tuner:
        logger.log_debug(self, ', beam:', self.beam_size,
                         ', delimiters only:', self.fix_delimiters_only)
    # Try loading tuned weights; on any failure keep the defaults above.
    try:
        with open(os.path.join(config.tuner_dir, 'final-weights.pkl'),
                  'rb') as fl:
            self.weights, self.bias = pickle.load(fl)
        if not from_tuner:
            logger.log_info('loaded weights..\n', np.round(self.weights, 3),
                            '\n', np.round(self.bias, 3))
    except Exception as err:
        if not from_tuner:
            logger.log_error(err)
            logger.log_error('weights not found, please train tuner!.. '
                             'using default weights')
            logger.log_info('loaded temp weights..\n', self.weights,
                            '\n', self.bias)
def create_table():
    """
    Creates cars.db file and a new table called cars.
    """
    db = sqlite3.connect("cars.db")
    try:
        cursor = db.cursor()
        logger.log_info("Creating database")
        query = """CREATE TABLE IF NOT EXISTS cars (
                   id INTEGER PRIMARY KEY,
                   url TEXT,
                   brand TEXT,
                   model TEXT,
                   is_new INTEGER,
                   year INTEGER,
                   miles INTEGER,
                   price INTEGER
                   );"""
        cursor.execute(query)
        # fix: the statement was executed but never committed, and the
        # connection was never closed.
        db.commit()
    finally:
        db.close()
def add_task(self, cmd):
    """
    Add a new task to the database.

    Args:
        - cmd(str): cmd to add to the database.

    Returns(str): Returns the index of the new entry.
    """
    logger.log_info("Inserting new cmd: {}".format(cmd))
    new_entry = {'cmd': cmd}
    insertion = self.collection.insert_one(new_entry)
    index = insertion.inserted_id
    logger.log_info("New entry {} created with index {}".format(
        new_entry, index))
    return index
def on_batch_end(_self, batch, logs={}):
    # Batch-end callback: checkpoint + log every 1000 steps, sample
    # analysis every 1500 steps (counted across epochs, not per epoch).
    # NOTE(review): `_self` is the callback instance while the bare `self`
    # below must resolve from an enclosing scope (nested class / closure)
    # — confirm before refactoring.
    _self.total_steps_so_far += 1
    if (_self.total_steps_so_far + 1) % 1000 == 0:
        # Checkpoint under a step-prefixed file name.
        path, fil, ext = extract_file_name(self.model_save_path)
        save_path = os.path.join(
            path,
            'batch%.7d-' % (_self.total_steps_so_far + 1) + fil + '.' + ext)
        self.save_model(save_path)
        delimiter = ' - '
        logger.log_info('\nbatch: %d%s%s'
                        % (_self.total_steps_so_far + 1, delimiter,
                           delimiter.join('%s: %.5f' % i
                                          for i in logs.items())))
        logger.log_full_report_into_file(self.model_save_path, keep_log=True)
    if (_self.total_steps_so_far + 1) % 1500 == 0:
        self.sample_analysis()
def build_data_loader(cfg, dataset, split):
    """Construct a torch DataLoader for the given dataset split.

    Training split shuffles and uses the configured batch size; any other
    split is deterministic with batch size 1.
    """
    with logger.log_info(msg="Build data loader of {} set for {}".format(split, dataset)):
        is_train = split == "train"
        dataset = build_dataset(cfg, dataset, split)
        data_loader = torch.utils.data.DataLoader(
            dataset=dataset,
            batch_size=cfg.gnrl.batch if is_train else 1,
            shuffle=is_train,
            num_workers=cfg.data.num_workers,
        )
    return data_loader
def add_words(self, vocab):
    """
    Add words to the dictionary.

    Each entry is either a bare word or "word<TAB>frequency"; bare words
    get a frequency just above the cutoff so they are always inserted.

    :param list(str) vocab: List of words.
    """
    # Hoisted: the frequency cutoff is loop-invariant; it was re-bound on
    # every iteration.
    threshold = 55
    count = 0
    for word in vocab:
        if '\t' in word:
            word, freq = word.split('\t')
            freq = int(freq)
        else:
            # Unannotated words are treated as frequent enough to insert.
            freq = threshold + 1
        if freq > threshold:
            # Lowercase words whose tail is already lowercase (i.e. only
            # the first letter may be capitalized), so "The" and "the"
            # share one trie entry.
            if len(word) > 2 and word[1:].lower() == word[1:]:
                self.insert((word.lower(), freq))
            else:
                self.insert((word, freq))
            count += 1
    logger.log_info('inserted', count, '/', len(vocab), 'words...')
def wait_for_element(self, timeout=10, class_name=None, xpath=None, _id=None,
                     name=None, link_text=None, partial_link_text=None,
                     css_selector=None):
    """
    Waits for an element to show.

    Args:
        - timeout(int): timeout in seconds
        - class_name(str):
        - xpath(str):
        - _id(str):
        - name(str):
        - link_text(str):
        - partial_link_text(str):
        - css_selector(str):

    Returns(tuple): (rc, element): rc is equal to 0 for success,
        -1 for error.
    """
    # Poll every 0.5s until the locator resolves or the timeout elapses.
    init = time.time()
    while time.time() - init < timeout:
        try:
            element = self._find_element(class_name, xpath, _id, name,
                                         link_text, partial_link_text,
                                         css_selector)
            return 0, element
        except Exception as e:
            logger.log_info("Element is not present."
                            "waiting 0.5s: {}".format(e))
            time.sleep(0.5)
    logger.log_error("Could not find element. Returning error")
    return -1, None
def evaluate( epoch: int, cfg, model: torch.nn.Module, data_loader: torch.utils.data.DataLoader, device: torch.device, loss_fn, metrics_handler, phase="valid", logger=None, save=False, *args, **kwargs, ): model.eval() # Read data and evaluate and record info. msg="{} at epoch: {}".format(phase.upper(), str(epoch).zfill(3)) with logger.log_info(msg=msg, level="INFO", state=True, logger=logger): pbar = tqdm(total=len(data_loader), dynamic_ncols=True) for idx, data in enumerate(data_loader): outputs, targets, loss = utils.infer_and_calc_loss( model=model, data=data, loss_fn=loss_fn, device=device, infer_version=cfg.gnrl.infer, *args, **kwargs ) if save: # Save results to directory. for idx_batch in range(outputs.shape[0]): out = (outputs[idx_batch].detach().cpu().numpy() * 255).astype(np.uint8) dir_save = os.path.join(cfg.save.dir, data_loader.dataset.dataset, phase) utils.try_make_path_exists(dir_save) path2dest = os.path.join(dir_save, data["img_idx"][idx_batch]+".png") succeed = cv2.imwrite(path2dest, out.transpose(1, 2, 0)) if not succeed: utils.notify("Failed to save image to {}".format(path2dest)) cur_loss = loss.detach().cpu().item() avg_loss = metrics_handler.update(data_loader.dataset.dataset, phase, epoch, "loss", cur_loss) utils.calc_and_record_metrics(data_loader.dataset.dataset, phase, epoch, outputs, targets, metrics_handler, 1.0) pbar.set_description("Epoch: {:>3} / {:<3}, avg loss: {:<5}, cur loss: {:<5}".format( epoch, cfg.train.max_epoch, round(avg_loss, 6), round(cur_loss, 6) )) pbar.update() pbar.close() metrics_handler.summarize(data_loader.dataset.dataset, phase, epoch, logger=logger) return
def compare_two(original, data_a, data_b, count):
    """Log a side-by-side comparison of two fixers' outputs on one sample.

    Prints model B's report first, then model A's, declares model A the
    better one, and returns 1 so the caller can keep a running count.
    """
    a_correct, a_corrupt, a_fixed, a_benchmark, a_model = data_a
    b_correct, b_corrupt, b_fixed, b_benchmark, b_model = data_b
    _scores_a, a_output = comparator.evaluate(a_correct, a_corrupt, a_fixed)
    _scores_b, b_output = comparator.evaluate(b_correct, b_corrupt, b_fixed)
    # Report each model's fix against the original text: B first, then A.
    for fixed_text, model_name, benchmark, hl in (
            (b_output, b_model, b_benchmark, 3),
            (a_output, a_model, a_benchmark, 2)):
        logger.output(original)
        logger.log_info(model_name, benchmark, highlight=hl)
        logger.output(fixed_text)
    logger.log_info(a_model, 'is better', highlight=5)
    logger.log_info(count, 'errors so far')
    logger.log_seperator()
    logger.log_seperator()
    return 1
def inference( cfg, model: torch.nn.Module, data_loader: torch.utils.data.DataLoader, device: torch.device, phase, logger=None, *args, **kwargs, ): model.eval() # Read data and evaluate and record info. with logger.log_info(msg="Inference", level="INFO", state=True, logger=logger): pbar = tqdm(total=len(data_loader), dynamic_ncols=True) for idx, data in enumerate(data_loader): outputs, *_ = utils.infer(model=model, data=data, device=device, infer_version=cfg.gnrl.infer, infer_only=True, *args, **kwargs) # Save results to directory. for idx_batch in range(outputs.shape[0]): out = (outputs[idx_batch].detach().cpu().numpy() * 255).astype( np.uint8) dir_save = os.path.join(cfg.save.dir, data_loader.dataset.dataset, phase) utils.try_make_path_exists(dir_save) path2dest = os.path.join(dir_save, data["img_idx"][idx_batch] + ".png") succeed = cv2.imwrite(path2dest, out.transpose(1, 2, 0)) if not succeed: utils.notify( "Failed to save image to {}".format(path2dest)) pbar.update() pbar.close()
def train_one_epoch( epoch: int, cfg, model: torch.nn.Module, data_loader: torch.utils.data.DataLoader, device: torch.device, loss_fn, optimizer: torch.optim.Optimizer, lr_scheduler, metrics_handler, logger=None, *args, **kwargs, ): model.train() # Prepare to log info. # Read data and train and record info. data_loader.dataset.update() msg = "TRAIN at epoch: {}, lr: {:<5}".format(str(epoch).zfill(3), optimizer.param_groups[0]["lr"]) with logger.log_info(msg=msg, level="INFO", state=True, logger=logger): pbar = tqdm(total=len(data_loader), dynamic_ncols=True) for idx, data in enumerate(data_loader): optimizer.zero_grad() outputs, targets, loss = utils.infer_and_calc_loss( model=model, data=data, loss_fn=loss_fn, device=device, infer_version=cfg.gnrl.infer, *args, **kwargs ) loss.backward() optimizer.step() cur_loss = loss.detach().cpu().item() avg_loss = metrics_handler.update(data_loader.dataset.dataset, "train", epoch, "loss", cur_loss) utils.calc_and_record_metrics(data_loader.dataset.dataset, "train", epoch, outputs.detach(), targets.detach(), metrics_handler, 1.0) pbar.set_description("Epoch: {:>3} / {:<3}, avg loss: {:<5}, cur loss: {:<5}".format( epoch, cfg.train.max_epoch, round(avg_loss, 6), round(cur_loss, 6) )) pbar.update() lr_scheduler.step() pbar.close() metrics_handler.summarize(data_loader.dataset.dataset, "train", epoch, logger=logger) return
def filter(brand=None, model=None, year=None, lower_price=None,
           higher_price=None):
    """
    Filter results by the non None parameters.

    NOTE: shadows the builtin `filter` — kept for backward compatibility.

    Args:
        - brand(str)
        - model(str):
        - year(str):
        - lower_price(int): lower price bound (exclusive).
        - higher_price(int): upper price bound (exclusive).

    Returns(list): list with the results
    """
    logger.log_info('Args are: {}, {}, {}, {}'.format(model, year,
                                                      lower_price,
                                                      higher_price))
    db = sqlite3.connect("cars.db")
    try:
        cursor = db.cursor()
        # fix: clauses were concatenated with format() — the model clause
        # even placed its AND inside the quoted value ('model="x AND " '),
        # producing wrong matches — and values were injectable. Build a
        # parameterized clause list instead; also handles the no-filter
        # case, which previously produced invalid SQL ("WHERE ();").
        clauses = []
        params = []
        if brand:
            clauses.append('brand=?')
            params.append(brand)
        if model:
            clauses.append('model=?')
            params.append(model)
        if year:
            clauses.append('year=?')
            params.append(year)
        if lower_price:
            clauses.append('price > ?')
            params.append(lower_price)
        if higher_price:
            clauses.append('price < ?')
            params.append(higher_price)
        query = "SELECT * FROM cars"
        if clauses:
            query += " WHERE ({})".format(" AND ".join(clauses))
        query += ';'
        logger.log_info("Executing query: {}".format(query))
        cursor.execute(query, params)
        res = cursor.fetchall()
    finally:
        # fix: the connection used to leak on every call.
        db.close()
    logger.log_info("Results are: {}".format(res))
    return res
def save_model(self, save_path=None):
    """Persist the Keras model, defaulting to the configured save path."""
    target = self.model_save_path if save_path is None else save_path
    makedirs(target)
    self.model.save(target)
    logger.log_info(target, 'saved..')