def train(self, data):
    """
    Fits the model according to the cross-validation strategy set by the
    caller. If no validation strategy is provided, the model is fitted on
    all the provided data and a single score on the training data is
    returned. After fitting, the trained model is pickled into the
    `model` directory.

    :param data: dict of np.arrays with the features and the targets.
    :return: list of float, the performance score of each iteration of
        the training process.
    """
    x = data['x']
    y = data['y']
    history = []
    if self.cross_validation:
        # run cross-validation while fitting the model
        for train_index, test_index in self.cross_validation.split(x):
            x_train, x_test = x[train_index], x[test_index]
            y_train, y_test = y[train_index], y[test_index]
            self.ml_model.fit(x_train, y_train)
            # store the held-out performance of each fold
            history.append(self.ml_model.score(x_test, y_test))
    else:
        self.ml_model.fit(x, y)
        history.append(self.ml_model.score(x, y))
    pickle.dump(self.ml_model,
                open(os.path.join(get_project_root(), 'model', self.model_name), 'wb'))
    return history
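
A minimal, self-contained sketch of the same fold loop, assuming `cross_validation` follows scikit-learn's splitter interface (the `KFold` usage and the random data are illustrative, not from the source):

import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import KFold

x = np.random.rand(100, 4)
y = np.random.randint(0, 2, size=100)

model = LogisticRegression()
history = []
# same pattern as train(): fit on each fold, score on the held-out split
for train_index, test_index in KFold(n_splits=5, shuffle=True, random_state=0).split(x):
    model.fit(x[train_index], y[train_index])
    history.append(model.score(x[test_index], y[test_index]))
print(history)  # one held-out score per fold
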
def on_image_change(self):
    filename = filedialog.askopenfilename(initialdir=get_project_root(),
                                          title="Select file",
                                          filetypes=(("JPEG files", "*.jpg"),))
    self._controller.on_image_change(filename)
def install(self, options):
    pip = self.get_pip()
    install_options = options.get('install_options') or []
    if self.settings['ENV'] == 'development' and \
            'dev' not in install_options:
        install_options.append('dev')
    # build an extras suffix like "[dev]" for pip's editable install
    install_options = \
        '[{}]'.format(','.join(
            [opt for opt in install_options if opt]
        )) if install_options else ''
    cmd = [
        pip,
        'install',
        '-e',
        '{}{}'.format(utils.get_project_root(), install_options),
    ]
    self.vprint(
        2,
        'Processing the following command for install:\n'
        '{}'.format(cmd)
    )
    self.vprint(3, subprocess.check_output(cmd))
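
For reference, a standalone sketch of the extras-suffix logic above with illustrative values; in a development environment the assembled command ends up as an editable install with a `[dev]` extra (the project path is a placeholder):

install_options = ['dev']
suffix = '[{}]'.format(','.join([opt for opt in install_options if opt])) if install_options else ''
print('pip install -e /path/to/project{}'.format(suffix))
# -> pip install -e /path/to/project[dev]
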
def __init__(self, column, new_column, has_positive=True):
    """
    Word sentiment feature.

    :param column: str, the name of the feature we want to apply our
        pre-processing upon.
    :param new_column: str, the name of the new feature.
    :param has_positive: bool, if True the sentiment is positive,
        if False the sentiment is negative.
    """
    Feature.__init__(self, column, new_column)
    self.has_positive = has_positive
    if self.has_positive:
        f = os.path.join(get_project_root(),
                         'datasets/vocabularies_and_collections/positive-words.txt')
        self.positive_words = open(f, encoding='utf-8', errors='ignore').read().splitlines()
    else:
        f = os.path.join(get_project_root(),
                         'datasets/vocabularies_and_collections/negative-words.txt')
        self.negative_words = open(f, encoding='utf-8', errors='ignore').read().splitlines()
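
The word lists are expected to hold one word per line; a self-contained sketch of the same load pattern (the file content is illustrative):

import io

# stand-in for open(f, encoding='utf-8', errors='ignore')
fake_file = io.StringIO('good\ngreat\nexcellent\n')
positive_words = fake_file.read().splitlines()
print(positive_words)  # ['good', 'great', 'excellent']
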
def get_full_path_to_strategies_folder(strategies_folder: str) -> str:
    stripped_folder = strategies_folder.strip("./")
    strategies_path = os.path.join(utils.get_project_root(), stripped_folder)
    if not os.path.exists(strategies_path):
        print(
            f"[ERROR] the strategies folder '{strategies_folder}' "
            f"(expanded to '{strategies_path}') does not exist."
        )
        raise SystemExit
    return strategies_path
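
One caveat worth flagging: `str.strip('./')` treats its argument as a set of characters, not a literal prefix, so it removes any leading or trailing '.' and '/' characters. A quick illustration:

print('./strategies/'.strip('./'))  # -> 'strategies'
print('../strategies'.strip('./'))  # -> 'strategies' (also eats '..')
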
def snodas_command_error(self):
    project_root = utils.get_project_root()
    project_root_message = ''
    if project_root != os.getcwd():
        project_root_message = \
            ' from the project\'s root directory {}'.format(project_root)
    print(
        ('Unfortunately, the install command must be run '
         'using the project\'s manage.py script instead of '
         'the snodas command. Try running `python manage.py install`{}.'
         ).format(project_root_message)
    )
def __init__(self, title, description, date, name, address, city,
             zip_code, province, people):
    self.driver = webdriver.Chrome()
    self.title = title
    self.description = description
    self.date = date
    self.name = name
    self.address = address
    self.city = city
    self.zip_code = zip_code
    self.province = province
    self.people = people
    self.path = self.rootPath = os.path.abspath(get_project_root())
def test_get_fetcher_from_source_on_various_sources(self):
    """
    Tests that the fetcher factory returns a fetcher object according
    to the requested fetcher.
    """
    requested_fetchers = [('Twitter', None),
                          ('CWUR', None),
                          ('CSV', {'path': get_project_root()})]
    for fetcher in requested_fetchers:
        self.assertIsInstance(
            get_fetcher_from_source(fetcher[0], params=fetcher[1]),
            Fetcher)
def commits_to_file(self) -> None:
    """Writes the commits of a repository to a CSV file located at
    files/commits/{path_prefix}/commits-{repository_name}.csv under the
    project root.
    """
    out_file_path = os.path.join(get_project_root(), "files", "commits",
                                 self.path_prefix,
                                 f"commits-{self.repository_name}.csv")
    os.makedirs(os.path.dirname(out_file_path), exist_ok=True)
    with open(out_file_path, "w", newline="") as csv_file:
        csv_writer = csv.DictWriter(csv_file,
                                    fieldnames=Repository.COMMIT_FIELDNAMES,
                                    quoting=csv.QUOTE_ALL)
        csv_writer.writeheader()
        for commit in self.repository.iter_commits():
            files_of_commit = [row for row in self._generate_commit_numstat_line(commit)]
            csv_writer.writerows(files_of_commit)
def __init__(self, remote_url: str, ssh_path: str = None,
             project_name: str = "", verbose: bool = False) -> None:
    """Initializes a Repository object.

    The repository object holds information about the local directory
    as well as the remote of a repository.

    Args:
        remote_url: URL of a remote repository,
            e.g.: https://github.com/project/repo.git
        ssh_path: optional path to a private SSH key used to reach the remote.
        project_name: prefix used to group repositories under the working directory.
        verbose: if True, log at DEBUG level instead of INFO.
    """
    logger.remove()
    if verbose:
        logger.add(sys.stderr, format="{level: <8} | {message}", level="DEBUG")
    else:
        logger.add(sys.stderr, format="{level: <8} | {message}", level="INFO")
    # check that a complete URL is provided
    if not remote_url.endswith(".git"):
        raise InvalidGitRepositoryError(
            "The URL doesn't seem valid. Please provide a valid URL, "
            "e.g.: http://github.com/project/repo.git")
    self.repository_name = remote_url[(remote_url.rindex("/") + 1):remote_url.rindex(".git")]
    self.remote_url = remote_url
    self.path_prefix = project_name
    self.ssh_path = None
    self.progress_info = self.ProgressInfo()
    self.working_dir = os.path.join(get_project_root(), "temp", "repositories",
                                    self.path_prefix, self.repository_name)
    self.repository_git = git.Git(self.working_dir)
    if ssh_path is not None:
        self.ssh_path = str(ssh_path).replace("\\", "\\\\")
        self.repository_git.update_environment(GIT_SSH_COMMAND=f"ssh -i {self.ssh_path}")
    try:
        self.repository = git.Repo(self.working_dir)
    except InvalidGitRepositoryError:
        logger.info(
            f"{os.path.abspath(self.working_dir)} is not a valid git repository "
            f"(no .git folder), clone {self.remote_url} into "
            f"{os.path.abspath(self.working_dir)}")
        self.repository = self.__clone_from_remote()
    except NoSuchPathError:
        logger.info(
            f"{os.path.abspath(self.working_dir)} does not exist - trying to clone "
            f"{self.remote_url} into {os.path.abspath(self.working_dir)}")
        self.repository = self.__clone_from_remote()
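
An illustrative construction, assuming the class layout above (the URL and project name are placeholders):

# clones into <project_root>/temp/repositories/my-project/repo on first use
repo = Repository(
    remote_url='https://github.com/org/repo.git',
    ssh_path=None,            # or a path to a private key for SSH remotes
    project_name='my-project',
    verbose=True,
)
repo.commits_to_file()        # CSV of commits, see commits_to_file() above
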
def blames_to_file(self) -> str:
    """Writes the blames of a repository to a CSV file.

    :return: path of the CSV file containing the blames of the repository.
    """
    out_file_path = os.path.join(get_project_root(), "temp", "files", "blames",
                                 self.path_prefix,
                                 f"blames-{self.repository_name}.csv")
    os.makedirs(os.path.dirname(out_file_path), exist_ok=True)
    with open(out_file_path, "w", newline="", encoding="utf-8",
              errors="replace") as csv_file:
        csv_writer = csv.DictWriter(csv_file,
                                    fieldnames=Repository.BLAME_FIELDNAMES,
                                    quoting=csv.QUOTE_ALL, escapechar="\\")
        csv_writer.writeheader()
        for blame in self.__get_blames():
            for line in blame:
                csv_writer.writerow(line)
    return out_file_path
def load_mushroom(random_state=42, train_frac=0.8):
    """Load the mushroom dataset and preprocess it."""
    mushroom_df = pd.read_csv(
        os.path.join(utils.get_project_root(), 'models/RL/data/mushroom.csv'))
    # shuffle rows before splitting into train and test sets
    mushroom_df = mushroom_df.sample(frac=1, random_state=random_state)
    # change categorical data to numerical (class: 1 = poisonous, 0 = edible)
    for c in mushroom_df.columns:
        mushroom_df[c] = mushroom_df[c].astype('category').cat.codes
    # split into train / test
    train_size = int(train_frac * len(mushroom_df))
    train_df, test_df = mushroom_df[:train_size], mushroom_df[train_size:]
    train_X, train_y = train_df.iloc[:, :-1].to_numpy(), train_df['class'].to_numpy()
    test_X, test_y = test_df.iloc[:, :-1].to_numpy(), test_df['class'].to_numpy()
    return mushroom_df, train_X, train_y, test_X, test_y
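
The categorical-to-numerical step relies on pandas category codes, which assign integers in the sorted order of the category labels; a self-contained illustration:

import pandas as pd

s = pd.Series(['p', 'e', 'e', 'p'])
# categories are sorted alphabetically: 'e' -> 0, 'p' -> 1
print(s.astype('category').cat.codes.tolist())  # -> [1, 0, 0, 1]
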
from pathlib import Path

import pandas as pd
from tinydb import TinyDB

from utils import get_project_root


def read_tinydb(path: str) -> pd.DataFrame:
    db = TinyDB(path)
    tinydb_to_pandas = []
    for bill in db.all():
        for item in bill['items']:
            row = [
                bill['billId'], bill['sessionCreated'], bill['createdAt'],
                bill['paidAt'], bill['finalPrice'], bill['finalPriceWithoutTax'],
                bill['paymentMethod'], bill['createdBy']['userName'],
                item['name'], item['amount'], item['price'], item['vatRate'],
                item['productId']
            ]
            tinydb_to_pandas.append(row)
    columns = [
        'billId', 'sessionCreated', 'createdAt', 'paidAt', 'finalPrice',
        'finalPriceWithoutTax', 'paymentMethod', 'createdBy_userName',
        'items_name', 'items_amount', 'items_price', 'items_vatRate',
        'items_productId'
    ]
    dataframe = pd.DataFrame(data=tinydb_to_pandas, columns=columns)
    return dataframe


if __name__ == '__main__':
    source_path = str(get_project_root() / Path('data/raw/bill_details.json'))
    target_path = str(get_project_root() / Path('data/interim/bill_details.xlsx'))
    bill_details = read_tinydb(source_path)
    bill_details.to_excel(target_path)
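
For reference, each TinyDB document is expected to look roughly like the record below; the shape is inferred from the keys accessed in read_tinydb, and the field values are illustrative:

sample_bill = {
    'billId': '42', 'sessionCreated': '...', 'createdAt': '...',
    'paidAt': '...', 'finalPrice': 120.0, 'finalPriceWithoutTax': 99.2,
    'paymentMethod': 'card', 'createdBy': {'userName': 'alice'},
    'items': [{'name': 'coffee', 'amount': 2, 'price': 60.0,
               'vatRate': 0.21, 'productId': 'p1'}],
}
# read_tinydb flattens each bill into one row per item
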
logging.info("Props: %s" % str(layer['_props'])) if '_mtype' in layer: logging.info("mtype: %s" % str(layer['_mtype'])) else: logging.info(layer.keys()) logging.info("param keys: %s" % str(layer['params'].keys())) key = '__numpy.cndarray__' W = numpy.load(StringIO(b64decode(layer["params"]["W"][key]))).tolist() b = numpy.load(StringIO(b64decode(layer["params"]["b"][key]))).tolist() layers.append({'W': W, 'b': b}) with open(model_target, "w") as f: f.write(json.dumps(layers)) if __name__ == "__main__": PROJECT_ROOT = utils.get_project_root() # Get latest model folder models_folder = os.path.join(PROJECT_ROOT, "models") latest_model = utils.get_latest_folder(models_folder) from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter parser = ArgumentParser(description=__doc__, formatter_class=ArgumentDefaultsHelpFormatter) parser.add_argument("-m", "--model", dest="model", help="where is the model folder (with the info.yml)?", metavar="FILE", type=lambda x: utils.is_valid_folder(parser, x), default=latest_model) parser.add_argument("-t", "--target",
import glob

import pytest

from script import *
from utils import get_project_root

ROOT = get_project_root()


def find_total_dictionary_with_algorithm(file_path):
    with open(file_path, 'r') as f:
        apartment_string = f.read()

    '''Step 1: Read and transform the txt file'''
    apartment_init = [
        list([j for j in i.split('\n')][0])
        for i in apartment_string.splitlines()
    ]

    '''Step 2: Locate the chairs in the apartment'''
    # dictionary mapping a chair position (row, col) to its name
    dict_pos_chairs = {}
    for i, row in enumerate(apartment_init):
        for j, element in enumerate(row):
            if element in sc.chairs:
                dict_pos_chairs[(i, j)] = element
    # list of positions of chairs, ordered vertically then horizontally
    list_pos_chairs = list(dict_pos_chairs.keys())
    list_pos_chairs.sort(key=lambda x: (x[0], x[1]))

    '''Step 3: Find the room of each chair'''
def __init__(self, params):
    Fetcher.__init__(self)
    self.path = os.path.join(get_project_root(), params['path'])
def setup_class(cls):
    super(TestPictures, cls).setup_class()
    utils.get_image_folder = lambda: os.path.join(utils.get_project_root(),
                                                  TEST_IMAGE_FOLDER)
    test_utils.get_image_folder = utils.get_image_folder
def __populateSetting(cls):
    rootDir = get_project_root()
    path = os.path.join(rootDir, 'settings.json')
    envDict = json.loads(open(path).read())
    for k, v in envDict.items():
        setattr(cls.setting, k, v)
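
A self-contained sketch of the same setattr pattern with an in-memory payload (the keys are illustrative; the real ones come from settings.json):

import json
import types

setting = types.SimpleNamespace()
for k, v in json.loads('{"DEBUG": true, "DB_HOST": "localhost"}').items():
    setattr(setting, k, v)
print(setting.DEBUG, setting.DB_HOST)  # True localhost
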
from pathlib import Path

import torch
import yaml
from catalyst.dl import SupervisedRunner
from catalyst.dl.callbacks import (
    AccuracyCallback,
    CheckpointCallback,
    InferCallback,
    OptimizerCallback,
)
from catalyst.utils import prepare_cudnn, set_global_seed

from data import read_data
from model import BertForSequenceClassification
from utils import get_project_root

# loading config params
project_root: Path = get_project_root()
with open(str(project_root / "config.yml")) as f:
    params = yaml.load(f, Loader=yaml.FullLoader)

# read and process data
train_val_loaders, test_loaders = read_data(params)

# initialize the model
model = BertForSequenceClassification(
    pretrained_model_name=params["model"]["model_name"],
    num_classes=params["model"]["num_classes"],
)

# specify criterion for the multi-class classification task, optimizer and scheduler
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(),
from selenium import webdriver
from selenium.webdriver.support import expected_conditions as EC
# from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
import XLUtils
import numpy as np
import time
import os
from utils import get_project_root

rootPath = os.path.abspath(get_project_root())

usernameInput = ''
passwordInput = ''
print("Enter username: ")
usernameInput = input()
print("Enter password: ")
passwordInput = input()

# In[example user login]
# if usernameInput == '':
#     usernameInput = '0416048803'
# if passwordInput == '':
#     passwordInput = 'poltekpos2019'

driver = webdriver.Chrome()
driver.get('https://aptimas.poltekpos.ac.id/login')
# change the username and password to match each lecturer's own credentials
import sys
import utils
import logging
import os
import time
from logging import handlers

from bs4 import BeautifulSoup
import scrapy
from multiprocessing import Queue, Manager
import threading
import pandas as pd
import re
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry

project_directory = utils.get_project_root()
csv_directory = str(project_directory) + '/csv/'
script_name = os.path.basename(__file__)

# logs directory setup
logs_directory = os.path.join(project_directory, 'logs')
if not os.path.exists(logs_directory):
    os.makedirs(logs_directory)
if not os.path.exists(csv_directory):
    os.makedirs(csv_directory)

# LOGGING
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
log_template = '%(asctime)s %(module)s %(levelname)s: %(message)s'
def default_conf_file(self):
    return os.path.join(utils.get_project_root(), utils.CONF_FILE_NAME)
def test():
    gt = [[0, 1, 1], [2, 5, 0], [8, 9, 0],
          [20, 21, 1], [12, 13, 1], [16, 17, 0],
          [32, 33, 1], [28, 29, 0], [24, 15, 0]]
    gt = np.asarray(gt).astype(np.float64)
    # gt[:, 1] -= np.min(gt[:, 1])  # min_y = 0
    # gt[:, 0] -= np.min(gt[:, 0])  # min_x = 0
    # gt[:, :2] = gt[:, :2] / np.max(gt[:, 1])
    offsets = utils.convert_gts_to_synth_format(gt, adjustments=True)
    gt2 = utils.convert_synth_offsets_to_gt(offsets)
    offsets2 = utils.convert_gts_to_synth_format(gt2, adjustments=False)
    gt3 = utils.convert_synth_offsets_to_gt(offsets2)
    np.set_printoptions(suppress=True)
    print(gt)
    print(gt2)
    print(gt3)
    print(offsets)
    print(offsets2)


if __name__ == '__main__':
    # draw a thing
    ROOT = utils.get_project_root()
    original = ROOT / "data/processed/original_mine"
    file = "x.npy"
    np.load(original / file, allow_pickle=True)
import os

from utils import get_project_root, get_source_root

ROOT_DIR = get_source_root()
PROJECT_ROOT = get_project_root()
PICTURES_DIR = os.path.join(PROJECT_ROOT, 'Resources')
# CONFIG_PATH = os.path.join(ROOT_DIR, 'configuration.conf')
In this toy example, we load a CSV file with dummy tweets and replies,
and we perform a word-count pre-processing.
"""
from utils import get_project_root
import os

from src.etl.fetcher_factory import get_fetcher_from_source
from src.feature_extraction.feature_factory import FeatureFactory

if __name__ == '__main__':
    # fetch data
    data_fetcher = get_fetcher_from_source(
        'CSV', params={'path': 'datasets/raw/sample_data.csv'})
    data = data_fetcher.get_data(separator=',')

    # set features configuration
    feature_set = [FeatureFactory.word_count('tweet', 'word_count')]

    # perform feature extraction
    for feature in feature_set:
        data = feature.apply(data)

    # store the processed dataframe
    data.to_csv(os.path.join(get_project_root(),
                             'datasets/processed/processed_sample_data.csv'),
                sep=',', encoding='utf-8', index=False)
def _get_database(path: str) -> TinyDB:
    db_path = get_project_root() / Path(path)
    db = TinyDB(db_path)
    return db
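
An illustrative usage, assuming `get_project_root()` returns a `pathlib.Path` and the database file exists; the relative path and the `paidAt` field queried here are assumptions borrowed from the bill schema above:

from tinydb import Query

db = _get_database('data/raw/bill_details.json')
Bill = Query()
paid_bills = db.search(Bill.paidAt.exists())  # bills that carry a paidAt field
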