Пример #1
0
    def __init__(self, filename: str = None, **kwargs):
        """Populate the XConfig from a configuration file or a plain dict.

        :param filename: configuration file loaded through ``box_from_file``;
            ignored when ``plain_dict`` is given, defaults to None
        :type filename: str, optional
        :param replace_environment_variables: forwarded to ``deep_parse``
            under the same keyword, defaults to True
        :type replace_environment_variables: bool, optional
        :param plain_dict: if not None it is used as data source instead of
            ``filename``, defaults to None
        :type plain_dict: dict, optional
        :param no_deep_parse: when true the final ``deep_parse`` call is
            skipped, defaults to False
        :type no_deep_parse: bool, optional
        """

        # options
        source_dict = kwargs.get("plain_dict", None)
        skip_deep_parse = kwargs.get("no_deep_parse", False)
        replace_env = kwargs.get("replace_environment_variables", True)

        self._filename = None

        # An explicit dict wins over the file; with neither, nothing is loaded.
        if source_dict is not None:
            self.update(source_dict)
        elif filename is not None:
            config_path = Path(filename)
            self._filename = config_path
            self.update(box_from_file(file=config_path))

        self._schema = None

        if skip_deep_parse:
            return
        self.deep_parse(replace_environment_variables=replace_env)
def main(run_name, word2vec_path):
    """Expand the pretrained CPC vocabulary with word2vec and save the result.

    Reads ``config_cpc.yaml``, restores the ``model_best.pth`` checkpoint of
    ``run_name`` from the configured logging directory, extracts the word
    embedding of every vocabulary index, and passes the embeddings, the
    dictionary loaded from ``vocab.pkl`` and the word2vec vectors to
    ``_expand_vocabulary``. The resulting map is handed to ``save_expansion``.

    :param run_name: name of the training run whose best checkpoint is loaded
    :param word2vec_path: path of the word2vec vectors, read in binary format
    """
    config = box_from_file(Path('config_cpc.yaml'), file_type='yaml')
    use_cuda = False # use CPU
    device = torch.device("cuda" if use_cuda else "cpu")
    print('use_cuda is', use_cuda)
    # load pretrained model
    print("Loading pretrained CPC model: {}".format(run_name))
    cpc_model = CPCv1(config=config)
    checkpoint_path = '{}/{}-{}'.format(
        config.training.logging_dir, run_name, 'model_best.pth')
    # map_location remaps tensors saved on another device (e.g. a GPU) onto
    # the device chosen above, so the checkpoint loads on CPU-only hosts.
    checkpoint = torch.load(checkpoint_path, map_location=device)
    cpc_model.load_state_dict(checkpoint['state_dict'])
    cpc_model.to(device)
    # get lookup table
    cpc_model.eval()
    # Pure inference: no autograd graph is needed for the lookup.
    with torch.no_grad():
        output = cpc_model.get_word_embedding(
            torch.arange(config.dataset.vocab_size).to(device))
    skip_thoughts_emb = output.detach().cpu().numpy()

    # load original vocab dictionary
    print("Loading CPC dictionary")
    skip_thoughts_vocab = load_dictionary(loc='vocab.pkl')
    assert len(skip_thoughts_vocab) == config.dataset.vocab_size

    # Load the Word2Vec model
    print('Loading word2vec vectors at {}'.format(word2vec_path))
    word2vec = gensim.models.KeyedVectors.load_word2vec_format(word2vec_path, binary=True)
    # Run vocabulary expansion
    embedding_map = _expand_vocabulary(skip_thoughts_emb, skip_thoughts_vocab, word2vec)
    # Save expanded embeddings and dictionary
    print("Saving expanded embeddings and vocabulary")
    save_expansion(embedding_map, config)
Пример #3
0
 def test_from_all(self):
     """box_from_file returns the expected container type for each data file.

     Files are loaded both with the file type left unspecified and with it
     passed explicitly through ``file_type``.
     """
     # (file name under <test_root>/data, extra keyword arguments, expected type)
     cases = (
         ("json_file.json", {}, Box),
         ("toml_file.tml", {}, Box),
         ("yaml_file.yaml", {}, Box),
         ("json_file.json", {"file_type": "json"}, Box),
         ("toml_file.tml", {"file_type": "toml"}, Box),
         ("yaml_file.yaml", {"file_type": "yaml"}, Box),
         ("json_list.json", {}, BoxList),
         ("yaml_list.yaml", {}, BoxList),
     )
     for file_name, extra, expected_type in cases:
         loaded = box_from_file(Path(test_root, "data", file_name), **extra)
         assert isinstance(loaded, expected_type)
Пример #4
0
 def test_bad_file(self):
     """box_from_file raises BoxError for a bad file under every file type.

     Covers explicit file types, no file type at all, and a path given as
     the plain string "does not exist".
     """
     for requested_type in ("json", "toml", "yaml", "msgpack", "unknown"):
         with pytest.raises(BoxError):
             box_from_file(Path(test_root, "data", "bad_file.txt"),
                           file_type=requested_type)

     # No file type given.
     with pytest.raises(BoxError):
         box_from_file(Path(test_root, "data", "bad_file.txt"))

     with pytest.raises(BoxError):
         box_from_file("does not exist")
Пример #5
0
 def test_from_all(self):
     """box_from_file returns the expected container type for each data file.

     Covers the json, toml, yaml, msgpack and csv data files; json, toml and
     yaml are additionally loaded with an explicit ``file_type``.
     """
     # (file name under <test_root>/data, extra keyword arguments, expected type)
     expectations = [
         ("json_file.json", {}, Box),
         ("toml_file.tml", {}, Box),
         ("yaml_file.yaml", {}, Box),
         ("json_file.json", {"file_type": "json"}, Box),
         ("toml_file.tml", {"file_type": "toml"}, Box),
         ("yaml_file.yaml", {"file_type": "yaml"}, Box),
         ("json_list.json", {}, BoxList),
         ("yaml_list.yaml", {}, BoxList),
         ("msgpack_file.msgpack", {}, Box),
         ("msgpack_list.msgpack", {}, BoxList),
         ("csv_file.csv", {}, BoxList),
     ]
     for data_file, load_kwargs, container in expectations:
         result = box_from_file(Path(test_root, "data", data_file),
                                **load_kwargs)
         assert isinstance(result, container)
Пример #6
0
 def test_bad_file(self):
     """box_from_file raises BoxError for a bad file under every file type.

     Covers explicit file types, no file type at all, and a path given as
     the plain string 'does not exist'.
     """
     for requested_type in ('json', 'toml', 'yaml', 'unknown'):
         with pytest.raises(BoxError):
             box_from_file(Path(test_root, "data", "bad_file.txt"),
                           file_type=requested_type)

     # No file type given.
     with pytest.raises(BoxError):
         box_from_file(Path(test_root, "data", "bad_file.txt"))

     with pytest.raises(BoxError):
         box_from_file('does not exist')
Пример #7
0
## Torch
import torch
import torch.nn as nn
from torch.utils import data
import torch.optim as optim

## Custom Imports
from utils.logger import setup_logs
from utils.seed import set_seed
from utils.train import train, snapshot
from utils.validation import validation
from utils.dataset import BookCorpus
from model.models import CPCv1

############ Control Center and Hyperparameter ###############
# Hyperparameters are read from the YAML configuration file.
config = box_from_file(Path('config_cpc.yaml'), file_type='yaml')
# A configured resume name is reused as-is; otherwise the run is named after
# the current timestamp.
run_name = config.training.resume_name or (
    "cpc" + time.strftime("-%Y-%m-%d_%H_%M_%S"))
# setup logger
global_timer = timer()  # global timer
logger = setup_logs(config.training.logging_dir, run_name)  # setup logs
experiment_banner = '### Experiment {} ###'.format(run_name)
logger.info(experiment_banner)
hyperparameter_summary = '### Hyperparameter summary below ###\n {}'.format(
    config)
logger.info(hyperparameter_summary)
# setup of comet_ml
if has_comet:
    logger.info('### Logging with comet_ml ###')
    if config.comet.previous_experiment:
        logger.info(
            '===> using existing experiment: {}'.format(
                config.comet.previous_experiment)
        )
Пример #8
0
        """Learning rate scheduling per step"""

        self.n_current_steps += self.delta
        new_lr = np.power(self.d_model, -0.5) * np.min([
            np.power(self.n_current_steps, -0.5),
            np.power(self.n_warmup_steps, -1.5) * self.n_current_steps
        ])

        for param_group in self.optimizer.param_groups:
            param_group['lr'] = new_lr
        return new_lr


############ Control Center and Hyperparameter ###############
# Name the run after the current timestamp.
run_name = "cpc-clf" + time.strftime("-%Y-%m-%d_%H_%M_%S")
# Two configurations are used: one for the pretrained CPC encoder and one for
# the classifier run itself.
config_encoder = box_from_file(Path('config_cpc.yaml'), file_type='yaml')
config = box_from_file(Path('config_clf.yaml'), file_type='yaml')
global_timer = timer()  # global timer
logger = setup_logs(config.training.logging_dir, run_name)  # setup logs
logger.info('### Experiment {} ###'.format(run_name))
logger.info('### Hyperparameter summary below ###\n {}\n'.format(config))

# CUDA is used only when it is both allowed by the config and available.
use_cuda = not config.training.no_cuda and torch.cuda.is_available()
print('use_cuda is', use_cuda)
device = torch.device("cuda" if use_cuda else "cpu")
# set seed for reproducibility
set_seed(config.training.seed, use_cuda)
# Load pretrained CPC model
cpc_model = CPCv1(config=config_encoder)
# map_location remaps tensors saved on another device (e.g. a GPU) onto the
# selected device, so the checkpoint also loads when running on CPU.
checkpoint = torch.load(config.txt_classifier.cpc_path, map_location=device)
cpc_model.load_state_dict(checkpoint['state_dict'])