示例#1
0
    def __init__(self, config, paras, mode):
        """Prepare a decoding run that mirrors the training configuration.

        The parent constructor runs first. Afterwards the corpus path,
        train split, audio, text and model sections of ``self.src_config``
        overwrite the matching entries of ``self.config``, and the
        tokenizer, data dealer, output-file template and logger are built.

        Args:
            config: Forwarded unchanged to the parent constructor.
            paras: Forwarded to the parent constructor; ``paras.logdir`` is
                also read here to place the log directory.
            mode: Forwarded unchanged to the parent constructor.

        Raises:
            AssertionError: If the corpus name in ``self.config`` differs
                from the one in ``self.src_config``.
            NotImplementedError: If ``decode.beam_size`` equals 1 (greedy
                decoding is not implemented).
        """
        super().__init__(config, paras, mode)

        # Short aliases; these are the same dict objects, so writes below
        # land in self.config.
        data_cfg = self.config['data']
        src_data_cfg = self.src_config['data']
        corpus_cfg = data_cfg['corpus']
        src_corpus_cfg = src_data_cfg['corpus']

        # ToDo : support tr/eval on different corpus
        assert corpus_cfg['name'] == src_corpus_cfg['name']
        corpus_cfg['path'] = src_corpus_cfg['path']
        corpus_cfg['bucketing'] = False

        # Sections that have to match the training config exactly
        data_cfg['audio'] = src_data_cfg['audio']
        corpus_cfg['train_split'] = src_corpus_cfg['train_split']
        data_cfg['text'] = src_data_cfg['text']
        self.tokenizer = load_text_encoder(**data_cfg['text'])
        self.config['model'] = self.src_config['model']
        self.finetune_first = 5
        self.best_wer = {'att': 3.0, 'ctc': 3.0}

        # Template for result files; two placeholders are filled in later
        self.output_file = str(self.ckpdir) + '_{}_{}.csv'

        # Decoding flags derived from the decode section
        decode_cfg = self.config['decode']
        self.greedy = decode_cfg['beam_size'] == 1
        self.dealer = Datadealer(data_cfg['audio'])
        self.ctc = decode_cfg['ctc_weight'] == 1.0
        if self.greedy:
            # ToDo : implement greedy
            raise NotImplementedError
        # Beam decoding is run with a batch size of one
        corpus_cfg['batch_size'] = 1

        # Logger settings
        self.logdir = os.path.join(paras.logdir, self.exp_name)
        self.log = SummaryWriter(self.logdir, flush_secs=self.TB_FLUSH_FREQ)
        self.timer = Timer()
示例#2
0
    def __init__(self, config, paras, mode):
        """Initialise settings shared by training and testing runs.

        Args:
            config: Mapping of settings. ``config['hparas']`` is read in
                train mode and ``config['src']`` in test mode.
            paras: Run options. Reads ``gpu``, ``amp``, ``name``, ``config``
                and ``seed``; in train mode also ``ckpdir`` and ``logdir``;
                in test mode ``outdir``. In test mode ``paras.load`` is
                overwritten with ``config['src']['ckpt']``.
            mode: ``'train'`` or ``'test'``. Any other value skips both
                mode-specific sections.
        """
        # General Settings
        self.config = config
        self.paras = paras
        self.mode = mode
        for k, v in default_hparas.items():
            setattr(self, k, v)
        use_cuda = self.paras.gpu and torch.cuda.is_available()
        self.device = torch.device('cuda' if use_cuda else 'cpu')
        # Diagnostic output kept from the original implementation
        print(torch.cuda.is_available())
        self.amp = paras.amp

        # Name experiment
        self.exp_name = paras.name
        if self.exp_name is None:
            # By default, exp is named after config file
            self.exp_name = paras.config.split('/')[-1].replace('.yaml', '')
            if mode == 'train':
                self.exp_name += '_sd{}'.format(paras.seed)

        # Plugin list
        self.emb_decoder = None

        if mode == 'train':
            # Filepath setup
            os.makedirs(paras.ckpdir, exist_ok=True)
            self.ckpdir = os.path.join(paras.ckpdir, self.exp_name)
            os.makedirs(self.ckpdir, exist_ok=True)

            # Logger settings
            self.logdir = os.path.join(paras.logdir, self.exp_name)
            self.log = SummaryWriter(self.logdir,
                                     flush_secs=self.TB_FLUSH_FREQ)
            self.timer = Timer()

            # Hyperparameters
            self.step = 0
            self.valid_step = config['hparas']['valid_step']
            self.max_step = config['hparas']['max_step']

            self.verbose('Exp. name : {}'.format(self.exp_name))
            self.verbose('Loading data... large corpus may took a while.')

        elif mode == 'test':
            # Output path
            os.makedirs(paras.outdir, exist_ok=True)
            self.ckpdir = os.path.join(paras.outdir, self.exp_name)

            # Load training config to get acoustic feat, text encoder and
            # build model. The file is opened in a context manager so the
            # handle is closed as soon as parsing finishes.
            # NOTE: yaml.FullLoader should only be used on trusted configs.
            with open(config['src']['config'], 'r') as src_file:
                self.src_config = yaml.load(src_file, Loader=yaml.FullLoader)
            self.paras.load = config['src']['ckpt']

            self.verbose('Evaluating result of tr. config @ {}'.format(
                config['src']['config']))
示例#3
0
    def __init__(self, config, paras):
        """Set up device, experiment name, directories, logger and counters.

        Args:
            config: Mapping of settings; ``config['hparas']['epoch']`` is
                read here.
            paras: Run options. Reads ``gpu``, ``mode``, ``name``,
                ``config``, ``seed``, ``ckpdir`` and ``logdir``.
        """
        # Keep references to the raw inputs
        self.config = config
        self.paras = paras

        # Copy every default hyper-parameter onto the instance
        for attr_name, attr_value in default_hparas.items():
            setattr(self, attr_name, attr_value)

        # CUDA is used only when requested and actually available
        self.gpu = bool(self.paras.gpu and torch.cuda.is_available())
        self.device = torch.device('cuda' if self.gpu else 'cpu')

        # Settings for training/testing
        self.mode = self.paras.mode  # legacy, should be removed

        # Experiment name: explicit name wins, otherwise derive one from
        # the config file name plus the seed
        self.exp_name = paras.name
        if self.exp_name is None:
            config_file = paras.config.split('/')[-1]
            config_stem = config_file.split('.y')[0]
            self.exp_name = config_stem + '_sd{}'.format(paras.seed)

        # Checkpoint directory (root first, then the per-experiment folder)
        os.makedirs(paras.ckpdir, exist_ok=True)
        self.ckpdir = os.path.join(paras.ckpdir, self.exp_name)
        os.makedirs(self.ckpdir, exist_ok=True)

        # Logger and timer
        self.logdir = os.path.join(paras.logdir, self.exp_name)
        self.log = SummaryWriter(self.logdir, flush_secs=self.TB_FLUSH_FREQ)
        self.timer = Timer()

        # Training counters
        self.step = 0
        self.epoch = config['hparas']['epoch']

        self.verbose('Exp. name : {}'.format(self.exp_name))
        self.verbose('Loading data...')
示例#4
0
from pyspark import SparkConf, SparkContext
from pyspark.sql import SQLContext

# Spark configuration: app name plus a PYTHONPATH entry for the executors.
conf = SparkConf()
conf.setAppName("test1")
conf.setExecutorEnv("PYTHONPATH", "/home/hadoop/cs205/")

# Context objects used by the rest of the script; only errors are logged.
sc = SparkContext(conf=conf)
sc.setLogLevel("ERROR")
sqlCtx = SQLContext(sc)

# import requirements
from src.models.model_runner import AxelrodRunner
from src.models.axelrod_economic_complexity import EconomicComplexity
from src.data import game_of_thrones
from src.util import Timer

from graphframes import GraphFrame

# Build the graph from whatever game_of_thrones.read returns for the
# edge-list CSV (its result is unpacked as GraphFrame's arguments).
edges_path = "file:///home/hadoop/data/asoiaf-all-edges.csv"
frames = game_of_thrones.read(sqlCtx, path=edges_path)
graph = GraphFrame(*frames)

# Wrap the economic-complexity model in the runner.
runner = AxelrodRunner(EconomicComplexity())

# Time 25 iterations, then show the resulting vertices and the second
# element of the returned pair.
with Timer() as stopwatch:
    outcome = runner.run(graph, num_iter=25)
    outcome[0].vertices.show()
    print(outcome[1])

print("Time elapsed: {}".format(stopwatch.interval))
示例#5
0
if __name__ == '__main__':
    # Seed PyTorch's random number generator.
    torch.manual_seed(42)

    # Read the dataset and convert it to a float32 PyTorch tensor.
    data_path = '../data/eq2_grammar_dataset.h5'
    data = torch.from_numpy(load_data(data_path)).float()

    # Model, loss function and optimizer.
    model = GrammarVAE(
        ENCODER_HIDDEN,
        Z_SIZE,
        DECODER_HIDDEN,
        OUTPUT_SIZE,
        RNN_TYPE,
    )
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=LR)

    # Bookkeeping objects. train() is called without arguments, so it
    # presumably reads these module-level names; they keep their names.
    timer = Timer()
    log = {
        'loss': [],
        'kl': [],
        'elbo': [],
        'acc': [],
    }
    anneal = AnnealKL(step=1e-3, rate=500)

    # Horizontal rule printed above and below each status message.
    separator = '-' * 69

    try:
        for epoch in range(1, EPOCHS + 1):
            header = 'Epoch {}/{}'.format(epoch, EPOCHS)
            print('\n'.join([separator, header, separator]))
            train()

    except KeyboardInterrupt:
        # Ctrl-C ends training without a traceback.
        print('\n'.join([separator, 'Exiting training early', separator]))
示例#6
0
    def __init__(self, config, paras, mode):
        """Initialise shared settings, with optional transfer-learning setup.

        Args:
            config: Mapping of settings. An optional ``config['transfer']``
                section (keys ``train_enc`` and ``train_dec``) enables
                transfer learning in train mode. ``config['hparas']`` is
                read in train mode and ``config['src']`` in test mode.
            paras: Run options. Reads ``gpu``, ``cuda``, ``amp``, ``name``,
                ``config`` and ``seed``; in train mode also ``ckpdir`` and
                ``logdir``; in test mode ``outdir``. In test mode
                ``paras.load`` is overwritten with ``config['src']['ckpt']``.
            mode: ``'train'`` or ``'test'``. Any other value skips both
                mode-specific sections.
        """
        # General Settings
        self.config = config
        self.paras = paras
        self.mode = mode
        for k, v in default_hparas.items():
            setattr(self, k, v)
        if self.paras.gpu and torch.cuda.is_available():
            # paras.cuda selects the device index
            self.device = torch.device('cuda:' + str(paras.cuda))
        else:
            self.device = torch.device('cpu')
        self.amp = paras.amp

        # Name experiment
        self.exp_name = paras.name
        if self.exp_name is None:
            # By default, exp is named after config file
            self.exp_name = paras.config.split('/')[-1].replace('.yaml', '')
            if mode == 'train':
                self.exp_name += '_sd{}'.format(paras.seed)

        # Plugin list
        self.emb_decoder = None

        # Transfer Learning
        self.transfer_learning = False
        # Suffix for the log directory; stays empty without transfer
        # learning so it is always bound when the logdir is built below.
        log_name = ''
        if (self.config.get('transfer', None) is not None) and mode == 'train':
            self.transfer_learning = True
            self.train_enc = self.config['transfer']['train_enc']
            self.train_dec = self.config['transfer']['train_dec']
            # Indices in 0..3 that are not listed in train_enc
            self.fix_enc = [
                i for i in range(4)
                if i not in self.config['transfer']['train_enc']
            ]
            self.fix_dec = not self.config['transfer']['train_dec']

            # Both names share the same encoder/decoder tags
            enc_tag = ''.join(str(idx) for idx in self.train_enc)
            dec_tag = '1' if self.train_dec else '0'
            log_name = '_T_{}_{}'.format(enc_tag, dec_tag)
            self.save_name = '_tune-{}-{}'.format(enc_tag, dec_tag)

            if self.paras.seed > 0:
                self.save_name += '-sd' + str(self.paras.seed)

        if mode == 'train':
            # Filepath setup
            os.makedirs(paras.ckpdir, exist_ok=True)
            self.ckpdir = os.path.join(paras.ckpdir, self.exp_name)
            os.makedirs(self.ckpdir, exist_ok=True)

            # Logger settings
            self.logdir = os.path.join(paras.logdir, self.exp_name + log_name)
            self.log = SummaryWriter(self.logdir,
                                     flush_secs=self.TB_FLUSH_FREQ)
            self.timer = Timer()

            # Hyperparameters
            self.step = 0
            self.valid_step = config['hparas']['valid_step']
            self.max_step = config['hparas']['max_step']

            self.verbose('Exp. name : {}'.format(self.exp_name))
            self.verbose('Loading data... large corpus may took a while.')
            ### if resume training
            #self.paras.load = config['src']['ckpt']

        elif mode == 'test':
            # Output path
            os.makedirs(paras.outdir, exist_ok=True)
            os.makedirs(os.path.join(paras.outdir, 'dev_out'), exist_ok=True)
            os.makedirs(os.path.join(paras.outdir, 'test_out'), exist_ok=True)
            self.ckpdir = os.path.join(paras.outdir, self.exp_name)

            # Load training config to get acoustic feat, text encoder and
            # build model. The file is opened in a context manager so the
            # handle is closed as soon as parsing finishes.
            # NOTE: yaml.FullLoader should only be used on trusted configs.
            with open(config['src']['config'], 'r') as src_file:
                self.src_config = yaml.load(src_file, Loader=yaml.FullLoader)
            self.paras.load = config['src']['ckpt']

            self.verbose('Evaluating result of tr. config @ {}'.format(
                config['src']['config']))