def __init__(self, config, paras, mode):
    """Prepare a beam-search decoding run on top of the parent setup.

    Copies the corpus path, audio, train-split, text and model sections from
    the training config (``self.src_config``) into ``self.config``, builds the
    text encoder, and sets up decoding flags, the output file pattern and
    logging.

    Args:
        config: Configuration mapping, forwarded to the parent initializer.
        paras: Run options, forwarded to the parent; ``paras.logdir`` is
            read here.
        mode: Run mode, forwarded to the parent initializer.

    Raises:
        AssertionError: If the corpus name in this config differs from the
            one in the training config.
        NotImplementedError: If ``config['decode']['beam_size']`` is 1,
            because greedy decoding is not implemented.
    """
    super().__init__(config, paras, mode)

    # self.src_config is not assigned in this method; presumably the parent
    # initializer provides it -- verify against the parent class.
    corpus_cfg = self.config['data']['corpus']
    src_corpus_cfg = self.src_config['data']['corpus']

    # ToDo : support tr/eval on different corpus
    # Raised explicitly (rather than via `assert`) so that the check still
    # runs when Python is started with -O.
    if corpus_cfg['name'] != src_corpus_cfg['name']:
        raise AssertionError(
            'Corpus mismatch: config uses {!r} but the training config '
            'uses {!r}'.format(corpus_cfg['name'], src_corpus_cfg['name']))
    corpus_cfg['path'] = src_corpus_cfg['path']
    corpus_cfg['bucketing'] = False

    # The following attributes should be identical to the training config
    self.config['data']['audio'] = self.src_config['data']['audio']
    corpus_cfg['train_split'] = src_corpus_cfg['train_split']
    self.config['data']['text'] = self.src_config['data']['text']
    self.tokenizer = load_text_encoder(**self.config['data']['text'])
    self.config['model'] = self.src_config['model']
    self.finetune_first = 5
    self.best_wer = {'att': 3.0, 'ctc': 3.0}

    # Output file (two placeholders are filled in by the caller)
    self.output_file = str(self.ckpdir)+'_{}_{}.csv'

    decode_cfg = self.config['decode']
    self.greedy = decode_cfg['beam_size'] == 1
    self.dealer = Datadealer(self.config['data']['audio'])
    self.ctc = decode_cfg['ctc_weight'] == 1.0
    if self.greedy:
        # ToDo : implement greedy
        raise NotImplementedError
    # Override batch size for beam decoding
    corpus_cfg['batch_size'] = 1

    # Logger settings
    self.logdir = os.path.join(paras.logdir, self.exp_name)
    self.log = SummaryWriter(self.logdir, flush_secs=self.TB_FLUSH_FREQ)
    self.timer = Timer()
def __init__(self, config, paras, mode):
    """Initialise state shared by training and testing runs.

    Args:
        config: Parsed configuration mapping. ``config['hparas']`` is read
            in 'train' mode and ``config['src']`` in 'test' mode.
        paras: Run options object. The attributes ``gpu``, ``amp``,
            ``name``, ``config`` and ``seed`` are read, plus ``ckpdir`` and
            ``logdir`` ('train') or ``outdir`` ('test'); ``load`` is
            overwritten in 'test' mode.
        mode: ``'train'`` or ``'test'``. Any other value skips both
            mode-specific setups.
    """
    # General Settings
    self.config = config
    self.paras = paras
    self.mode = mode
    for k, v in default_hparas.items():
        setattr(self, k, v)
    if self.paras.gpu and torch.cuda.is_available():
        self.device = torch.device('cuda')
    else:
        self.device = torch.device('cpu')
    # Kept from the original: reports CUDA availability on stdout.
    print(torch.cuda.is_available())
    self.amp = paras.amp

    # Name experiment
    self.exp_name = paras.name
    if self.exp_name is None:
        # By default, exp is named after config file
        self.exp_name = paras.config.split('/')[-1].replace('.yaml', '')
        # NOTE(review): the seed suffix is applied only to auto-derived
        # names -- confirm this nesting against the original layout.
        if mode == 'train':
            self.exp_name += '_sd{}'.format(paras.seed)

    # Plugin list
    self.emb_decoder = None

    if mode == 'train':
        # Filepath setup
        os.makedirs(paras.ckpdir, exist_ok=True)
        self.ckpdir = os.path.join(paras.ckpdir, self.exp_name)
        os.makedirs(self.ckpdir, exist_ok=True)

        # Logger settings
        self.logdir = os.path.join(paras.logdir, self.exp_name)
        self.log = SummaryWriter(self.logdir,
                                 flush_secs=self.TB_FLUSH_FREQ)
        self.timer = Timer()

        # Hyperparameters
        self.step = 0
        self.valid_step = config['hparas']['valid_step']
        self.max_step = config['hparas']['max_step']

        self.verbose('Exp. name : {}'.format(self.exp_name))
        self.verbose('Loading data... large corpus may took a while.')

    elif mode == 'test':
        # Output path
        os.makedirs(paras.outdir, exist_ok=True)
        self.ckpdir = os.path.join(paras.outdir, self.exp_name)

        # Load training config to get acoustic feat, text encoder and
        # build model. The file is opened in a `with` block so the handle
        # is closed as soon as parsing finishes.
        # NOTE(review): consider yaml.safe_load if this file can come from
        # an untrusted source.
        with open(config['src']['config'], 'r') as src_file:
            self.src_config = yaml.load(src_file, Loader=yaml.FullLoader)
        self.paras.load = config['src']['ckpt']

        self.verbose('Evaluating result of tr. config @ {}'.format(
            config['src']['config']))
def __init__(self, config, paras):
    """Initialise common solver state: device, experiment name, paths, logger.

    Args:
        config: Parsed configuration mapping; ``config['hparas']['epoch']``
            is read.
        paras: Run options object; the attributes ``gpu``, ``mode``,
            ``name``, ``config``, ``seed``, ``ckpdir`` and ``logdir`` are
            read.
    """
    # General Settings
    self.config = config
    self.paras = paras
    for attr_name, attr_value in default_hparas.items():
        setattr(self, attr_name, attr_value)

    # CUDA is only queried when the user asked for a GPU.
    cuda_ready = bool(self.paras.gpu and torch.cuda.is_available())
    self.gpu = cuda_ready
    self.device = torch.device('cuda' if cuda_ready else 'cpu')

    # Settings for training/testing
    self.mode = self.paras.mode  # legacy, should be removed

    # Name experiment
    chosen_name = paras.name
    if chosen_name is None:
        # By default, exp is named after config file: everything before the
        # first '.y' in the file name, followed by the seed.
        # NOTE(review): the seed suffix is applied only to auto-derived
        # names -- confirm this nesting against the original layout.
        config_file = paras.config.split('/')[-1]
        chosen_name = config_file.split('.y')[0]
        chosen_name += '_sd{}'.format(paras.seed)
    self.exp_name = chosen_name

    # Filepath setup
    os.makedirs(paras.ckpdir, exist_ok=True)
    self.ckpdir = os.path.join(paras.ckpdir, self.exp_name)
    os.makedirs(self.ckpdir, exist_ok=True)

    # Logger settings
    self.logdir = os.path.join(paras.logdir, self.exp_name)
    self.log = SummaryWriter(self.logdir, flush_secs=self.TB_FLUSH_FREQ)
    self.timer = Timer()

    # Hyperparameters
    self.step = 0
    self.epoch = config['hparas']['epoch']

    self.verbose('Exp. name : {}'.format(self.exp_name))
    self.verbose('Loading data...')
# Driver script: runs AxelrodRunner with an EconomicComplexity model on a
# GraphFrame built from the asoiaf edge list, and reports the elapsed time.
from pyspark import SparkConf, SparkContext
from pyspark.sql import SQLContext

# Spark setup. Executors get PYTHONPATH pointing at the project checkout.
conf = SparkConf()
conf.setAppName("test1")
conf.setExecutorEnv("PYTHONPATH", "/home/hadoop/cs205/")
sc = SparkContext(conf=conf)
sc.setLogLevel("ERROR")
sqlCtx = SQLContext(sc)

# import requirements
# These imports come after the Spark context is created, as in the original
# statement order; keep them here.
from src.models.model_runner import AxelrodRunner
from src.models.axelrod_economic_complexity import EconomicComplexity
from src.data import game_of_thrones
from src.util import Timer
from graphframes import GraphFrame

# Whatever game_of_thrones.read returns is splatted into GraphFrame.
graph_inputs = game_of_thrones.read(
    sqlCtx, path="file:///home/hadoop/data/asoiaf-all-edges.csv")
f = GraphFrame(*graph_inputs)

ec = EconomicComplexity()
runner = AxelrodRunner(ec)

# NOTE(review): the flattened source does not show where the timed block
# ends. Only the run itself is timed here -- confirm whether the two output
# statements below were also meant to be inside the timer.
with Timer() as t:
    result = runner.run(f, num_iter=25)

result[0].vertices.show()
print(result[1])
elapsed_message = "Time elapsed: {}".format(t.interval)
print(elapsed_message)
if __name__ == '__main__':
    # Script entry point: seed, load the dataset, build the model and
    # optimizer, then run the epoch loop until done or interrupted.
    # The variable names below are kept as they are because train() is
    # called without arguments and presumably reads them as module globals.
    torch.manual_seed(42)

    # Load data and turn it into a float32 PyTorch Tensor
    data_path = '../data/eq2_grammar_dataset.h5'
    data = load_data(data_path)
    data = torch.from_numpy(data).float()

    # Create model
    model = GrammarVAE(
        ENCODER_HIDDEN,
        Z_SIZE,
        DECODER_HIDDEN,
        OUTPUT_SIZE,
        RNN_TYPE,
    )
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=LR)

    timer = Timer()
    # One independent list per tracked metric.
    log = {metric: [] for metric in ('loss', 'kl', 'elbo', 'acc')}
    anneal = AnnealKL(step=1e-3, rate=500)

    banner_rule = '-' * 69
    try:
        for epoch in range(1, EPOCHS + 1):
            print(banner_rule,
                  'Epoch {}/{}'.format(epoch, EPOCHS),
                  banner_rule,
                  sep='\n')
            train()
    except KeyboardInterrupt:
        # Ctrl-C ends training without a traceback.
        print(banner_rule, 'Exiting training early', banner_rule, sep='\n')
def __init__(self, config, paras, mode):
    """Initialise solver state, with optional transfer-learning setup.

    Args:
        config: Parsed configuration mapping. ``config['hparas']`` is read
            in 'train' mode, ``config['src']`` in 'test' mode, and the
            optional ``config['transfer']`` section (keys ``train_enc`` and
            ``train_dec``) in 'train' mode.
        paras: Run options object. The attributes ``gpu``, ``cuda``,
            ``amp``, ``name``, ``config`` and ``seed`` are read, plus
            ``ckpdir`` and ``logdir`` ('train') or ``outdir`` ('test');
            ``load`` is overwritten in 'test' mode.
        mode: ``'train'`` or ``'test'``. Any other value skips both
            mode-specific setups.
    """
    # General Settings
    self.config = config
    self.paras = paras
    self.mode = mode
    for k, v in default_hparas.items():
        setattr(self, k, v)
    if self.paras.gpu and torch.cuda.is_available():
        self.device = torch.device('cuda:' + str(paras.cuda))
    else:
        self.device = torch.device('cpu')
    self.amp = paras.amp

    # Name experiment
    self.exp_name = paras.name
    if self.exp_name is None:
        # By default, exp is named after config file
        self.exp_name = paras.config.split('/')[-1].replace('.yaml', '')
        # NOTE(review): the seed suffix is applied only to auto-derived
        # names -- confirm this nesting against the original layout.
        if mode == 'train':
            self.exp_name += '_sd{}'.format(paras.seed)

    # Plugin list
    self.emb_decoder = None
    self.transfer_learning = False

    # Transfer Learning
    log_name = ''  # suffix for the log directory; stays empty without transfer
    transfer_cfg = self.config.get('transfer', None)
    if transfer_cfg is not None and mode == 'train':
        self.transfer_learning = True
        self.train_enc = transfer_cfg['train_enc']
        self.train_dec = transfer_cfg['train_dec']
        # Indices 0-3 not listed in train_enc are the ones to keep fixed.
        # Assumes four encoder indices exist -- TODO confirm with the model.
        self.fix_enc = [i for i in range(4) if i not in self.train_enc]
        self.fix_dec = not self.train_dec

        # Both names encode the same selection: trained encoder indices
        # followed by a 1/0 flag for the decoder.
        enc_tag = ''.join(str(idx) for idx in self.train_enc)
        dec_tag = '1' if self.train_dec else '0'
        log_name = '_T_{}_{}'.format(enc_tag, dec_tag)
        self.save_name = '_tune-{}-{}'.format(enc_tag, dec_tag)
        if self.paras.seed > 0:
            self.save_name += '-sd' + str(self.paras.seed)

    if mode == 'train':
        # Filepath setup
        os.makedirs(paras.ckpdir, exist_ok=True)
        self.ckpdir = os.path.join(paras.ckpdir, self.exp_name)
        os.makedirs(self.ckpdir, exist_ok=True)

        # Logger settings
        self.logdir = os.path.join(paras.logdir, self.exp_name + log_name)
        self.log = SummaryWriter(self.logdir,
                                 flush_secs=self.TB_FLUSH_FREQ)
        self.timer = Timer()

        # Hyperparameters
        self.step = 0
        self.valid_step = config['hparas']['valid_step']
        self.max_step = config['hparas']['max_step']

        self.verbose('Exp. name : {}'.format(self.exp_name))
        self.verbose('Loading data... large corpus may took a while.')
        # To resume training, set self.paras.load = config['src']['ckpt']
        # here (left disabled, as in the original).

    elif mode == 'test':
        # Output path
        os.makedirs(paras.outdir, exist_ok=True)
        os.makedirs(os.path.join(paras.outdir, 'dev_out'), exist_ok=True)
        os.makedirs(os.path.join(paras.outdir, 'test_out'), exist_ok=True)
        self.ckpdir = os.path.join(paras.outdir, self.exp_name)

        # Load training config to get acoustic feat, text encoder and
        # build model. The file is opened in a `with` block so the handle
        # is closed as soon as parsing finishes.
        # NOTE(review): consider yaml.safe_load if this file can come from
        # an untrusted source.
        with open(config['src']['config'], 'r') as src_file:
            self.src_config = yaml.load(src_file, Loader=yaml.FullLoader)
        self.paras.load = config['src']['ckpt']

        self.verbose('Evaluating result of tr. config @ {}'.format(
            config['src']['config']))