def work_dir(self, test_name):
    """Create (if missing) and return the working directory for a test.

    Args:
        test_name (str): name of the per-test subdirectory.

    Returns:
        str: path of the directory under ``self.testset_work_dir``.
    """
    path = os.path.join(self.testset_work_dir, test_name)
    util.maybe_create_dir(path)
    return path
Example #2
0
    def work_dir(self, test_name):
        """Return the directory used by a single test, creating it on demand.

        Args:
            test_name (str): name of test (will be taken from stem of the
                name of the current file - see setupClass())

        Returns:
            str: directory path for files.
        """
        target = os.path.join(self.testset_work_dir, test_name)
        util.maybe_create_dir(target)
        return target
Example #3
0
    def work_dir(self, test_name):
        """Ensure a workspace subdirectory exists and return its path.

        Args:
            test_name (str): name of directory to create.

        Returns:
            str: path of directory created.
        """
        subdir = os.path.join(self.testset_work_dir, test_name)
        util.maybe_create_dir(subdir)
        return subdir
Example #4
0
 def work_dir(self, test_name):
     """Ensure the working directory for *test_name* exists and return it.

     Args:
         test_name (str): name of the per-test subdirectory.

     Returns:
         str: path of the directory under ``self.testset_work_dir``.
     """
     directory = os.path.join(self.testset_work_dir, test_name)
     util.maybe_create_dir(directory)
     return directory
Example #5
0
# Echo the effective run configuration before any work starts.
print( "    max_epoch               : {}".format(max_epoch) )
print( "    max_line_length         : {}".format(max_line_length) )
print( "    train_file              : {}".format(train_file) )
print( "    valid_file              : {}".format(valid_file) )
print( "    test_file               : {}".format(test_file) )
print( "    vocab_file              : {}".format(vocab_file) )
print( "    save_dir                : {}".format(save_dir) )
print( "    encoding                : {}".format(encoding) )
    

# Select the requested GPU device; a negative `gpu` means CPU-only.
if gpu >= 0:
    cuda.get_device(gpu).use()

# Array backend: NumPy on CPU, CuPy on GPU (Chainer-style `xp` idiom).
xp = np if gpu < 0 else cuda.cupy

# Make sure the output directory exists before anything is written into it.
maybe_create_dir(save_dir)

print(' load vocab from {} ...'.format(vocab_file) )
# NOTE(review): the file handles opened below (vocab and the three feeders)
# are never explicitly closed — presumably they live for the whole run,
# but confirm whether `with` blocks or close() calls are intended.
vocab = Vocab().load_pack(open(vocab_file, 'rb'), encoding=encoding)

vocab_size = len(vocab)
print(' vocab size: {}'.format(vocab_size) )

# Minibatch feeders over the packed corpora; lines longer than
# `max_line_length` are handled by the feeder (behavior defined elsewhere).
train_batches = MinibatchFeeder(open(train_file, 'rb'), batch_size=batch_size, 
                                max_line_length=max_line_length)
# A 1000-line "head" feeder over the training data — presumably used for
# cheap periodic evaluation on a training subset; confirm against the
# training loop.
train_head_batches = MinibatchFeeder(open(train_file, 'rb'), batch_size=batch_size,
                                     max_line_length=max_line_length, max_num_lines=1000)
valid_batches = MinibatchFeeder(open(valid_file, 'rb'), batch_size=batch_size, max_line_length=max_line_length)

print( "train      : {} lines".format(train_batches.num_epoch_lines) )
print( "train(head): {} lines".format(train_head_batches.num_epoch_lines) )
Example #6
0
    parser.add_argument('--encoding', '-e', type=str, required=False, default='utf-8', help='encoding')
    result = parser.parse_args()
    assert( result.min_count >= 0 )
    return result

# Parse command-line options and unpack them into module-level names.
args = parse_args(sys.argv)
train_file = args.train_file
valid_file = args.valid_file
test_file = args.test_file
save_dir = args.save_dir

min_count = args.min_count
max_vocab = args.max_vocab
encoding = args.encoding

# Ensure the directory containing the training file exists (the packed
# train output below is written next to it).
# NOTE(review): `save_dir` itself is never created here, yet `vocab.save_pack`
# below writes into it — confirm save_dir is guaranteed to exist (or that
# maybe_create_dir(save_dir) happens elsewhere).
maybe_create_dir(os.path.dirname(train_file))

print( "collect vocabulary ..." )
# Build the vocabulary from the raw training text, pruning rare words
# (min_count) and capping its size (max_vocab).
vocab = create_vocab([codecs.open(train_file, encoding=encoding)],
                     min_count=min_count, max_vocab=max_vocab)

# Persist the vocabulary in packed form under save_dir.
vocab_pack_file = os.path.join(save_dir, 'vocab.pack')
print( "save vocab to {} ...".format(vocab_pack_file) )
vocab.save_pack(open(vocab_pack_file, 'wb'))

# Encode the training text with the vocabulary and write the packed
# version alongside the original file.
# NOTE(review): the handles opened inline here are never closed — rely on
# interpreter cleanup, or switch to `with` blocks; confirm intent.
train_pack_file = os.path.join(os.path.dirname(train_file), 'train.pack')
print( "encode and pack train file {} to {} ...".format(train_file, train_pack_file) )
encode_and_pack(vocab, fin=codecs.open(train_file, 'r', encoding=encoding),
                fout=open(train_pack_file, 'wb'))
if valid_file: