def forward(self, arch):
    # initialize the first two nodes
    op0_list = []
    op1_list = []
    for idx, (op, f, t) in enumerate(arch):
        if idx % 2 == 0:
            op0_list.append(op)
        else:
            op1_list.append(op)
    assert len(op0_list) == len(op1_list), 'inconsistent size between op0_list and op1_list'

    node_list = utils.get_variable(list(range(0, 2, 1)), self.device, requires_grad=False)
    op0_list = utils.get_variable(op0_list, self.device, requires_grad=False)
    op1_list = utils.get_variable(op1_list, self.device, requires_grad=False)

    # first two nodes
    x_node_hidden = self.node_hidden(node_list)
    x_op0_hidden = self.op_hidden(op0_list)
    x_op1_hidden = self.op_hidden(op1_list)
    '''
    node0 node1
    op0, op1
    '''
    x_op_hidden = torch.cat((x_op0_hidden, x_op1_hidden), dim=1)
    x_hidden = torch.cat((x_node_hidden, x_op_hidden), dim=0)

    # initialize x and adj
    x = self.emb_attn(x_hidden)
    adj = utils.parse_arch(arch, self.steps + 2).to(self.device)

    # normalize features and adj
    if self.normalize:
        x = utils.sum_normalize(x)
        adj = utils.sum_normalize(adj)

    x = F.relu(self.gc1(x, adj))
    x = F.dropout(x, self.dropout, training=self.training)
    x = self.gc2(x, adj)
    x = x[2:]
    logits = self.fc(x)
    logits = logits.view(self.steps * 2, -1)

    entropy = 0
    log_p = 0
    arch_list = []
    try:
        COMPACT_PRIMITIVES = eval(self.op_type)
    except:
        assert False, 'not supported op type %s' % (self.op_type)
    transition_dict = LooseEnd_Transition_Dict if self.op_type == 'LOOSE_END_PRIMITIVES' else FullyConcat_Transition_Dict
    for idx, (op, f, t) in enumerate(arch):
        select_op = transition_dict[COMPACT_PRIMITIVES[op]]
        selected_arch_index = [COMPACT_PRIMITIVES.index(i) for i in select_op]
        tmp = logits[idx, selected_arch_index]
        prob = F.softmax(tmp, dim=-1)
        prob = prob + 1e-5
        log_prob = torch.log(prob)
        entropy += -(log_prob * prob).sum()
        act = prob.multinomial(num_samples=1)
        pruner_op_name = select_op[act]
        f_op = COMPACT_PRIMITIVES.index(pruner_op_name)
        arch_list.append((f_op, f, t))
        selected_log_p = log_prob.gather(-1, act)
        log_p += selected_log_p.sum()
    return arch_list, log_p, entropy
def vgg_net(self, weights, image):
    layers = (
        'conv1_1', 'relu1_1', 'conv1_2', 'relu1_2', 'pool1',
        'conv2_1', 'relu2_1', 'conv2_2', 'relu2_2', 'pool2',
        'conv3_1', 'relu3_1', 'conv3_2', 'relu3_2', 'conv3_3',
        'relu3_3', 'conv3_4', 'relu3_4', 'pool3',
        'conv4_1', 'relu4_1', 'conv4_2', 'relu4_2', 'conv4_3',
        'relu4_3', 'conv4_4', 'relu4_4', 'pool4',
        'conv5_1', 'relu5_1', 'conv5_2', 'relu5_2', 'conv5_3',
        'relu5_3', 'conv5_4', 'relu5_4'
    )

    net = {}
    current = image
    for i, name in enumerate(layers):
        kind = name[:4]
        if kind == 'conv':
            kernels, bias = weights[i][0][0][0][0]
            # matconvnet: weights are [width, height, in_channels, out_channels]
            # tensorflow: weights are [height, width, in_channels, out_channels]
            kernels = utils.get_variable(np.transpose(kernels, (1, 0, 2, 3)), name=name + "_w")
            bias = utils.get_variable(bias.reshape(-1), name=name + "_b")
            current = utils.conv2d_basic(current, kernels, bias)
        elif kind == 'relu':
            current = tf.nn.relu(current, name=name)
        elif kind == 'pool':
            current = utils.avg_pool_2x2(current)
        net[name] = current

    return net
def vggnet(X, trainlayers=[], weights_file=None):
    # must be an ordered sequence; a set literal would scramble the layer order
    layers = (
        'conv1_1', 'relu1_1', 'conv1_2', 'relu1_2', 'pool1',
        'conv2_1', 'relu2_1', 'conv2_2', 'relu2_2', 'pool2',
        'conv3_1', 'relu3_1', 'conv3_2', 'relu3_2', 'conv3_3',
        'relu3_3', 'conv3_4', 'relu3_4', 'pool3',
        'conv4_1', 'relu4_1', 'conv4_2', 'relu4_2', 'conv4_3',
        'relu4_3', 'conv4_4', 'relu4_4', 'pool4',
        'conv5_1', 'relu5_1', 'conv5_2', 'relu5_2', 'conv5_3'
    )

    current = X
    if weights_file:
        # assumes the file stores a pickled dict of {layer: {'weights': ..., 'biases': ...}}
        weights = np.load(weights_file, allow_pickle=True).item()
    for i, layer in enumerate(layers):
        if 'conv' in layer:
            trainable = layer in trainlayers
            kernels = utils.get_variable(weights[layer]['weights'],
                                         name=layer + '/weights',
                                         trainable=trainable)
            biases = utils.get_variable(weights[layer]['biases'],
                                        name=layer + '/biases',
                                        trainable=trainable)
            current = utils.conv2d(current, kernels, biases, name=layer)
        elif 'relu' in layer:
            current = tf.nn.relu(current, name=layer)
        elif 'pool' in layer:
            current = utils.avg_pool_2x2(current, name=layer)
    return current
def _vgg_net(weights, image):
    layers = ('conv1_1', 'relu1_1', 'conv1_2', 'relu1_2', 'pool1',
              'conv2_1', 'relu2_1', 'conv2_2', 'relu2_2', 'pool2',
              'conv3_1', 'relu3_1', 'conv3_2', 'relu3_2', 'conv3_3',
              'relu3_3', 'conv3_4', 'relu3_4', 'pool3',
              'conv4_1', 'relu4_1', 'conv4_2', 'relu4_2', 'conv4_3',
              'relu4_3', 'conv4_4', 'relu4_4', 'pool4',
              'conv5_1', 'relu5_1', 'conv5_2', 'relu5_2', 'conv5_3',
              'relu5_3', 'conv5_4', 'relu5_4')
    net = {}
    current = image
    for i, name in enumerate(layers):
        kind = name[:4]
        if kind == 'conv':
            kernels, bias = weights[i][0][0][0][0]
            kernels = utils.get_variable(np.transpose(kernels, (0, 1, 2, 3)),
                                         name=name + '_w')
            bias = utils.get_variable(bias.reshape(-1), name=name + '_b')
            current = utils.conv2d_basic(current, kernels, bias)
        elif kind == 'relu':
            current = tf.nn.relu(current, name=name)
        elif kind == 'pool':
            current = utils.avg_pool_2x2(current)
        net[name] = current
    return net
def init_hidden(self):
    return (get_variable(torch.zeros(2 * self.num_layers, self.batch_size, self.hidden_dim // 2),
                         use_gpu=self.use_gpu),
            get_variable(torch.zeros(2 * self.num_layers, self.batch_size, self.hidden_dim // 2),
                         use_gpu=self.use_gpu))
def _vgg_net(weights, image):
    print('setting up vgg model initialized params --> extractor2')
    layers = ('conv1_1', 'relu1_1', 'conv1_2', 'relu1_2', 'pool1',
              'conv2_1', 'relu2_1', 'conv2_2', 'relu2_2', 'pool2',
              'conv3_1', 'relu3_1', 'conv3_2', 'relu3_2', 'conv3_3',
              'relu3_3', 'conv3_4', 'relu3_4', 'pool3',
              'conv4_1', 'relu4_1', 'conv4_2', 'relu4_2', 'conv4_3',
              'relu4_3', 'conv4_4', 'relu4_4', 'pool4',
              'conv5_1', 'relu5_1', 'conv5_2', 'relu5_2', 'conv5_3',
              'relu5_3', 'conv5_4', 'relu5_4')
    net = {}
    current = image
    for i, name in enumerate(layers):
        kind = name[:4]
        if kind == 'conv':
            kernels, bias = weights[i][0][0][0][0]
            # matconvnet kernels are [width, height, in_channels, out_channels];
            # tensorflow expects [height, width, in_channels, out_channels]
            kernels = utils.get_variable(np.transpose(kernels, (1, 0, 2, 3)),
                                         name=name + '_w')
            bias = utils.get_variable(bias.reshape(-1), name=name + '_b')
            current = utils.conv2d_basic(current, kernels, bias)
        elif kind == 'relu':
            current = tf.nn.relu(current, name=name)
        elif kind == 'pool':
            current = utils.avg_pool_2x2(current)
        net[name] = current
    return net
def _vgg_net(weights, image):
    layers = (
        'conv1_1', 'relu1_1', 'conv1_2', 'relu1_2', 'pool1',
        'conv2_1', 'relu2_1', 'conv2_2', 'relu2_2', 'pool2',
        'conv3_1', 'relu3_1', 'conv3_2', 'relu3_2', 'conv3_3',
        'relu3_3', 'conv3_4', 'relu3_4', 'pool3',
        'conv4_1', 'relu4_1', 'conv4_2', 'relu4_2', 'conv4_3',
        'relu4_3', 'conv4_4', 'relu4_4', 'pool4',
        'conv5_1', 'relu5_1', 'conv5_2', 'relu5_2', 'conv5_3',
        'relu5_3', 'conv5_4', 'relu5_4'
    )

    net = {}
    current = image
    for i, name in enumerate(layers):
        kind = name[:4]
        if kind == 'conv':
            kernels, bias = weights[i][0][0][0][0]
            kernels = utils.get_variable(
                np.transpose(kernels, (1, 0, 2, 3)), name=name + '_w')
            bias = utils.get_variable(bias.reshape(-1), name=name + '_b')
            current = utils.conv2d_basic(current, kernels, bias)
            tf.add_to_collection("losses",
                                 tf.contrib.layers.l2_regularizer(0.0005)(kernels))
        elif kind == 'relu':
            current = tf.nn.relu(current, name=name)
        elif kind == 'pool':
            current = utils.max_pool_2x2(current)
        net[name] = current
    return net
def sample(self, batch_size=1, with_details=False, save_dir=None):
    """Samples a set of `args.num_blocks` many computational nodes from the
    controller, where each node is made up of an activation function, and
    each node except the last also includes a previous node.
    """
    if batch_size < 1:
        raise Exception(f'Wrong batch_size: {batch_size} < 1')

    # [B, L, H] zeros
    inputs = self.static_inputs[batch_size]
    hidden = self.static_init_hidden[batch_size]

    entropies = []
    log_probs = []
    actions = []
    # NOTE(brendan): The RNN controller alternately outputs an activation,
    # followed by a previous node, for each block except the last one,
    # which only gets an activation function. The last node is the output
    # node, and its previous node is the average of all leaf nodes.
    for block_idx in range(len(self.num_tokens)):
        if not self.skip_conn:
            decoder_index = block_idx % len(state_space)
        else:
            decoder_index = block_idx
        logits, hidden = self.forward(inputs,
                                      hidden,
                                      decoder_index,
                                      is_embed=(block_idx == 0))

        probs = F.softmax(logits, dim=-1)
        log_prob = F.log_softmax(logits, dim=-1)
        entropy = -(log_prob * probs).sum(1, keepdim=False)

        action = probs.multinomial(num_samples=1).data
        selected_log_prob = log_prob.gather(
            1, utils.get_variable(action, requires_grad=False))

        entropies.append(entropy)
        log_probs.append(selected_log_prob[:, 0])

        inputs = utils.get_variable(
            action[:, 0] + sum(self.num_tokens[:block_idx]),
            self.is_cuda,
            requires_grad=False)
        actions.append(action[:, 0])

    actions = torch.stack(actions).transpose(0, 1)
    dags = _construct_action(actions, state_space)

    if with_details:
        return dags, torch.cat(log_probs), torch.cat(entropies)

    return dags
def sample(self, batch_size=1):
    if batch_size < 1:
        raise Exception(f'Wrong batch_size: {batch_size} < 1')

    # [B, L, H]
    inputs = self.static_inputs[batch_size].cuda()
    hidden = self.static_init_hidden[batch_size]

    self.entropies = []
    self.log_probs = []
    policy_id_list = []
    magnitude_id_list = []
    prob_id_list = []
    for id in range(len(self.num_tokens)):
        # controller forward pass
        logits, hidden = self.forward(inputs, hidden, id, is_embed=(id == 0))

        # output probabilities, log-probabilities and entropy
        probs = F.softmax(logits, dim=-1)
        log_prob = F.log_softmax(logits, dim=-1)
        # TODO(brendan): .mean() for entropy?
        entropy = -(log_prob * probs).sum(1, keepdim=False)

        # sample an action
        action = probs.multinomial(num_samples=1).data
        selected_log_prob = log_prob.gather(
            1, utils.get_variable(action, requires_grad=False))

        # TODO(brendan): why the [:, 0] here? Should it be .squeeze(), or
        # .view()? Same below with `action`.
        self.entropies.append(entropy.view(-1))
        self.log_probs.append(selected_log_prob[:, 0].view(-1))

        mode = id % 3
        inputs = utils.get_variable(action[:, 0] + sum(self.num_tokens[:mode]),
                                    requires_grad=False)

        if mode == 0:
            policy_id_list.append(action[:, 0])
        elif mode == 1:
            magnitude_id_list.append(action[:, 0])
        elif mode == 2:
            prob_id_list.append(action[:, 0])

    subpolicy = _get_sub_policies(policy_id_list, magnitude_id_list,
                                  prob_id_list, self.args)
    self.entropies = torch.cat(self.entropies).sum()
    self.log_probs = torch.cat(self.log_probs).sum()
    return subpolicy
def forward(self, arch):
    # initialize the first two nodes
    op0_list = []
    op1_list = []
    for idx, (op, f, t) in enumerate(arch):
        if idx % 2 == 0:
            op0_list.append(op)
        else:
            op1_list.append(op)
    assert len(op0_list) == len(op1_list), \
        'inconsistent size between op0_list and op1_list'

    node_list = utils.get_variable(list(range(0, 2, 1)), self.device, requires_grad=False)
    op0_list = utils.get_variable(op0_list, self.device, requires_grad=False)
    op1_list = utils.get_variable(op1_list, self.device, requires_grad=False)

    # first two nodes
    x_node_hidden = self.node_hidden(node_list)
    x_op0_hidden = self.op_hidden(op0_list)
    x_op1_hidden = self.op_hidden(op1_list)
    '''
    node0 node1
    op0, op1
    '''
    x_op_hidden = torch.cat((x_op0_hidden, x_op1_hidden), dim=1)
    x_hidden = torch.cat((x_node_hidden, x_op_hidden), dim=0)

    # initialize x and adj
    x = self.emb_attn(x_hidden)
    adj = utils.parse_arch(arch, self.steps + 2).to(self.device)

    # normalize features and adj
    if self.normalize:
        x = utils.sum_normalize(x)
        adj = utils.sum_normalize(adj)

    x = F.relu(self.gc1(x, adj))
    x = F.dropout(x, self.dropout, training=self.training)
    x = self.gc2(x, adj)
    x = x[2:]
    logits = self.fc(x)
    logits = logits.view(self.steps * 2, -1)

    probs = F.softmax(logits, dim=-1)
    probs = probs + 1e-5
    log_probs = torch.log(probs)
    action = probs.multinomial(num_samples=1)
    selected_log_p = log_probs.gather(-1, action)
    log_p = selected_log_p.sum()
    entropy = -(log_probs * probs).sum()
    arch = utils.translate_arch(arch, action, self.op_type)
    return arch, log_p, entropy
def init_hidden(self, batch_size):
    zeros = torch.zeros(batch_size, self.args.controller_hid)
    if self.args.rnn_type == 'lstm':
        return (utils.get_variable(zeros, self.args.cuda, requires_grad=False),
                utils.get_variable(zeros.clone(), self.args.cuda, requires_grad=False))
    if self.args.rnn_type == 'rnn':
        return utils.get_variable(zeros, self.args.cuda, requires_grad=False)
def sample(self, batch_size=1, with_details=False, save_dir=None):
    assert batch_size >= 1

    # [B, L, H]
    inputs = torch.Tensor([self.num_total_tokens - 1]).to(self.args.device).long()
    hidden = None

    activations = []
    entropies = []
    log_probs = []
    prev_nodes = []
    for block_idx in range(2 * (self.args.num_blocks - 1) + 1):
        logits, hidden = self.forward(inputs, hidden, block_idx)

        probs = F.softmax(logits, dim=-1)
        log_prob = F.log_softmax(logits, dim=-1)
        entropy = -(log_prob * probs).sum(1, keepdim=False)

        # sample an action from the categorical distribution
        action = probs.multinomial(num_samples=1).data
        selected_log_prob = log_prob.gather(
            1, utils.get_variable(action, requires_grad=False))

        entropies.append(entropy)
        log_probs.append(selected_log_prob[:, 0])

        # 0: function, 1: previous node
        mode = block_idx % 2
        inputs = utils.get_variable(action[:, 0] + sum(self.num_tokens[:mode]),
                                    requires_grad=False)

        if mode == 0:  # function
            activations.append(action[:, 0])
        elif mode == 1:
            prev_nodes.append(action[:, 0])

    prev_nodes = torch.stack(prev_nodes).transpose(0, 1)
    activations = torch.stack(activations).transpose(0, 1)

    dags = _construct_dags(prev_nodes, activations, self.func_names,
                           self.args.num_blocks)

    # used later
    if save_dir is not None:
        for idx, dag in enumerate(dags):
            utils.draw_network(dag, os.path.join(save_dir, f'graph{idx}.png'))

    if with_details:
        return dags, torch.cat(log_probs), torch.cat(entropies)

    return dags
def construct_weights(self):
    weights = dict()
    weights['kc1'] = get_variable('kc1', (3, 3, 3, 64))
    weights['kc2'] = get_variable('kc2', (3, 3, 64, 64))
    weights['kc3'] = get_variable('kc3', (3, 3, 64, 64))
    weights['kc4'] = get_variable('kc4', (3, 3, 64, 64))
    weights['kd1'] = get_variable('kd1', (64 * 5 * 5, self.output_dimension))
    weights['bc1'] = get_bias_variable('bc1', (64,))
    weights['bc2'] = get_bias_variable('bc2', (64,))
    weights['bc3'] = get_bias_variable('bc3', (64,))
    weights['bc4'] = get_bias_variable('bc4', (64,))
    weights['bd1'] = get_bias_variable('bd1', (self.output_dimension,))
    return weights
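# Hypothetical usage sketch (assumed, not part of the original source): the
# shapes above imply four 3x3 conv layers with 64 channels and a dense layer
# consuming a flattened 5x5x64 feature map, e.g. an 80x80x3 input reduced by
# four stride-2 convolutions (80 -> 40 -> 20 -> 10 -> 5). The stride choice and
# input size are assumptions; `tf` is assumed imported as elsewhere in this code.
def forward_with_weights(x, weights):
    # apply the four conv/relu stages, each halving the spatial resolution
    for i in range(1, 5):
        x = tf.nn.conv2d(x, weights['kc%d' % i], strides=[1, 2, 2, 1], padding='SAME')
        x = tf.nn.relu(x + weights['bc%d' % i])
    # flatten the 5x5x64 feature map and apply the dense output layer
    x = tf.reshape(x, [-1, 64 * 5 * 5])
    return tf.matmul(x, weights['kd1']) + weights['bd1']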
def subset_gridded_dataset(remote_dataset, query, additional_slicers=None):
    """
    Given a forecast (nc) and corners of a spatial subset this function
    returns the smallest subset of the data which fully contains the region
    """
    slicers = {'latitude': latitude_slicer(remote_dataset['latitude'], query),
               'longitude': longitude_slicer(remote_dataset['longitude'], query),
               'time': time_slicer(remote_dataset['time'], query)}
    if additional_slicers is not None:
        slicers.update(additional_slicers)
    variables = [utils.get_variable(x) for x in query['variables']]
    required_variables = list(itertools.chain(*[v.required_variables()
                                                for v in variables]))
    remote_dataset = remote_dataset[required_variables]
    # Until this point all the data might live on a remote server,
    # so we'd like to download as little as possible. As a result
    # we split the subsetting into two steps, the first can be done
    # using slicers which minimizes downloading from openDAP servers,
    # the second pulls out the actual requested domain once the data
    # has been loaded locally.
    local_dataset = remote_dataset.isel(**slicers)
    local_dataset = subset_time(local_dataset, query['hours'])
    local_dataset.load()
    return local_dataset
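# Hypothetical usage sketch (assumed, not part of the original source): open a
# remote forecast over OPeNDAP with xarray and subset it locally. Only the
# 'variables' and 'hours' query keys are visible in the function above; the
# spatial bounds are consumed by latitude_slicer/longitude_slicer, whose exact
# key names are not shown here, so `spatial_bounds` below is a placeholder that
# must be filled in. The URL and variable name are placeholders as well.
import xarray as xr

remote = xr.open_dataset('http://example.com/dods/gfs')  # placeholder OPeNDAP URL
spatial_bounds = {}  # fill with the lat/lon corner keys expected by the slicer helpers
query = dict(variables=['wind'], hours=[0, 6, 12], **spatial_bounds)
local = subset_gridded_dataset(remote, query)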
def __init__(self, args):
    super(Controller, self).__init__()
    self.args = args

    if self.args.network_type == 'rnn':
        self.num_tokens = [len(args.shared_rnn_activations)]
        for idx in range(self.args.num_blocks):
            self.num_tokens += [idx + 1, len(args.shared_rnn_activations)]
        self.func_names = args.shared_rnn_activations
    elif self.args.network_type == 'cnn':
        self.num_tokens = [len(args.shared_cnn_types),
                           self.args.num_blocks]
        self.func_names = args.shared_cnn_types

    num_total_tokens = sum(self.num_tokens)

    self.encoder = nn.Embedding(num_total_tokens, args.controller_hid)
    self.lstm = nn.LSTMCell(args.controller_hid, args.controller_hid)

    pivot = 0
    self.decoders = []
    for idx, size in enumerate(self.num_tokens):
        decoder = nn.Linear(args.controller_hid, size)
        self.decoders.append(decoder)

    self._decoders = nn.ModuleList(self.decoders)

    self.reset_parameters()
    self.static_init_hidden = keydefaultdict(self.init_hidden)

    fn = lambda key: get_variable(
        t.zeros(key, self.args.controller_hid),
        self.args.cuda,
        requires_grad=False)
    self.static_inputs = keydefaultdict(fn)
def __init__(self, batch_size=None, max_time_step=16):
    tf.reset_default_graph()

    self.input_speech = tf.placeholder(
        tf.float32,
        [batch_size,
         max_time_step if max_time_step > 0 else None,
         CONFIG.num_features],
        name='input_speech')
    self.seq_length = tf.placeholder(tf.int32, [batch_size], name='seq_length')
    self.labels_gt = tf.placeholder(tf.int32, [batch_size, None],
                                    name='labels_ground_truth')
    self.keep_prob = tf.placeholder(tf.float32, name="dropout_prob")

    previous_state_c = utils.get_variable(
        'previous_state_c', [batch_size, CONFIG.num_cell_dim], initializer=None)
    previous_state_h = utils.get_variable(
        'previous_state_h', [batch_size, CONFIG.num_cell_dim], initializer=None)
    previous_state = tf.contrib.rnn.LSTMStateTuple(previous_state_c,
                                                   previous_state_h)

    logits, self.layers = create_model_rnn(self.input_speech,
                                           self.seq_length,
                                           keep_prob=self.keep_prob,
                                           previous_state=previous_state)
    new_state_c, new_state_h = self.layers['rnn_output_states']

    # We initialize the states to be zero and update them after every iteration
    zero_state = tf.zeros([batch_size, CONFIG.num_cell_dim], "float")
    initialize_c = tf.assign(previous_state_c, zero_state)
    initialize_h = tf.assign(previous_state_h, zero_state)
    self.initialize_state = tf.group(initialize_c, initialize_h)

    with tf.control_dependencies([tf.assign(previous_state_c, new_state_c),
                                  tf.assign(previous_state_h, new_state_h)]):
        logits = tf.identity(logits, name='logits')

    self.phoneme_prob = tf.nn.softmax(logits)
    self.cost = self.__get_cost(logits)
def setUp(self):
    if self.BASE_USER_TYPE == "all":
        raise unittest.SkipTest("Skipping base test")
    self.civicdr_url = get_variable("BASE_URL")
    self.driver = webdriver.Chrome()
    self.addCleanup(self.driver.close)
    # Make sure we start anew every time
    self.delete_all_test_users()
def vgg_net(weights, image):
    """
    Initialize the network from the pretrained VGG weights.

    Parameters
    ----------
    weights: weights of the pretrained VGG model
    image: training image batch

    Returns
    -------
    net: dict of layer activations after initializing from the VGG weights
    """
    layers = (
        'conv1_1', 'relu1_1', 'conv1_2', 'relu1_2', 'pool1',
        'conv2_1', 'relu2_1', 'conv2_2', 'relu2_2', 'pool2',
        'conv3_1', 'relu3_1', 'conv3_2', 'relu3_2', 'conv3_3',
        'relu3_3', 'conv3_4', 'relu3_4', 'pool3',
        'conv4_1', 'relu4_1', 'conv4_2', 'relu4_2', 'conv4_3',
        'relu4_3', 'conv4_4', 'relu4_4', 'pool4',
        'conv5_1', 'relu5_1', 'conv5_2', 'relu5_2', 'conv5_3',
        'relu5_3', 'conv5_4', 'relu5_4'
    )

    net = {}
    current = image
    for i, name in enumerate(layers):
        kind = name[:4]
        if kind == 'conv':
            kernels, bias = weights[i][0][0][0][0]
            # matconvnet: weights are [width, height, in_channels, out_channels]
            # tensorflow: weights are [height, width, in_channels, out_channels]
            # the two kernel layouts differ, so a transpose is needed
            kernels = utils.get_variable(np.transpose(kernels, (1, 0, 2, 3)), name=name + "_w")
            bias = utils.get_variable(bias.reshape(-1), name=name + "_b")
            current = utils.conv2d_basic(current, kernels, bias)
        elif kind == 'relu':
            current = tf.nn.relu(current, name=name)
            if FLAGS.debug:
                utils.add_activation_summary(current)
        elif kind == 'pool':
            current = utils.avg_pool_2x2(current)
        net[name] = current

    return net
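# Hypothetical usage sketch (assumed, not part of the original source): the
# `weights` array above matches the per-layer layout of the MatConvNet
# 'imagenet-vgg-verydeep-19.mat' release, whose 'layers' entry is squeezed into
# the array indexed as weights[i][0][0][0][0]. Assumes `tf`, `np`, FLAGS and the
# utils helpers used inside vgg_net are available as in the surrounding code;
# the file path is a placeholder.
import scipy.io

model_data = scipy.io.loadmat('imagenet-vgg-verydeep-19.mat')  # placeholder path
mean_pixel = np.mean(model_data['normalization'][0][0][0], axis=(0, 1))
weights = np.squeeze(model_data['layers'])

image = tf.placeholder(tf.float32, shape=[None, 224, 224, 3])
net = vgg_net(weights, image - mean_pixel)
conv5_features = net['conv5_3']  # any intermediate activation can be read from `net`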
def evaluate(eval_data_path, model, batch_size, token_field, label_field,
             tokenizer, verbose=1, use_gpu=True):
    precisions, recalls, fscores = [], [], []
    model.eval()
    id2label = label_field.vocab.itos
    TP = 0
    pred_num = 0
    true_num = 0
    fileids = [
        filename.replace('.txt', '')
        for filename in os.listdir(eval_data_path) if filename.endswith('.txt')
    ]
    for i, fileid in tqdm(enumerate(fileids)):
        # if i == 50:
        #     break
        with open(os.path.join(eval_data_path, fileid + '.txt'), 'rt') as f:
            text = f.read()
        with open(os.path.join(eval_data_path, fileid + '.ann'), 'rt') as f:
            annotations = f.read().split('\n')
        text_spantokens = text_to_spantokens(text, tokenizer)
        true_ann_spantokens = annotations_to_spantokens(annotations)
        x = get_variable(token_field.process([tokenizer(text)],
                                             device=-1,
                                             train=False),
                         use_gpu=use_gpu)
        label_ids = model(x)
        pred_labels = [
            id2label[int(label_id)] for label_id in label_ids.squeeze(0)
        ]
        pred_ann_spantokens = labels_to_anns(pred_labels, text_spantokens)
        pred_num += len(pred_ann_spantokens)
        true_num += len(true_ann_spantokens)
        TP += len(set(pred_ann_spantokens) & set(true_ann_spantokens))
        if verbose:
            print('===========================')
            print(true_ann_spantokens)
            print('-----------------')
            print(pred_ann_spantokens)
            print('-----------------')
            print(set(pred_ann_spantokens) & set(true_ann_spantokens))
            print('===========================')
    precision = TP / pred_num if pred_num > 0 else 0
    recall = TP / true_num if true_num > 0 else 0
    fscore = 2 * precision * recall / (precision + recall) if precision + recall > 0 else 0
    print("true_num: {}, pred_num: {}, TP: {}, precision: {}, recall: {}, fscore: {}"
          .format(true_num, pred_num, TP, precision, recall, fscore))
    return precision, recall, fscore
def forward_with_ref(self, ref_net, batch_size=1):
    if batch_size < 1:
        raise Exception(f'Wrong batch_size: {batch_size} < 1')

    # [B, L, H]
    inputs = self.static_inputs[batch_size]
    hidden = [self.static_init_hidden[batch_size]
              for i in range(self.args.rnn_layer)]

    entropies = []
    log_probs = []

    if self.multi_layer:
        layers = self.arch_layer
    else:
        layers = 1

    for layer in range(layers):
        for block_idx in range(4 * self.args.num_blocks):
            logits, hidden = self.forward(
                inputs,
                hidden,
                layer * 4 * self.args.num_blocks + block_idx,
                is_embed=(block_idx == 0 and layer == 0))

            probs = F.softmax(logits, dim=-1)
            log_prob = F.log_softmax(logits, dim=-1)
            entropy = -(log_prob * probs).sum(1, keepdim=False)

            index = layer * (4 * self.args.num_blocks) + block_idx
            action = ref_net[:, index]
            action = torch.Tensor(action).long().unsqueeze(1)
            action = utils.get_variable(action, self.args.cuda, requires_grad=False)
            selected_log_prob = log_prob.gather(1, action)

            entropies.append(entropy)
            log_probs.append(selected_log_prob[:, 0])

            inputs = utils.get_variable(action[:, 0] + sum(self.num_tokens[:block_idx]),
                                        requires_grad=False)

    return torch.cat(log_probs), torch.cat(entropies)
def loss(self, x, y):
    mask = get_variable(torch.autograd.Variable(x.data.gt(0)), use_gpu=self.use_gpu)
    self.hidden = self.init_hidden()
    #print('char_hidden: {}'.format(char_hidden.shape))
    embeds = self.embed(x)
    #print('embeds_hidden: {}'.format(embeds.shape))
    lstm_out, self.hidden = self.lstm(embeds, self.hidden)
    #print('lstm_out_size: {}'.format(lstm_out.size()))
    lstm_feats = self.hidden2tag(lstm_out)
    #print('lstm_feats: {}'.format(lstm_feats.size()))
    log_likelihood = self.crf(lstm_feats, y, mask=mask)
    # We want to maximize the log-likelihood, but the optimizer minimizes,
    # so multiply by -1.
    return -1 * log_likelihood
def create_graphs(self, max_time_step):
    tf.reset_default_graph()

    self.content_speech = tf.placeholder(
        tf.float32, [1, max_time_step, CONFIG.num_features],
        name='content_speech')
    self.content_seq_length = tf.placeholder(tf.int32, [1],
                                             name='content_seq_length')
    self.style_speech = tf.placeholder(tf.float32,
                                       [1, None, CONFIG.num_features])
    self.style_seq_length = tf.placeholder(tf.int32, [1],
                                           name='style_seq_length')

    self.speech_gen = utils.get_variable(
        'converted_speech', [1, max_time_step, CONFIG.num_features],
        initializer=tf.contrib.layers.xavier_initializer())

    # Content generation
    initial_state = tf.contrib.rnn.LSTMStateTuple(
        tf.zeros([1, CONFIG.num_cell_dim]),
        tf.zeros([1, CONFIG.num_cell_dim]))
    logits_content, self.layers_content = create_model_rnn(
        self.content_speech,
        self.content_seq_length,
        keep_prob=1.0,
        previous_state=initial_state,
        reuse=False)
    logits_gen_speech, self.layers_gen_speech = create_model_rnn(
        self.speech_gen,
        self.content_seq_length,
        keep_prob=1.0,
        previous_state=initial_state,
        reuse=True)

    # Style generation
    embedding_style, _ = create_speaker_embedder_model(
        self.style_speech, self.style_seq_length, keep_prob=1.0)
    embedding_gen_speech, _ = create_speaker_embedder_model(
        self.speech_gen, self.content_seq_length, keep_prob=1.0, reuse=True)

    self.cost = self.__get_cost(self.layers_content['rnn_output'],
                                self.layers_gen_speech['rnn_output'],
                                embedding_style,
                                embedding_gen_speech)
def forward(self, char_id_seq):
    char_lstm_outs = []
    seq_len, batch_size, max_char_len = char_id_seq.shape
    for char_ids in char_id_seq:
        char_ids_b = get_variable(torch.from_numpy(np.flip(char_ids, 1).copy()),
                                  use_gpu=self.use_gpu)
        self.char_lstm_hidden_f = self.init_char_lstm_hidden(max_char_len)
        self.char_lstm_hidden_b = self.init_char_lstm_hidden(max_char_len)
        embed_f = self.char_embed(char_ids)    # (bs, max_char_len, char_embed_dim)
        embed_b = self.char_embed(char_ids_b)  # (bs, max_char_len, char_embed_dim)
        char_lstm_f, self.char_lstm_hidden_f = self.char_lstm_f(
            embed_f, self.char_lstm_hidden_f)  # (bs, max_char_len, char_lstm_dim)
        char_lstm_b, self.char_lstm_hidden_b = self.char_lstm_b(
            embed_b, self.char_lstm_hidden_b)  # (bs, max_char_len, char_lstm_dim)
        char_lstm_outs.append(torch.cat((char_lstm_f[:, -1], char_lstm_b[:, -1]),
                                        dim=1))  # (bs, char_lstm_dim * 2)
    char_lstm_outs = torch.cat(char_lstm_outs).view(
        seq_len, batch_size, -1)  # (seq_length, bs, char_lstm_dim * 2)
    return char_lstm_outs
def forward(self):
    self.prev_nodes, self.prev_ops = [], []
    batch_size = 1
    inputs = self.static_inputs[batch_size]  # batch_size x hidden_dim
    for node_idx in range(self.n_nodes):
        for i in range(2):  # index_1, index_2
            if node_idx == 0 and i == 0:
                embed = inputs
            else:
                embed = self.node_op_hidden(inputs)
            # force uniform
            probs = F.softmax(torch.zeros(node_idx + 2).type_as(embed), dim=-1)
            action = probs.multinomial(num_samples=1)
            self.prev_nodes.append(action)
            inputs = utils.get_variable(action, self.device, requires_grad=False)
        for i in range(2):  # op_1, op_2
            embed = self.node_op_hidden(inputs)
            # force uniform
            probs = F.softmax(torch.zeros(self.n_ops).type_as(embed), dim=-1)
            action = probs.multinomial(num_samples=1)
            self.prev_ops.append(action)
            inputs = utils.get_variable(action + self.n_nodes + 1, self.device,
                                        requires_grad=False)
    arch = utils.convert_lstm_output(self.n_nodes,
                                     torch.cat(self.prev_nodes),
                                     torch.cat(self.prev_ops))
    return arch
def get_variable(nc_file, variable=None):
    """
    Returns a list of variable values.

    :param nc_file: NetCDF file
    :param variable: name of the variable to read; detected automatically if None
    """
    from flyingpigeon import utils
    from numpy import squeeze

    if variable is None:
        var = utils.get_variable(nc_file)
    else:
        var = variable
    ds = Dataset(nc_file)
    values = squeeze(ds.variables[var]).tolist()
    return values
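# Hypothetical usage sketch (assumed, not part of the original source): read the
# values of a variable from a NetCDF file, either letting flyingpigeon detect the
# main variable or naming it explicitly. 'tas.nc' and 'tas' are placeholder names.
values = get_variable('tas.nc')                       # auto-detect the variable
tas_values = get_variable('tas.nc', variable='tas')   # or name it explicitly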
def __init__(self, args):
    super(Controller, self).__init__()
    self.args = args
    self.num_dags = 1
    if self.args.use_single_controller and self.args.use_dual_controller:
        self.num_dags += 1
    if self.args.use_single_controller and self.args.use_atten_controller:
        self.num_dags += 1

    if self.args.network_type in ['rnn', 'seq2seq', 'classification']:
        self.num_tokens = [len(args.rnn_activations)]
        for idx in range(self.args.num_blocks):
            self.num_tokens += [idx + 1, len(args.rnn_activations)]
        self.func_names = args.rnn_activations
    elif self.args.network_type == 'cnn':
        self.num_tokens = [len(args.cnn_types),
                           self.args.num_blocks]
        self.func_names = args.cnn_types
    else:
        raise Exception(f'Unknown network type: {self.args.network_type}')

    num_total_tokens = sum(self.num_tokens)

    self.encoder = nn.Embedding(num_total_tokens, args.controller_hid)
    self.lstm = nn.LSTMCell(args.controller_hid, args.controller_hid)

    pivot = 0
    self.decoders = []
    for idx, size in enumerate(self.num_tokens):
        decoder = nn.Linear(args.controller_hid, size)
        self.decoders.append(decoder)

    self._decoders = nn.ModuleList(self.decoders)

    self.reset_parameters()
    self.static_init_hidden = keydefaultdict(self.init_hidden)

    fn = lambda key: get_variable(
        t.zeros(key, self.args.controller_hid),
        self.args.cuda,
        requires_grad=False)
    self.static_inputs = keydefaultdict(fn)
def _forward(self, x_char, x_subs):
    embed_char = self.embed_char(x_char)
    embed_subs = []
    for i, embed_sub in enumerate(self.embed_subs):
        embed_subs.append(embed_sub(x_subs[i]))
    embed = torch.cat([embed_char] + embed_subs, dim=2)
    del embed_char
    embed = get_variable(embed, use_gpu=self.use_gpu)

    #hg = self.x2h(embed)
    h = self.x2h(embed)
    for i, h2h in enumerate(self.h2hs):
        h = h2h(h)

    g_subs = []
    for i, attention_sub in enumerate(self.attention_subs):
        g_subs.append(attention_sub(embed_subs[i], h))
    hg = torch.cat([h] + g_subs, dim=2)
    del g_subs, embed_subs

    z = self.hg2z(hg)
    for i, z2z in enumerate(self.z2zs):
        z = z2z(z)  # apply each stacked layer in turn (was `self.z2z(z)`)
    return z
def get_loss(self, inputs, targets, dags, **kwargs):
    """Computes the loss for the same batch for M models.

    This amounts to an estimate of the loss, which is turned into an
    estimate for the gradients of the shared model. When Fisher information
    is tracked, the weight-plasticity loss (WPL) is also computed and added.
    """
    if not isinstance(dags, list):
        dags = [dags]

    loss = 0
    rest_loss = 0
    acc = 0
    num_dags = len(dags)
    for dag in dags:
        output, extra_out = self.shared(inputs, dag)
        output_flat = output.view(-1, self.dataset.num_classes)
        sample_loss = (self.ce(output_flat, targets) /
                       self.args.shared_num_sample)
        acc += self.top_k_acc(output_flat, targets) / num_dags
        extra_out['acc'] = acc

        # Get WPL part
        if self.compute_fisher:
            wpl = self.shared.compute_weight_plastic_loss_with_update_fisher(dag)
            wpl = 0.5 * wpl
            loss += sample_loss + wpl
            rest_loss += wpl
        else:
            loss += sample_loss
            rest_loss = utils.get_variable(torch.zeros(1), self.cuda)
        logger.debug(f'Loss {loss.data[0]} = '
                     f'sample_loss {sample_loss.data[0]}')

    # assert len(dags) == 1, 'there are multiple `hidden` for multiple `dags`'
    return loss, sample_loss, rest_loss, extra_out
def init_hidden(self, batch_size):
    zeros = torch.zeros(batch_size, self.args.controller_hid_size)
    return (utils.get_variable(zeros, self.args.cuda, requires_grad=False),
            utils.get_variable(zeros.clone(), self.args.cuda, requires_grad=False))
def _get_default_hidden(key):
    return utils.get_variable(
        torch.zeros(key, self.args.controller_hid_size),
        self.args.cuda,
        requires_grad=False)
def sample(self, batch_size=1, with_details=False, save_dir=None):
    """Samples a set of `args.num_blocks` many computational nodes from the
    controller, where each node is made up of an activation function, and
    each node except the last also includes a previous node.
    """
    if batch_size < 1:
        raise Exception(f'Wrong batch_size: {batch_size} < 1')

    # [B, L, H]
    inputs = self.static_inputs[batch_size]
    hidden = [self.static_init_hidden[batch_size]
              for i in range(self.args.rnn_layer)]

    activations = []
    entropies = []
    log_probs = []
    prev_nodes = []
    # NOTE(brendan): The RNN controller alternately outputs an activation,
    # followed by a previous node, for each block except the last one,
    # which only gets an activation function. The last node is the output
    # node, and its previous node is the average of all leaf nodes.
    if self.multi_layer:
        layers = self.arch_layer
    else:
        layers = 1
    anchor_points = []
    for layer in range(layers):
        for block_idx in range(4 * self.args.num_blocks):
            is_embed = (block_idx % 4 == 0)
            logits, hidden = self.forward(
                inputs,
                hidden,
                layer * 4 * self.args.num_blocks + block_idx,
                is_embed=is_embed)

            probs = F.softmax(logits, dim=-1)
            log_prob = F.log_softmax(logits, dim=-1)
            # TODO(brendan): .mean() for entropy?
            entropy = -(log_prob * probs).sum(1, keepdim=False)

            action = probs.multinomial(num_samples=1).data
            selected_log_prob = log_prob.gather(
                1, utils.get_variable(action, requires_grad=False))

            # TODO(brendan): why the [:, 0] here? Should it be .squeeze(), or
            # .view()? Same below with `action`.
            entropies.append(entropy)
            log_probs.append(selected_log_prob[:, 0])

            # 0, 1: previous node; 2, 3: function name
            mode = block_idx % 4
            inputs = utils.get_variable(
                action[:, 0] + sum(self.num_tokens[:layer * 4 * self.args.num_blocks + block_idx]),
                requires_grad=False)

            if mode == 2 or mode == 3:
                activations.append(action[:, 0])
            elif mode == 0 or mode == 1:
                prev_nodes.append(action[:, 0])
            if mode == 3:
                inputs, hidden = self.forward(inputs, hidden, None, is_embed=False)
                anchor_points.append(inputs)
    del anchor_points[-1]

    if self.every_cell:
        for layer in range(layers - 1):
            inputs = anchor_points[-layer - 1] + inputs
            for block_idx in range(4 * self.args.num_blocks):
                is_embed = (block_idx % 4 == 0)
                logits, hidden = self.forward(
                    inputs,
                    hidden,
                    (layers + layer) * 4 * self.args.num_blocks + block_idx,
                    is_embed=is_embed)

                probs = F.softmax(logits, dim=-1)
                log_prob = F.log_softmax(logits, dim=-1)
                # TODO(brendan): .mean() for entropy?
                entropy = -(log_prob * probs).sum(1, keepdim=False)

                action = probs.multinomial(num_samples=1).data
                selected_log_prob = log_prob.gather(
                    1, utils.get_variable(action, requires_grad=False))

                # TODO(brendan): why the [:, 0] here? Should it be .squeeze(), or
                # .view()? Same below with `action`.
                entropies.append(entropy)
                log_probs.append(selected_log_prob[:, 0])

                # 0, 1: previous node; 2, 3: function name
                mode = block_idx % 4
                inputs = utils.get_variable(
                    action[:, 0] + sum(self.num_tokens[:(layers + layer) * 4 * self.args.num_blocks + block_idx]),
                    requires_grad=False)

                if mode == 2 or mode == 3:
                    activations.append(action[:, 0])
                elif mode == 0 or mode == 1:
                    prev_nodes.append(action[:, 0])
                if mode == 3:
                    inputs, hidden = self.forward(inputs, hidden, None, is_embed=False)

    prev_nodes = torch.stack(prev_nodes).transpose(0, 1)
    activations = torch.stack(activations).transpose(0, 1)

    dags = _construct_dags(prev_nodes, activations, self.func_names)

    if save_dir is not None:
        for idx, dag in enumerate(dags):
            """
            utils.draw_network(dag,
                               os.path.join(save_dir, f'graph{idx}.png'))
            """
            pass

    if with_details:
        return dags, torch.cat(log_probs), torch.cat(entropies)

    return dags
def sample(self, batch_size=1, with_details=False, save_dir=None,
           construct_dag_method=None):
    """Samples a set of `args.num_blocks` many computational nodes from the
    controller, where each node is made up of an activation function, and
    each node except the last also includes a previous node.
    """

    def _construct_micro_cnn_dags(prev_nodes, activations, func_names, num_blocks):
        """Constructs a set of DAGs based on the actions, i.e., previous nodes
        and activation functions, sampled from the controller/policy pi.

        This is tailored for CNN cells only, not the afore-mentioned RNN.

        Args:
            prev_nodes: Previous node actions from the policy.
            activations: Activations sampled from the policy.
            func_names: [normal_func_names, reduce_func_names]
            num_blocks: Number of blocks in the target cell.

        Returns:
            A list of DAGs defined by the inputs.

            CNN cell DAGs are represented in the following way:

            1. The entire DAG is represented as a simple list of two elements:
               [ Normal-Cell, Reduction-Cell ]
            2. Each element is another list containing
               [ (node_id1, node_id2, ops), ] * num_blocks,
               where each tuple represents node1 -- ops --> node2.
            3. Node 0 represents h(t-1), i.e. the previous layer input, and
               node 1 represents h(t), i.e. the current input, so the actual
               index of the current block starts from 2.
        """
        dags = []
        for nodes, func_ids in zip(prev_nodes, activations):
            dag = []
            # compute the first node
            # dag.append(MicroNode(0, 2, func_names[func_ids[0]]))
            # dag.append(MicroNode(1, 2, func_names[func_ids[0]]))
            leaf_nodes = set(range(2, num_blocks + 2))
            # add following nodes
            for curr_idx, (prev_idx, func_id) in enumerate(zip(nodes, func_ids)):
                layer_id = curr_idx // 2 + 2
                _prev_idx = utils.to_item(prev_idx)
                if _prev_idx == layer_id:
                    continue
                assert _prev_idx < layer_id, "Crucial logical error"
                dag.append(MicroNode(_prev_idx, layer_id, func_names[func_id]))
                leaf_nodes -= set([_prev_idx])
            # add leaf node connection with concat
            # for idx in leaf_nodes:
            #     dag.append(MicroNode(idx, num_blocks, 'concat'))
            dag.sort()
            dags.append(dag)
        return dags

    construct_dag_method = construct_dag_method or _construct_micro_cnn_dags

    list_dags = []
    final_log_probs = []
    final_entropies = []
    block_num = 4 * self.args.num_blocks

    # Iterate over the Normal cell and the Reduction cell
    for type_id in range(2):
        if batch_size < 1:
            raise Exception(f'Wrong batch_size: {batch_size} < 1')

        # [B, L, H]
        inputs = self.static_inputs[batch_size]
        hidden = self.static_init_hidden[batch_size]

        activations = []
        entropies = []
        log_probs = []
        prev_nodes = []
        for block_idx in range((0 + type_id) * block_num,
                               (1 + type_id) * block_num):
            logits, hidden = self.forward(
                inputs,
                hidden,
                block_idx,
                is_embed=(block_idx == (0 + type_id) * block_num))

            probs = F.softmax(logits, dim=-1)
            log_prob = F.log_softmax(logits, dim=-1)
            entropy = -(log_prob * probs).sum(1, keepdim=False)

            action = probs.multinomial(num_samples=1).data
            selected_log_prob = log_prob.gather(
                1, utils.get_variable(action, requires_grad=False))

            entropies.append(entropy)
            log_probs.append(selected_log_prob[:, 0])

            # 1: function, 0: previous node
            mode = block_idx % 2
            inputs = utils.get_variable(action[:, 0] + sum(self.num_tokens[:mode]),
                                        requires_grad=False)

            if mode == 1:
                activations.append(action[:, 0])
            elif mode == 0:
                prev_nodes.append(action[:, 0])

        prev_nodes = torch.stack(prev_nodes).transpose(0, 1)
        activations = torch.stack(activations).transpose(0, 1)

        dags = construct_dag_method(
            prev_nodes, activations,
            self.normal_func_names if type_id == 0 else self.reduce_func_names,
            self.args.num_blocks)

        if save_dir is not None:
            for idx, dag in enumerate(dags):
                utils.draw_network(dag,
                                   os.path.join(save_dir, f'graph{idx}.png'))

        # add to the final result
        list_dags.append(dags)
        final_entropies.extend(entropies)
        final_log_probs.extend(log_probs)

    list_dags = [MicroArchi(d1, d2) for d1, d2 in zip(list_dags[0], list_dags[1])]

    if with_details:
        return list_dags, torch.cat(final_log_probs), torch.cat(final_entropies)

    if batch_size == 1 and len(list_dags) != 1:
        list_dags = [list_dags]
    elif batch_size != len(list_dags):
        raise RuntimeError(
            f"Sample batch_size {batch_size} does not match with len list_dags {len(list_dags)}")

    return list_dags
def receive(cfg_name):
    # Init loggers
    error_logger = utils.spawn_logger(cfg_name, 'error')
    info_logger = utils.spawn_logger(cfg_name, 'info')

    # Dictionary from request.data, from json to native
    payload = json.loads(request.data)

    # For cases when an exception is raised before on_error is defined from `config`
    on_error = lambda *args: None

    try:
        cfg = getattr(__import__('config.%s' % cfg_name, globals(), locals(), level=-1),
                      cfg_name)

        # Hooks
        ref_not_fit = getattr(cfg, 'ref_not_fit', lambda *args: None)
        on_command = getattr(cfg, 'on_command', lambda *args: None)
        on_error = getattr(cfg, 'on_error', lambda *args: None)

        # Config variables
        path = utils.get_variable(cfg, 'PATH', '',
                                  lambda p: not p or not os.path.exists(p),
                                  'PATH does not exists')
        refs = utils.get_variable(cfg, 'REFS', [r'.*'],
                                  lambda r: not isinstance(r, list),
                                  'REFS must be a list')
        commands = utils.get_variable(cfg, 'COMMANDS', [],
                                      lambda c: not isinstance(c, list),
                                      'Define COMMANDS variable, else nothing happens')

        for ref in refs:
            refExpr = re.compile(ref, re.IGNORECASE)
            if refExpr.match(payload['ref']) is None:
                # Log a message that the ref does not fit and exit
                ref_not_fit(ref, payload)
                info_logger.info('Ref does not fit')
                return ''

        for command in commands:
            # Variables from payload, access as repository[name]
            command = command.format(**payload)
            on_command(command, payload)

            # Execute current command and log output and errors
            out, err = Popen(command,
                             shell=True,
                             cwd=os.path.join(PATH, cfg.PATH),
                             # close_fds=True,
                             stdout=PIPE,
                             stderr=PIPE).communicate()
            if out:
                try:
                    info_logger.info(out.encode('utf8'))
                except UnicodeDecodeError:
                    pass
            if err:
                try:
                    error_logger.error(err.encode('utf8'))
                except UnicodeDecodeError:
                    pass
    except Exception as e:
        on_error(e, payload)
        error_logger.error(str(e))