Example #1
def train(epoch):
    model.train()
    data = shuffle_data(train_data, 1)
    total_loss = 0.0
    for num, i in enumerate(range(0, len(data), args.batch_size)):
        one = data[i:i + args.batch_size]
        query, _ = padding([x[0] for x in one], max_len=50)
        passage, _ = padding([x[1] for x in one], max_len=350)
        answer = pad_answer([x[2] for x in one])
        query, passage, answer = torch.LongTensor(query), torch.LongTensor(
            passage), torch.LongTensor(answer)
        if args.cuda:
            query = query.cuda()
            passage = passage.cuda()
            answer = answer.cuda()
        optimizer.zero_grad()
        loss = model([query, passage, answer, True])
        loss.backward()
        total_loss += loss.item()
        optimizer.step()
        if (num + 1) % args.log_interval == 0:
            print(
                '|------epoch {:d} train error is {:f}  eclipse {:.2f}%------|'
                .format(epoch, total_loss / args.log_interval,
                        i * 100.0 / len(data)))
            total_loss = 0
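These examples call a project-specific padding helper that is never shown on this page. A minimal sketch, assuming the call pattern of Examples #1, #2 and #12 (a list of token-id sequences plus max_len, returning the padded batch and the kept lengths):

def padding(sequences, max_len, pad_id=0):
    # Right-pad (or truncate) each token-id sequence to max_len and also
    # return the kept lengths, so the result can be fed straight into
    # torch.LongTensor as in train() and inference().
    padded, lengths = [], []
    for seq in sequences:
        seq = list(seq)[:max_len]
        lengths.append(len(seq))
        padded.append(seq + [pad_id] * (max_len - len(seq)))
    return padded, lengths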
Example #2
def inference():
    model.eval()
    predictions = []
    with torch.no_grad():
        for i in range(0, len(data), args.batch_size):
            one = data[i:i + args.batch_size]
            query, _ = padding([x[0] for x in one], max_len=50)
            passage, _ = padding([x[1] for x in one], max_len=300)
            answer = pad_answer([x[2] for x in one])
            str_words = [x[-1] for x in one]
            ids = [x[3] for x in one]
            query, passage, answer = torch.LongTensor(query), torch.LongTensor(
                passage), torch.LongTensor(answer)
            if args.cuda:
                query = query.cuda()
                passage = passage.cuda()
                answer = answer.cuda()
            output = model([query, passage, answer, False])
            for q_id, prediction, candidates in zip(ids, output, str_words):
                prediction_answer = u''.join(candidates[prediction])
                predictions.append(str(q_id) + '\t' + prediction_answer)
    outputs = u'\n'.join(predictions)
    with codecs.open(args.output, 'w', encoding='utf-8') as f:
        f.write(outputs)
    print('done!')
Example #3
 def __init__(self, filename, *args, **kwargs):
     name = 'ffmpeg'
     tags = '/hafarm/ffmpeg'
     super(BatchMp4, self).__init__(name, tags, *args, **kwargs)
     scene_file_path, _, _, _ = utils.padding(filename, 'nuke')
     base, file = os.path.split(scene_file_path)
     file, _ = os.path.splitext(file)
     inputfile = os.path.join(base, const.PROXY_POSTFIX, file + '.jpg')
     outputfile = os.path.join(base, utils.padding(filename)[0] + 'mp4')
     self.parms['command_arg'] = ['-y -r 25 -i %s -an -vcodec libx264 -vpre slow -crf 26 -threads 1 %s' % (inputfile, outputfile)]
     self.parms['exe'] = 'ffmpeg'
     self.parms['job_name'] << { 'render_driver_type': 'mp4' }
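Examples #3, #7, #9 and #18 rely on hafarm's utils.padding to split a frame-numbered filename. A hypothetical sketch of that behaviour, assuming it returns (prefix, frame string, padding length, extension) for names like 'shot.0101.exr'; the real helper may differ:

import os
import re

def padding(filename, _format=None):
    # Split "shot.0101.exr" into ("shot.", "0101", 4, ".exr"); this only
    # mirrors how the surrounding examples index the returned tuple.
    base, ext = os.path.splitext(filename)
    match = re.search(r'(\d+)$', base)
    if not match:
        return base, '', 0, ext
    frame = match.group(1)
    return base[:match.start()], frame, len(frame), ext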
Example #4
def test():
    vES = elasticsearchVisitor()
    request_amount = 10
    model.eval()
    with torch.no_grad():
        while True:
            raw_post = raw_input('input:')
            if raw_post == 'exit':
                break
            # normalize the post
            raw_post = ' '.join(word_tokenize(raw_post))
            raw_post = ''.join([w.lower() for w in raw_post])
            # pick 90 candidate comments
            candi_comms = vES.visitSpaceQuery(raw_post, request_amount * 3)
            # normalize the comms
            candi_comms = filter_comms(candi_comms[::-1], request_amount * 3,
                                       30)
            post = map_sent_to_id(raw_post.split())
            posts, _ = padding([post for _ in range(request_amount * 3)],
                               max_len=30)
            posts = torch.LongTensor(posts)
            if args.cuda:
                posts = posts.cuda()
            comms = list()
            for comm in candi_comms:
                comm = word_tokenize(comm)
                comm = map_sent_to_id(comm)
                comms.append(comm)
            comms, _ = padding([comm for comm in comms], max_len=30)
            comms = torch.LongTensor(comms)
            if args.cuda:
                comms = comms.cuda()
            start = time.time()
            output = model([posts, comms, False])
            print("model cost time %f" % (time.time() - start))
            print('all comm candidates:')
            score_comm = list()
            for i, comm in enumerate(candi_comms):
                jaccard = jaccard_similarity(raw_post, comm)
                score_comm.append([output[i, 0].item(), jaccard, comm])
            score_comm = sorted(score_comm, key=lambda x: x[0], reverse=True)
            for score, jaccard, comm in score_comm:
                print(str(score) + '\t' + str(jaccard) + '\t' + comm)
            print("------------------------------")
            print('match comms:')
            for score, jaccard, comm in score_comm:
                if score > 0.9 and jaccard < 0.4:
                    print(comm)
Example #5
def dataset_input_fn(is_train, batch_size=64, split=1):
    sounds, labels = train[split - 1] if is_train is True else val[split - 1]
    labels = np.array(labels).reshape((-1, 1))
    dataset = tf.data.Dataset.from_generator(
        lambda: zip(sounds, labels),
        output_types=(tf.float32, tf.int32),
        output_shapes=(tf.TensorShape([None]), tf.TensorShape(1)))

    # if is_train:
    # if opt.strongAugment:
    #     dataset = dataset.map(U.random_scale(1.25))
    dataset = dataset.map(U.padding(opt.inputLength // 2))
    dataset = dataset.map(U.random_crop(opt.inputLength))
    dataset = dataset.map(U.normalize(float(2**16 / 2)))
    dataset = dataset.shuffle(1000)

    # else:
    #     # if not opt.longAudio:
    #     dataset = dataset.map(U.padding(opt.inputLength // 2))
    #     dataset = dataset.map(U.random_crop(opt.inputLength))
    #     dataset = dataset.map(U.normalize(float(2 ** 16 / 2)))
    #     # dataset = dataset.map(U.multi_crop(opt.inputLength, opt.nCrops))

    dataset = dataset.batch(batch_size)
    dataset = dataset.map(U.reshape([batch_size, -1, 1]))
    iterator = dataset.make_one_shot_iterator()

    return iterator.get_next()
Example #6
def get_test_loader(full_data, support, query, pad_idx):
    loader = []
    for filename in full_data:
        # support
        support_data = full_data[filename]['neg']['support_data'][
            0:support] + full_data[filename]['pos']['support_data'][0:support]
        support_data = batch_padding(support_data, pad_idx)
        support_target = full_data[filename]['neg']['support_target'][
            0:support] + full_data[filename]['pos']['support_target'][0:support]
        support_target = torch.tensor(support_target)
        # query
        neg_dl = DataLoader(Dataset(full_data[filename]['neg'], pad_idx),
                            batch_size=query * 2,
                            shuffle=False,
                            drop_last=False,
                            **kwargs)
        pos_dl = DataLoader(Dataset(full_data[filename]['pos'], pad_idx),
                            batch_size=query * 2,
                            shuffle=False,
                            drop_last=False,
                            **kwargs)
        # combine
        for dl in [neg_dl, pos_dl]:
            for batch_data, batch_target in dl:
                support_data_cp, support_target_cp = copy.deepcopy(
                    support_data), copy.deepcopy(support_target)
                support_data_cp, batch_data = padding(support_data_cp,
                                                      batch_data, pad_idx)
                data = torch.cat([support_data_cp, batch_data], dim=0)
                target = torch.cat([support_target_cp, batch_target], dim=0)
                loader.append((data, target))
    print('test loader length', len(loader))
    return loader
Example #7
 def __init__(self, filename, *args, **kwargs):
     """
     Args:
         filename (str): 
     Kwargs:
         start (int): 
         end (int):
     """
     name = 'debug_images.py'
     tags = '/hafarm/debug_images'
     super(BatchDebug, self).__init__(name, tags, *args, **kwargs)
     self.parms['start_frame'] = kwargs.get('start', 1)
     self.parms['end_frame'] = kwargs.get('end', 1)
     scene_file_path, _, frame_padding_length, ext = utils.padding(filename)
     path, file = os.path.split(filename)
     path = os.path.join(path, const.DEBUG_POSTFIX)
     self.parms['pre_render_script'] = 'mkdir -p %s' % path
     self.parms['scene_file'] << {
         'scene_fullpath': scene_file_path + const.TASK_ID_PADDED + ext
     }
     self.parms['exe'] = '$HAFARM_HOME/scripts/debug_images.py'
     self.parms['command_arg'] = [
         '--job %s' % self.parms['job_name'], '--save_json -i'
     ]
     self.parms['frame_padding_length'] = int(frame_padding_length)
     self.parms['job_name'] << {'render_driver_type': 'debug'}
Example #8
    def read_iris_data(self, img_paths, margin=5, is_augment=False):
        batch_imgs = np.zeros((len(img_paths), self.input_img_shape[1], self.input_img_shape[1], 1), dtype=np.float32)
        batch_labels = np.zeros((len(img_paths), 1), dtype=np.uint8)

        for i, img_path in enumerate(img_paths):
            mask = np.zeros((self.img_shape[0], self.img_shape[1], 3), dtype=np.uint8)

            # Extract Iris part
            img_combine = cv2.imread(img_path)
            img = img_combine[:, :self.img_shape[1], :]
            seg = img_combine[:, self.img_shape[1]:, :]

            if is_augment is True:
                # Data augmentation: random brightness + random rotation
                img_aug, seg_aug = utils.data_augmentation(img, seg)
                mask[:, :, :][seg_aug[:, :, 1] == 204] = 1
                img = img_aug * mask
            else:
                mask[:, :, :][seg[:, :, 1] == 204] = 1
                img = img * mask

            # Cropping iris part
            x, y, w, h = cv2.boundingRect(mask[:, :, 1])
            new_x = np.maximum(0, x - margin)
            new_y = np.maximum(0, y - margin)
            crop_img = img[new_y:new_y + h + margin, new_x:new_x + w + margin, 1]  # Extract a slightly larger area

            # Padding to the required size by preserving ratio of height and width
            batch_imgs[i, :, :, 0] = utils.padding(crop_img)
            batch_labels[i] = self.convert_to_cls(img_path)

        return batch_imgs, batch_labels
Example #9
 def __init__(self, filename, *args, **kwargs):
     """
     Args:
         filename (str): 
     Kwargs:
         resend_frames (bool): Current state to be in.
         ifd_path
         mad_threshold (float):
     """
     name = 'generate_render_report.py'
     tags = '/hafarm/generate_render_report'
     super(BatchReportsMerger, self).__init__(name, tags, *args, **kwargs)
     resend_frames = kwargs.get('resend_frames', False)
     ifd_path = kwargs.get('ifd_path')
     mad_threshold = kwargs.get('mad_threshold', 5.0)
     send_email = '--send_email'
     ifd_path = '--ifd_path %s' % ifd_path if ifd_path else ''
     resend_frames = '--resend_frames' if resend_frames else ''
     path, filename = os.path.split(filename)
     scene_file_path, _, _, _ = utils.padding(filename, 'shell')
     log_path = os.path.join(path, const.DEBUG_POSTFIX)
     self.parms['job_name'] << {'render_driver_type': 'reports'}
     self.parms['scene_file'] << {
         'scene_file_path': log_path,
         'scene_file_basename': scene_file_path,
         'scene_file_ext': 'json'
     }
     self.parms['exe'] = '$HAFARM_HOME/scripts/generate_render_report.py'
     self.parms['command_arg'] = [
         send_email, ifd_path, resend_frames,
         "--mad_threshold %s" % (mad_threshold), "--save_html"
     ]
Example #10
def pastex25(img):
    width, height = img.size
    imgx9 = padding(img, (height * 5, width * 5), True)
    angles = [
        -15,
        -30,
        -45,
        -60,
        -75,
        -90,
        -105,
        -120,
        -135,
        -150,
        -165,
        -180,
        15,
        30,
        45,
        60,
        75,
        90,
        105,
        120,
        135,
        150,
        165,
        180,
    ]
    xys = [
        [0, 0],
        [width, 0],
        [width * 2, 0],
        [width * 3, 0],
        [width * 4, 0],
        [0, height],
        [width, height],
        [width * 2, height],
        [width * 3, height],
        [width * 4, height],
        [0, height * 2],
        [width, height * 2],
        [width * 3, height * 2],
        [width * 4, height * 2],
        [0, height * 3],
        [width, height * 3],
        [width * 2, height * 3],
        [width * 3, height * 3],
        [width * 4, height * 3],
        [0, height * 4],
        [width, height * 4],
        [width * 2, height * 4],
        [width * 3, height * 4],
        [width * 4, height * 4],
    ]
    for angle, (x, y) in zip(angles, xys):
        aimg = img.rotate(angle, resample=Image.BICUBIC)
        imgx9.paste(aimg, (x, y, x + width, y + height))
    return imgx9
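pastex25 above (and pastex9 in Example #27) first calls padding(img, (h, w), True) to obtain an enlarged canvas onto which the rotated copies are pasted. A minimal PIL-based sketch of that assumed helper:

from PIL import Image

def padding(img, size, center=False):
    # Create a blank canvas of (height, width) = size in the same mode as
    # img and, optionally, paste the original image in the centre.
    height, width = size
    canvas = Image.new(img.mode, (width, height))
    if center:
        canvas.paste(img, ((width - img.width) // 2, (height - img.height) // 2))
    return canvas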
Example #11
 def gen_file(self, data_set, size=True):
     if size:
         size = self.config.batch_size
     else:
         size = 5 * self.config.batch_size
     samples = random.sample(list(data_set), size)
     inputs, length, labels = zip(*self.load_file(samples))
     max_len = max(length)
     inputs = padding(inputs, max_len)
     return inputs, length, labels
Example #12
def test():
    model.eval()
    r, a = 0.0, 0.0
    with torch.no_grad():
        for i in range(0, len(dev_data), args.batch_size):
            one = dev_data[i:i + args.batch_size]
            query, _ = padding([x[0] for x in one], max_len=50)
            passage, _ = padding([x[1] for x in one], max_len=500)
            answer = pad_answer([x[2] for x in one])
            query, passage, answer = torch.LongTensor(query), torch.LongTensor(
                passage), torch.LongTensor(answer)
            if args.cuda:
                query = query.cuda()
                passage = passage.cuda()
                answer = answer.cuda()
            output = model([query, passage, answer, False])
            r += torch.eq(output, 0).sum().item()
            a += len(one)
    return r * 100.0 / a
Example #13
    def preprocess_setup(self):
        if self.train:
            funcs = []
            if self.opt.strongAugment:
                funcs += [U.random_scale(1.25)]

            funcs += [
                U.padding(self.opt.inputLength // 2),
                U.random_crop(self.opt.inputLength),
                U.normalize(32768.0),
            ]

        else:
            funcs = [
                U.padding(self.opt.inputLength // 2),
                U.normalize(32768.0),
                U.multi_crop(self.opt.inputLength, self.opt.nCrops),
            ]

        return funcs
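In Examples #13, #16 and #21, U.padding is a transform factory: U.padding(n) returns a function that is later applied to each sample. A sketch under that assumption, covering both 1-D waveforms and CHW images with numpy:

import numpy as np

def padding(pad):
    # Return a transform that zero-pads a 1-D waveform by `pad` samples on
    # each side, or a CHW image by `pad` pixels on each spatial side, as
    # used before random_crop in the preprocessing pipelines above.
    def transform(x):
        x = np.asarray(x)
        if x.ndim == 1:
            return np.pad(x, (pad, pad), mode='constant')
        return np.pad(x, ((0, 0), (pad, pad), (pad, pad)), mode='constant')
    return transform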
Example #14
    def handle_human_parsing(self, req):

        if self.net is None:
            try:
                self.net = caffe.Net(self.model_path, self.weight_path,
                                     caffe.TEST)
            except:
                rospy.logerr("Error, cannot load deep_net to the GPU")
                self.net = None
                self.service_queue -= 1
                return HumanParsingResponse()

        try:
            img = self.br.imgmsg_to_cv2(req.rgb_img, desired_encoding="bgr8")
            if self.gpu_id >= 0:
                caffe.set_mode_gpu()
                caffe.set_device(self.gpu_id)
            else:
                caffe.set_mode_cpu()

            cv2.imwrite(
                os.path.dirname(os.path.realpath(__file__)) +
                "/../images/rgb_image.png", img)

            img_padding = padding(img, 1)
            img_padding = img_padding.astype(np.float32)
            img_padding -= self.mean
            data = img_padding.transpose((2, 0, 1))
            self.net.blobs['data'].data[0, ...] = data
            out = self.net.forward()
            prediction = self.net.blobs['fc8_mask'].data[0, ...][0]
            prediction = prediction[1:481, 1:641]
            prediction = prediction.astype(np.int8)
            prediction = cv2.merge((prediction, prediction, prediction))
            prediction_rgb = np.zeros(prediction.shape, dtype=np.uint8)
            label_colours_bgr = self.label_colours[..., ::-1]
            cv2.LUT(prediction, label_colours_bgr, prediction_rgb)
            cv2.imwrite(
                os.path.dirname(os.path.realpath(__file__)) +
                "/../images/prediction_rgb.png", prediction_rgb)

            overlay_img = overlay(img, prediction_rgb)
            cv2.imwrite(
                os.path.dirname(os.path.realpath(__file__)) +
                "/../images/overlay.png", overlay_img)
            segmentation_img_msg = self.br.cv2_to_imgmsg(prediction_rgb,
                                                         encoding="bgr8")

            self.net = None
            return HumanParsingResponse(segmentation_img=segmentation_img_msg)

        except cv_bridge.CvBridgeError as e:
            rospy.logerr("CVBridge exception %s", e)
            return HumanParsingResponse()
Example #15
 def combine_batch(self, neg_data, neg_target, pos_data, pos_target):
     neg_data, pos_data = padding(neg_data, pos_data, pad_idx=self.pad_idx)
     # combine support data and query data
     support_data = torch.cat([neg_data[0:self.support], pos_data[0:self.support]], dim=0)
     query_data = torch.cat([neg_data[self.support:], pos_data[self.support:]], dim=0)
     data = torch.cat([support_data, query_data], dim=0)
     # combine support target and query target
     support_target = torch.cat([neg_target[0:self.support], pos_target[0:self.support]], dim=0)
     query_target = torch.cat([neg_target[self.support:], pos_target[self.support:]], dim=0)
     target = torch.cat([support_target, query_target], dim=0)
     return data, target
Example #16
    def preprocess_setup(self):
        if self.train:
            funcs = [U.normalize(self.mean, self.std),
                     U.horizontal_flip(),
                     U.padding(4),
                     U.random_crop(32),
                     ]
        else:
            funcs = [U.normalize(self.mean, self.std)]

        return funcs
Example #17
    def load_train_data(self, positive_file, negative_file):
        # Load data
        positive_examples = []
        negative_examples = []
        with open(positive_file) as fin:
            for line in fin:
                line = line.strip()
                line = line.split()
                parse_line = [int(x) for x in line]
                parse_line = padding(parse_line, self.seq_length, self.unk)
                positive_examples.append(parse_line)
        with open(negative_file) as fin:
            for line in fin:
                line = line.strip()
                line = line.split()
                parse_line = [int(x) for x in line]
                parse_line = padding(parse_line, self.seq_length, self.unk)
                negative_examples.append(parse_line)
        num_negative = (
            (len(positive_examples) + len(negative_examples)) //
            self.batch_size) * self.batch_size - len(positive_examples)
        negative_examples = negative_examples[:num_negative]
        self.sentences = np.array(positive_examples + negative_examples)
        # Generate labels
        positive_labels = [[0, 1] for _ in positive_examples]
        negative_labels = [[1, 0] for _ in negative_examples]
        self.labels = np.concatenate([positive_labels, negative_labels], 0)

        # Shuffle the data
        shuffle_indices = np.random.permutation(np.arange(len(self.labels)))
        self.sentences = self.sentences[shuffle_indices]
        self.labels = self.labels[shuffle_indices]

        # Split batches
        self.num_batch = int(len(self.labels) / self.batch_size)
        #self.sentences = self.sentences[:self.num_batch * self.batch_size]
        #self.labels = self.labels[:self.num_batch * self.batch_size]
        self.sentences_batches = np.split(self.sentences, self.num_batch, 0)
        self.labels_batches = np.split(self.labels, self.num_batch, 0)

        self.pointer = 0
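Examples #17, #20, #22 and #25 pad each token-id line to a fixed sequence length using the unknown-token id. A minimal sketch, assuming truncate-or-right-pad semantics:

def padding(ids, seq_length, unk):
    # Truncate to seq_length, or right-pad with the <UNK>/padding id.
    ids = list(ids)[:seq_length]
    return ids + [unk] * (seq_length - len(ids))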
Example #18
def get_ifd_files(ifds):
    ifds = ifds.strip()
    if not os.path.exists(ifds):
        print >> sys.stderr, ('Error! Ifd file not found: "%s"'%ifds)
        return ([],'','')
    # Rediscover ifds:
    # FIXME: should be simple unexpandedString()
    seq_details = utils.padding(ifds)
    # Find real file sequence on disk. Param could have $F4...
    real_ifds = glob.glob(seq_details[0] + "*" + seq_details[-1])
    real_ifds.sort()
    if real_ifds == []:
        print "Can't find ifds files: %s" % ifds
    return real_ifds, os.path.split(seq_details[0])[1], seq_details[0] + const.TASK_ID + '.ifd'
Example #19
def draw(game):
    # render function
    def _render(game, maps):
        print(game.canvas['title'])
        print(game.canvas['score'])
        for i in range(len(maps)):
            print(maps[i])
        print(game.canvas['tips'])

    # generate the image
    maps = []
    maps.append(game.canvas['splitor'])
    for i in range(game.level):
        row = '|'
        for j in range(game.level):
            if game.layout[i][j] == 0:
                row += padding(' ', game.size) + '|'
            else:
                row += padding(str(game.layout[i][j]), game.size) + '|'
        maps.append(row)
        maps.append(game.canvas['splitor'])

    # render the image
    _render(game, maps)
Example #20
    def create_cond_batches(self, data_file):
        self.cond_stream = []
        with open(data_file, 'r') as f:
            for line in f:
                line = line.strip()
                line = line.split()
                parse_line = [int(x) for x in line]
                self.cond_stream.append(
                    padding(parse_line, self.cond_length, self.unk))

        self.num_batch = int(len(self.cond_stream) / self.batch_size)
        self.cond_stream = self.cond_stream[:self.num_batch * self.batch_size]
        self.cond_batch = np.split(np.array(self.cond_stream), self.num_batch,
                                   0)
        self.cond_pointer = 0
Example #21
    def preprocess_setup(self):
        if self.opt.plus:
            normalize = U.zero_mean
        else:
            normalize = U.normalize
        if self.train and self.opt.noDataAug != True:
            funcs = [normalize(self.mean, self.std),
                     U.horizontal_flip(),
                     U.padding(4),
                     U.random_crop(32),
                     ]
        else:
            funcs = [normalize(self.mean, self.std)]

        return funcs
Example #22
 def load_cond(self, input_file, cond_length, unk):
     """Get a list of unique conditions"""
     f = codecs.open(input_file, 'r', 'utf-8')
     conditions = []
     lines = f.readlines()
     if lines[-1].strip() == '':
         print("deleted the last element:", lines[-1])
         lines = lines[:-1]
     lines = list(set(lines))
     for line in lines:
         line = map(int, line.strip().split())
         line = padding(line, cond_length, unk)
         if not line in conditions:
             conditions.append(line)
     self.cond = np.array(conditions)
     self.n_cond = len(conditions)
Example #23
    def forward(self, xs, train):
        """
        xs: [[int=ID]]
        ys: T x (N,T)
        """
        xs, ms = utils.padding(xs, head=True, with_mask=True)  # (N, T), (N, T)
        xs = utils.convert_ndarray_to_variable(xs, seq=True,
                                               train=train)  # T x (N,)
        ms = utils.convert_ndarray_to_variable(ms, seq=True,
                                               train=train)  # T x (N,)

        es = self.embed_words(xs, train=train)
        g = self.aggregate(es, ms)
        # Note: Here, we assume that "es" follows the original order
        ys, outputs = self.reorder(es, g, ms, train=train)  # bottleneck
        return ys, outputs
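Examples #23 and #30 call utils.padding(xs, head=True, with_mask=True) and later pass ignore_label=-1, which suggests the helper pads with -1 at the head of each sequence and returns a 0/1 mask of real tokens. A sketch under those assumptions:

import numpy as np

def padding(xs, head=True, with_mask=False, pad_id=-1):
    # Pad every ID sequence to the batch maximum length with pad_id, at the
    # head (head=True) or the tail, optionally returning a float mask that
    # marks the real tokens.
    max_len = max(len(x) for x in xs)
    batch = np.full((len(xs), max_len), pad_id, dtype=np.int32)
    mask = np.zeros((len(xs), max_len), dtype=np.float32)
    for i, x in enumerate(xs):
        if head:
            batch[i, max_len - len(x):] = x
            mask[i, max_len - len(x):] = 1.0
        else:
            batch[i, :len(x)] = x
            mask[i, :len(x)] = 1.0
    return (batch, mask) if with_mask else batch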
Example #24
 def __getitem__(self, idx):
     label = self.Label_list[idx]
     if self.stage == 'train' and not self.whole:
         data = np.load(self.Data_dir + self.Data_list[idx] + '.npy',
                        mmap_mode='r').astype(np.float32)
         patch = self.patch_sampler.random_sample(data)
         if self.transform:
             patch = self.transform.apply(patch).astype(np.float32)
         patch = np.expand_dims(patch, axis=0)
         return patch, label
     else:
         data = np.load(self.Data_dir + self.Data_list[idx] +
                        '.npy').astype(np.float32)
         data = np.expand_dims(padding(data, win_size=self.patch_size // 2),
                               axis=0)
         return data, label
Example #25
def get_haiku_conddis(kigo):
    with open(path_to_token2id) as f:
        token2id = pickle.load(f)
    vocab_size = len(token2id)
    UNK = token2id.get('<UNK>', 0)

    generator = Generator(vocab_size,
                          BATCH_SIZE,
                          EMB_DIM,
                          HIDDEN_DIM,
                          SEQ_LENGTH,
                          COND_LENGTH,
                          START_TOKEN,
                          is_cond=1)
    discriminator = Discriminator(sequence_length=SEQ_LENGTH,
                                  cond_length=COND_LENGTH,
                                  num_classes=2,
                                  vocab_size=vocab_size,
                                  batch_size=BATCH_SIZE,
                                  embedding_size=dis_embedding_dim,
                                  filter_sizes=dis_filter_sizes,
                                  num_filters=dis_num_filters,
                                  l2_reg_lambda=dis_l2_reg_lambda,
                                  is_cond=1)

    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    saver.restore(sess, path_to_generator)

    if isinstance(kigo, str):
        kigo = kigo.decode('utf-8')
    cond = map(lambda x: token2id.get(x, 0), [kigo])
    cond = np.array(padding(cond, COND_LENGTH, UNK) * BATCH_SIZE).reshape(
        BATCH_SIZE, COND_LENGTH)
    generated_sequences = generator.generate(sess, cond=cond)
    sess.close()
    id2token = {k: v for v, k in token2id.items()}
    generated_haikus = map(
        lambda y: map(lambda x: id2token.get(x, '<UNK>'), y),
        generated_sequences)

    generated_haikus = map(lambda x: re.sub(r' <UNK>', '', ' '.join(x)),
                           generated_haikus)

    return generated_haikus
Example #26
 def gen_file(self, data_set, size=True):
     if size:
         size = self.config.batch_size
         samples = random.sample(list(data_set), size)
     else:
         samples = data_set
         size = len(samples)  # fall back to the whole set when size is False
     p, q, a = zip(*samples)  # unpack the per-sample (p, q, a) tuples
     inputs = [q[i] + [0] + p[i] for i in range(size)]
     length = [len(it) for it in inputs]
     max_len = max(length)
     inputs = padding(inputs, max_len)
     for row in inputs:
         for col in row:
             if col < 0 or col >= Config.vocab_size:
                 print("BUG!!!!!! index: {}".format(col))
     labels = np.array(a).flatten()
     length = np.array(length)
     return inputs, length, labels
Example #27
def pastex9(img):
    width, height = img.size
    imgx9 = padding(img, (height * 3, width * 3), True)
    angles = [-30, -60, -90, 90, 30, 60, 45, -45]
    xys = [
        [0, 0],
        [width, 0],
        [width * 2, 0],
        [0, height],
        [width * 2, height],
        [0, height * 2],
        [width, height * 2],
        [width * 2, height * 2],
    ]
    for angle, (x, y) in zip(angles, xys):
        aimg = img.rotate(angle, resample=Image.BICUBIC)
        imgx9.paste(aimg, (x, y, x + width, y + height))
    return imgx9
Example #28
 def func(flag, maxlen=maxlen):
     if flag == 'lens':
         lens = np.array(map(len, self.AccGyo[cur_id]))
         lens = [get_closest_maxlen(L) for L in lens]
         return lens
     elif flag == 'AccGyo' or flag == 'XYZ':
         res = self.__dict__[flag][cur_id]
         maxlen = maxlen or max(map(len, res))
         maxlen = get_closest_maxlen(maxlen)
         res = padding(res, maxlen)
         if self.time_major:
             res = np.transpose(res, (1, 0, 2))
         if self.expand_dim:
             res = np.expand_dims(res, -1)
         return res
     elif flag == 'labels':
         return self.labels[cur_id]
     elif flag == 'filenames':
         return self.fs[cur_id]
Example #29
 def gen_file(self):
     samples = random.sample(list(self.data_set), 1)
     gen_tuples = questions_to_token_ids(samples, self.vocabulary)
     print(gen_tuples)
     print("shape: {}".format(np.asarray(gen_tuples).shape))
     inp = input("==========\n")
     p, q, a = zip(*gen_tuples)
     print("p: {}\tq: {}\ta: {}".format(
         np.asarray(p).shape,
         np.asarray(q).shape,
         np.asarray(a).shape))
     inp = input("==========\n")
     inputs = np.concatenate((p, np.zeros(
         (Config.batch_size, 1), dtype=int), q),
                             axis=1)
     length = [len(it) for it in inputs]
     max_len = max(length)
     inputs = [padding(seq, max_len) for seq in inputs]
     return inputs, length, a
Example #30
def forward(
        model, batch_sents, batch_labels,
        lmd, identity_penalty,
        train):
    ys, _ = model.forward(batch_sents, train=train) # T x (N,T)    
    ys = F.concat(ys, axis=0) # => (T*N, T)

    ts, M = utils.padding(batch_labels, head=True, with_mask=True) # => (N, T), (N, T)
    ts = ts.T # => (T, N)
    ts = ts.reshape(-1,) # => (T*N,)
    M = M[:,None,:] * M[:,:,None] # => (N, T, T)
    ts = utils.convert_ndarray_to_variable(ts, seq=False, train=train) # => (T*N,)
    M = utils.convert_ndarray_to_variable(M, seq=False, train=train) # => (N, T, T)

    loss = F.softmax_cross_entropy(ys, ts)
    acc = F.accuracy(ys, ts, ignore_label=-1)

    if identity_penalty:
        loss_id = loss_identity_penalty(ys, M, train=train)
        loss = loss + lmd * loss_id
    return loss, acc
Example #31
# assumed imports (numpy / Theano) needed by this snippet
import numpy as np
from numpy import random as rng
from theano import shared
import theano.tensor as T

from layers import LSTM, FullConnected, TimeDistributed
from models import Decoder, Encoder, Seq2seq, Sequential
from utils import masking, padding

rng.seed(123)

# Preprocess data
x1 = [2, 1, 1, 1, 2, 4, 2]
x2 = [2, 1]
x3 = [2, 1, 4, 3, 1]
batch_value = np.asarray([x1, x2, x3])

vocab_size = 5
embedding_size = 4
encoder_hidden_size = 6

encoder = Encoder(vocab_size + 1, embedding_size, encoder_hidden_size)
mask_value = masking(batch_value)
padded_batch_value = padding(batch_value, 0)

mask = shared(mask_value, name='mask')
padded_batch = shared(padded_batch_value, name='padded_batch')
H, C = encoder.forward(padded_batch, mask)

(h1, c1) = encoder.forward2(x1)
(h2, c2) = encoder.forward2(x2)
(h3, c3) = encoder.forward2(x3)

print(T.isclose(H, T.as_tensor_variable([h1, h2, h3])).eval())
print(T.isclose(C, T.as_tensor_variable([c1, c2, c3])).eval())
Example #32
    def begin_train(self):
        N_EPOCHS = 30
        N_BATCH = 5
        N_TRAIN_INS = len(self.train_ending)
        best_val_accuracy = 0
        best_test_accuracy = 0

        for epoch in range(N_EPOCHS):
            print "epoch ", epoch,":"
            shuffled_index_list = utils.shuffle_index(N_TRAIN_INS)

            max_batch = N_TRAIN_INS/N_BATCH

            start_time = time.time()

            batch_count = 0
            for batch in range(max_batch):
                batch_index_list = [shuffled_index_list[i] for i in range(batch * N_BATCH, (batch+1) * N_BATCH)]
                train_story = [self.train_story[index] for index in batch_index_list]
                train_ending = [self.train_ending[index] for index in batch_index_list]

                neg_end_index_list = np.random.randint(N_TRAIN_INS, size = (N_BATCH,))
                while np.any((np.asarray(batch_index_list) - neg_end_index_list) == 0):
                    neg_end_index_list = np.random.randint(N_TRAIN_INS, size = (N_BATCH,))
                neg_end1 = [self.train_ending[index] for index in neg_end_index_list]

                train_story_matrix = utils.padding(train_story)
                train_ending_matrix = utils.padding(train_ending)
                neg_ending1_matrix = utils.padding(neg_end1)

                train_story_mask = utils.mask_generator(train_story)
                train_ending_mask = utils.mask_generator(train_ending)
                neg_ending1_mask = utils.mask_generator(neg_end1)

                self.train_func(train_story_matrix, train_story_mask, 
                                train_ending_matrix, train_ending_mask,
                                neg_ending1_matrix, neg_ending1_mask)

                batch_count += 1

                if batch_count != 0 and batch_count % 10 == 0:
                    print "--", time.time() - start_time,"seconds--for 10 batches"
                    start_time = time.time()

                # peek on val set every 5000 instances(1000 batches)
                if batch_count != 0 and batch_count % 2000 == 0:
                    print"test on valid set..."
                    val_result = self.val_set_test()
                    print "accuracy is: ", val_result*100, "%"
                    if val_result > best_val_accuracy:
                        print "new best! test on test set..."
                        best_val_accuracy = val_result
                        self.saving_model('val')
                        test_accuracy = self.test_set_test()
                        print "test set accuracy: ", test_accuracy, "%"
                        if test_accuracy > best_test_accuracy:
                            best_test_accuracy = test_accuracy
                            print "saving model..."
                            self.saving_model('test')



        print "reload best model for testing on test set"
        self.reload_model('val')

        print "test on test set..."
        test_result = self.test_set_test()
        print "accuracy is: ", test_result * 100, "%"
Example #33
def run_FUCOS(**kwargs):
    training_data = kwargs.get('training_data')
    validation_data = kwargs.get('validation_data')
    batchsize = kwargs.get('batchsize')
    TRAIN = kwargs.get('TRAIN', True)
    run = kwargs.get('run')

    config_sess = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)
    config_sess.gpu_options.allow_growth = True
    sess = tf.InteractiveSession(config=config_sess)

    #build the model
    model = []
    with tf.device('/gpu:2'):
        x = tf.placeholder(tf.float32, (None, 135, 240, 3), 'input')
        y_ = tf.placeholder(tf.float32, (None, 135, 240, 1), 'gt')
        keep_prob = tf.placeholder(tf.float32, name='dropout_prob')

        with tf.variable_scope('conv1'):
            conv1 = layers.ConvolutionalLayer(x, [135, 240, 3], [3, 3, 3, 64])
            model.append(conv1)
        with tf.variable_scope('conv2'):
            conv2 = layers.ConvolutionalLayer(conv1.output(), conv1.get_output_shape(), [3, 3, 64, 64], pool=True)
            model.append(conv2)

        with tf.variable_scope('conv3'):
            conv3 = layers.ConvolutionalLayer(conv2.output(), conv2.get_output_shape(), [3, 3, 64, 128])
            model.append(conv3)
        with tf.variable_scope('conv4'):
            conv4 = layers.ConvolutionalLayer(conv3.output(), conv3.get_output_shape(), [3, 3, 128, 128], pool=True)
            model.append(conv4)

        with tf.variable_scope('conv5'):
            conv5 = layers.ConvolutionalLayer(conv4.output(), conv4.get_output_shape(), [3, 3, 128, 256])
            model.append(conv5)
        with tf.variable_scope('conv6'):
            conv6 = layers.ConvolutionalLayer(conv5.output(), conv5.get_output_shape(), [3, 3, 256, 256])
            model.append(conv6)
        with tf.variable_scope('conv7'):
            conv7 = layers.ConvolutionalLayer(conv6.output(), conv6.get_output_shape(), [3, 3, 256, 256], pool=True)
            model.append(conv7)

        with tf.variable_scope('conv8'):
            conv8 = layers.ConvolutionalLayer(conv7.output(), conv7.get_output_shape(), [3, 3, 256, 512])
            model.append(conv8)
        with tf.variable_scope('conv9'):
            conv9 = layers.ConvolutionalLayer(conv8.output(), conv8.get_output_shape(), [3, 3, 512, 512])
            model.append(conv9)
        with tf.variable_scope('conv10'):
            conv10 = layers.ConvolutionalLayer(conv9.output(), conv9.get_output_shape(), [3, 3, 512, 512], pool=True)
            model.append(conv10)

        with tf.variable_scope('conv11'):
            conv11 = layers.ConvolutionalLayer(conv10.output(), conv10.get_output_shape(), [3, 3, 512, 512])
            model.append(conv11)
        with tf.variable_scope('conv12'):
            conv12 = layers.ConvolutionalLayer(conv11.output(), conv11.get_output_shape(), [3, 3, 512, 512])
            model.append(conv12)
        with tf.variable_scope('conv13'):
            conv13 = layers.ConvolutionalLayer(conv12.output(), conv12.get_output_shape(), [3, 3, 512, 512], pool=True)
            model.append(conv13)

        with tf.variable_scope('conv14'):
            conv14 = layers.ConvolutionalLayer(conv13.output(), conv13.get_output_shape(), [7, 7, 512, 4096], drop_out=True,
                                               drop_out_prob=keep_prob)
            model.append(conv14)
        with tf.variable_scope('conv15'):
            conv15 = layers.ConvolutionalLayer(conv14.output(), conv14.get_output_shape(), [1, 1, 4096, 4096], drop_out=True,
                                               drop_out_prob=keep_prob)
            model.append(conv15)
        with tf.variable_scope('convtrans1'):
            deconv1 = layers.ConvolutionalTransposeLayer(conv15.output(), [4, 4, 60, 4096], None)
            model.append(deconv1)
        with tf.variable_scope('conv16'):
            conv16 = layers.ConvolutionalLayer(conv10.output(), conv10.get_output_shape(), [1, 1, 512, 60])
            model.append(conv16)
        conv16_output = conv16.output()
        sum1 = conv16_output + tf.image.resize_images(deconv1.output(), (tf.shape(conv16_output)[1],
                                                                         tf.shape(conv16_output)[2]))

        with tf.variable_scope('convtrans2'):
            deconv2 = layers.ConvolutionalTransposeLayer(sum1, [4, 4, 60, 60], None)
            model.append(deconv2)
        with tf.variable_scope('conv17'):
            conv17 = layers.ConvolutionalLayer(conv7.output(), conv7.get_output_shape(), [1, 1, 256, 60])
            model.append(conv17)
        conv17_output = conv17.output()
        sum2 = conv17_output + tf.image.resize_images(deconv2.output(), (tf.shape(conv17_output)[1],
                                                                         tf.shape(conv17_output)[2]))

        with tf.variable_scope('convtrans3'):
            deconv3 = layers.ConvolutionalTransposeLayer(sum2, [16, 16, 60, 60], None, deconv_stride=(1, 8, 8, 1))
            model.append(deconv3)

        with tf.variable_scope('conv18'):
            conv18 = layers.ConvolutionalLayer(deconv3.output(), deconv3.get_output_shape(), [1, 1, 60, 12])
            model.append(conv18)
        with tf.variable_scope('conv19'):
            conv19 = layers.ConvolutionalLayer(conv18.output(), conv18.get_output_shape_tensor(), [1, 1, 12, 1],
                                               activation=function['linear'])
            model.append(conv19)

        y_pre_activation = tf.image.resize_images(conv19.output(), (135, 240)) #resize to match the ground truth's shape
        y_pred = function['sigmoid'](y_pre_activation) #activate the output by sigmoid

        cost = metrics.MultinoulliCrossEntropy(y_pre_activation, y_) #use binary cross entropy
        var_list = tf.get_collection(tf.GraphKeys().TRAINABLE_VARIABLES)
        L2 = sum([tf.reduce_mean(tf.square(theta)) #L2 regularization
              for theta in (weight for weight in var_list if 'weights' in weight.name)])
        cost += 1e-4 * L2

        opt = tf.train.AdamOptimizer(1e-3, 0.9, 0.99, 1e-8).minimize(cost, var_list=var_list) #ADAM optimization
        accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.cast(y_pred >= 0.5, tf.uint8), tf.cast(y_, tf.uint8)), tf.float32))
        saver = tf.train.Saver()

        if TRAIN:
            tf.Operation.run(tf.global_variables_initializer())
            print('Loading VGG16 weights...')
            load_weights('pretrained/vgg16_weights.npz', model, sess) #load pretrained VGG16 weights

            best_valid_accuracy = 0.
            best_valid_loss = np.inf
            best_epoch = 0
            epoch = 0
            vote_to_terminate = 0
            done_looping = False
            print('TRAINING...')
            start_training_time = time.time()
            while epoch < 200 and not done_looping:
                epoch += 1
                num_iter_training = int(training_data[0].shape[0] / batchsize)
                losses_train = 0.
                accuracies_train = 0.
                start_batch_time = time.time()
                print('Epoch %d...' % epoch)
                batch = next_batch(training_data, batchsize) #training
                for b in batch:
                    fd = {x: b[0], y_: b[1], keep_prob: 0.3}
                    _, a, l = sess.run([opt, accuracy, cost], feed_dict=fd)
                    assert not np.isnan(l), 'Train failed with loss being NaN'
                    losses_train += l
                    accuracies_train += a

                print('\ttraining loss: %s' % (losses_train / num_iter_training))
                print('\ttraining accuracy: %s' % (accuracies_train / num_iter_training))
                print('\tepoch %d took %.2f hours' % (epoch, (time.time() - start_batch_time) / 3600.))

                num_iter_valid = int(validation_data[0].shape[0] / batchsize)
                losses_valid = 0.
                accuracies_valid = 0.
                start_valid_time = time.time()
                batch = next_batch(validation_data, batchsize) #validation
                for b in batch:
                    fd = {x: b[0], y_: b[1], keep_prob: 1}
                    l, a = sess.run([cost, accuracy], feed_dict=fd)
                    losses_valid += l
                    accuracies_valid += a
                avr_acc_valid = accuracies_valid / num_iter_valid
                losses_valid /= num_iter_valid

                print('\tvalidation took %.2f hours' % ((time.time() - start_valid_time) / 3600.))
                print('\tvalidation loss: %s' % losses_valid)
                print('\tvalidation accuracy: %s' % avr_acc_valid)

                if losses_valid < best_valid_loss:
                    best_valid_loss = losses_valid
                    best_epoch = epoch
                    vote_to_terminate = 0
                    print('\tbest validation loss achieved: %.4f' % best_valid_loss)
                    save_path = saver.save(sess, run)
                    print("\tmodel saved in file: %s" % save_path)
                else:
                    vote_to_terminate += 1

                if vote_to_terminate > 30:
                    done_looping = True
            print('Training ends after %.2f hours' % ((time.time() - start_training_time) / 3600.))
            print('\tbest validation accuracy: %.2f' % best_valid_accuracy)
            print('Training the model using all data available...')
            total_training_data = (np.concatenate((training_data[0], validation_data[0])),
                                   np.concatenate((training_data[1], validation_data[1])))
            for i in range(best_epoch):
                num_iter_training = int(total_training_data[0].shape[0] / batchsize)
                losses_train = 0.
                start_batch_time = time.time()
                print('Epoch %d...' % (i+1))
                batch = next_batch(total_training_data, batchsize) #training
                for b in batch:
                    fd = {x: b[0], y_: b[1], keep_prob: 0.1}
                    _, _, l = sess.run([opt, accuracy, cost], feed_dict=fd)
                    assert not np.isnan(l), 'Train failed with loss being NaN'
                    losses_train += l

                print('\ttraining loss: %s' % (losses_train / num_iter_training))
                print('\tepoch %d took %.2f hours' % (i+1, (time.time() - start_batch_time) / 3600.))

        else: #testing
            path = kwargs.get('testing_path')
            isfolder = kwargs.get('isfolder')

            image_list = [path + '/' + f for f in os.listdir(path) if f.endswith('.jpg')] if isfolder else [path]
            saver.restore(sess, tf.train.latest_checkpoint(run))
            print('Checkpoint restored...')
            print('Testing %d images...' % len(image_list))
            images = []
            predictions = []
            time.sleep(0.1)
            for i in tqdm.tqdm(range(len(image_list)), unit='images'):
                ori_img = misc.imread(image_list[i])
                if len(ori_img.shape) < 3:
                    continue
                img = padding(ori_img, 135, 240)
                img = np.reshape(img, (1, 135, 240, 3)) / 255.
                fd = {x: img, keep_prob: 1}
                pred = sess.run(y_pred, feed_dict=fd)
                images.append(ori_img)
                predictions.append(pred)
            time.sleep(0.1)
            print('Testing finished!')

            for i in range(len(images)):
                plt.figure(1)
                image = images[i]
                sal = np.reshape(predictions[i], (135, 240))
                sal = depadding(sal, image.shape[0], image.shape[1])
                sal = sal * (sal > np.percentile(sal, 95))
                sal = gaussian_filter(sal, sigma=0.09*sal.shape[0])
                sal = (sal - np.min(sal)) / (np.max(sal) - np.min(sal))
                plt.subplot(211)
                plt.imshow(image)
                plt.subplot(212)
                plt.imshow(sal, cmap='gray')
                plt.show()
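The testing branch of run_FUCOS fits arbitrary photographs into a fixed 135x240 input with padding(ori_img, 135, 240) and maps the saliency map back with depadding. A hypothetical numpy/OpenCV sketch of that pair, assuming an aspect-preserving resize followed by zero padding:

import cv2
import numpy as np

def padding(img, target_h, target_w):
    # Resize so the image fits inside (target_h, target_w) without changing
    # its aspect ratio, then zero-pad the remainder.
    h, w = img.shape[:2]
    scale = min(target_h / float(h), target_w / float(w))
    new_h, new_w = int(round(h * scale)), int(round(w * scale))
    resized = cv2.resize(img, (new_w, new_h))
    out = np.zeros((target_h, target_w, 3), dtype=resized.dtype)
    out[:new_h, :new_w] = resized
    return out

def depadding(sal, orig_h, orig_w):
    # Inverse of padding() for a single-channel map: crop the valid region
    # and resize it back to the original image size.
    target_h, target_w = sal.shape[:2]
    scale = min(target_h / float(orig_h), target_w / float(orig_w))
    new_h, new_w = int(round(orig_h * scale)), int(round(orig_w * scale))
    return cv2.resize(sal[:new_h, :new_w], (orig_w, orig_h))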
Example #34
    def begin_train(self):
        N_EPOCHS = 30
        N_BATCH = 5
        N_TRAIN_INS = len(self.train_ending)
        best_val_accuracy = 0
        best_test_accuracy = 0
        test_threshold = 2000.0
        prev_percentage = 0.0
        speed = 0.0
        batch_count = 0.0

        for epoch in range(N_EPOCHS):
            print "epoch ", epoch,":"
            shuffled_index_list = utils.shuffle_index(N_TRAIN_INS)

            max_batch = N_TRAIN_INS/N_BATCH

            start_time = time.time()

            
            for batch in range(max_batch):
                batch_index_list = [shuffled_index_list[i] for i in range(batch * N_BATCH, (batch+1) * N_BATCH)]
                train_story = [self.train_story[index] for index in batch_index_list]
                train_ending = [self.train_ending[index] for index in batch_index_list]

                neg_end_index_list = np.random.randint(N_TRAIN_INS, size = (N_BATCH,))
                while np.any((np.asarray(batch_index_list) - neg_end_index_list) == 0):
                    neg_end_index_list = np.random.randint(N_TRAIN_INS, size = (N_BATCH,))
                neg_end1 = [self.train_ending[index] for index in neg_end_index_list]

                train_story_matrix = utils.padding(train_story)
                train_ending_matrix = utils.padding(train_ending)
                neg_ending1_matrix = utils.padding(neg_end1)

                train_story_mask = utils.mask_generator(train_story)
                train_ending_mask = utils.mask_generator(train_ending)
                neg_ending1_mask = utils.mask_generator(neg_end1)

                self.train_func(train_story_matrix, train_story_mask, 
                                train_ending_matrix, train_ending_mask,
                                neg_ending1_matrix, neg_ending1_mask)

                if batch_count != 0 and batch_count % 10 == 0:
                    speed = N_BATCH * 10.0 / (time.time() - start_time)
                    start_time = time.time()

                percentage = ((batch_count % test_threshold) + 1) / test_threshold * 100
                if percentage - prev_percentage >= 1:
                    utils.progress_bar(percentage, speed)

                # peek on val set every 5000 instances(1000 batches)
                if batch_count % test_threshold == 0:
                    if batch_count == 0:
                        print "initial test"
                    else:
                        print" "
                    print"test on valid set..."
                    val_result, val_result_list = self.val_set_test()
                    print "accuracy is: ", val_result*100, "%"
                    if val_result > best_val_accuracy:
                        print "new best! test on test set..."
                        best_val_accuracy = val_result
                        self.saving_model('val', best_val_accuracy)
                        pickle.dump(val_result_list, open('./prediction/LSTM_last_1neg_sharewemb_best_val_prediction.pkl','wb'))

                        test_accuracy, test_result_list = self.test_set_test()
                        print "test set accuracy: ", test_accuracy * 100, "%"
                        if test_accuracy > best_test_accuracy:
                            best_test_accuracy = test_accuracy
                            print "saving model..."
                            self.saving_model('test', best_test_accuracy)
                            pickle.dump(test_result_list, open('./prediction/LSTM_last_1neg_sharewemb_best_test_prediction.pkl','wb'))

                batch_count += 1

        print "reload best model for testing on test set"
        self.reload_model('val')

        print "test on test set..."
        test_result = self.test_set_test()
        print "accuracy is: ", test_result * 100, "%"
Example #35
    def begin_train(self):
        N_EPOCHS = 30
        N_BATCH = 20
        N_TRAIN_INS = len(self.train_ending)
        best_val_accuracy = 0
        best_test_accuracy = 0
        test_threshold = 1000.0
        prev_percetage = 0.0
        speed = 0.0
        batch_count = 0.0

        for epoch in range(N_EPOCHS):
            print "epoch ", epoch,":"
            shuffled_index_list = utils.shuffle_index(N_TRAIN_INS)

            max_batch = N_TRAIN_INS/N_BATCH

            start_time = time.time()

            total_cost = 0.0
            total_err_count = 0.0

            for batch in range(max_batch):
                batch_index_list = [shuffled_index_list[i] for i in range(batch * N_BATCH, (batch+1) * N_BATCH)]
                train_story = [self.train_story[index] for index in batch_index_list]
                train_ending = [self.train_ending[index] for index in batch_index_list]

                neg_end_index_list = np.random.randint(N_TRAIN_INS, size = (N_BATCH,))
                while np.any((np.asarray(batch_index_list) - neg_end_index_list) == 0):
                    neg_end_index_list = np.random.randint(N_TRAIN_INS, size = (N_BATCH,))
                neg_end1 = [self.train_ending[index] for index in neg_end_index_list]

                answer = np.random.randint(2, size = N_BATCH)
                answer_vec = np.concatenate((answer.reshape(-1,1), (1 - answer).reshape(-1,1)), axis = 1).astype('int64')
                train_end1 = []
                train_end2 = []
                for i in range(N_BATCH):
                    if answer[i] == 0:
                        train_end1.append(train_ending[i])
                        train_end2.append(neg_end1[i])
                    else:
                        train_end1.append(neg_end1[i])
                        train_end2.append(train_ending[i])

                train_story_matrix = utils.padding(train_story)
                train_end1_matrix = utils.padding(train_end1)
                train_end2_matrix = utils.padding(train_end2)

                train_story_mask = utils.mask_generator(train_story)
                train_end1_mask = utils.mask_generator(train_end1)
                train_end2_mask = utils.mask_generator(train_end2)

                

                cost = self.train_func(train_story_matrix, train_story_mask, 
                                        train_end1_matrix, train_end1_mask,
                                        train_end2_matrix, train_end2_mask, answer_vec)

                prediction = self.prediction(train_story_matrix, train_story_mask,
                                             train_end1_matrix, train_end1_mask,
                                             train_end2_matrix, train_end2_mask)

                total_err_count += (prediction - answer).sum()
                if batch_count != 0 and batch_count % 10 == 0:
                    speed = N_BATCH * 10.0 / (time.time() - start_time)
                    start_time = time.time()

                percetage = ((batch_count % test_threshold)+1) / test_threshold * 100
                if percetage - prev_percetage >= 1:
                    utils.progress_bar(percetage, speed)

                # peek on val set every 5000 instances(1000 batches)
                if batch_count % test_threshold == 0:
                    if batch_count == 0:
                        print "initial test"
                    else:
                        print" "
                    print"test on valid set..."
                    val_result, val_result_list = self.val_set_test()
                    print "accuracy is: ", val_result*100, "%"
                    if val_result > best_val_accuracy:
                        print "new best! test on test set..."
                        best_val_accuracy = val_result
                        self.saving_model('val', best_val_accuracy)
                        pickle.dump(val_result_list, open('./prediction/BLSTM_1neg_class_best_val_prediction.pkl','wb'))

                        test_accuracy, test_result_list = self.test_set_test()
                        print "test set accuracy: ", test_accuracy * 100, "%"
                        if test_accuracy > best_test_accuracy:
                            best_test_accuracy = test_accuracy
                            print "saving model..."
                            self.saving_model('test', best_test_accuracy)
                            pickle.dump(test_result_list, open('./prediction/BLSTM_1neg_class_best_test_prediction.pkl','wb'))

                batch_count += 1
            total_cost += cost
            accuracy = 1-(total_err_count/(max_batch*N_BATCH))
            print ""
            print "total cost in this epoch: ", total_cost
            print "accuracy in this epoch: ", accuracy * 100, "%"


        print "reload best model for testing on test set"
        self.reload_model('val')

        print "test on test set..."
        test_result = self.test_set_test()
        print "accuracy is: ", test_result * 100, "%"
Example #36
    def begin_train(self):
        N_EPOCHS = 30
        N_BATCH = self.batchsize
        N_TRAIN_INS = len(self.train_ending)
        best_val_accuracy = 0
        best_test_accuracy = 0
        test_threshold = 5000/N_BATCH
        prev_percetage = 0.0
        speed = 0.0
        batch_count = 0.0
        start_batch = 0.0

        for epoch in range(N_EPOCHS):
            print "epoch ", epoch,":"
            shuffled_index_list = utils.shuffle_index(N_TRAIN_INS)

            max_batch = N_TRAIN_INS/N_BATCH

            start_time = time.time()

            total_cost = 0.0
            total_err_count = 0.0

            for batch in range(max_batch):
                batch_index_list = [shuffled_index_list[i] for i in range(batch * N_BATCH, (batch+1) * N_BATCH)]
                train_story = [self.train_story[index] for index in batch_index_list]
                train_ending = [self.train_ending[index] for index in batch_index_list]

                neg_end_index_list = np.random.randint(N_TRAIN_INS, size = (N_BATCH,))
                while np.any((np.asarray(batch_index_list) - neg_end_index_list) == 0):
                    neg_end_index_list = np.random.randint(N_TRAIN_INS, size = (N_BATCH,))
                neg_end1 = [self.train_ending[index] for index in neg_end_index_list]

                answer = np.random.randint(2, size = N_BATCH)
                target1 = 1 - answer
                target2 = answer
                # answer_vec = np.concatenate(((1 - answer).reshape(-1,1), answer.reshape(-1,1)),axis = 1)
                end1 = []
                end2 = []

                for i in range(N_BATCH):
                    if answer[i] == 0:
                        end1.append(train_ending[i])
                        end2.append(neg_end1[i])
                    else:
                        end1.append(neg_end1[i])
                        end2.append(train_ending[i])



                train_story_matrix = utils.padding(train_story)
                train_end1_matrix = utils.padding(end1)
                train_end2_matrix = utils.padding(end2)

                train_story_mask = utils.mask_generator(train_story)
                train_end1_mask = utils.mask_generator(end1)
                train_end2_mask = utils.mask_generator(end2)
                

                cost = self.train_func(train_story_matrix, train_story_mask, 
                                        train_end1_matrix, train_end1_mask,
                                        train_end2_matrix, train_end2_mask, target1, target2)

                prediction1, prediction2 = self.prediction(train_story_matrix, train_story_mask,
                                             train_end1_matrix, train_end1_mask,
                                             train_end2_matrix, train_end2_mask)

                prediction = np.concatenate((np.max(prediction1, axis = 1).reshape(-1,1), 
                             np.max(prediction2, axis = 1).reshape(-1,1)), axis = 1)
                total_err_count += abs((np.argmax(prediction, axis = 1) - answer)).sum()

                '''master version print'''
                percetage = ((batch_count % test_threshold)+1) / test_threshold * 100
                if percetage - prev_percetage >= 1:
                    speed = N_BATCH * (batch_count - start_batch) / (time.time() - start_time)
                    start_time = time.time()
                    start_batch = batch_count
                    utils.progress_bar(percetage, speed)
                    prev_percetage = percetage
                    if prev_percetage >= 99:
                        prev_percetage = 0.0
                '''end of print'''
                    
                # peek on val set every 5000 instances (1000 batches)
                if batch_count % test_threshold == 0:
                    if batch_count == 0:
                        print "initial test"
                    else:
                        print" "
                    print"test on valid set..."
                    val_result, val_result_list = self.val_set_test()
                    print "accuracy is: ", val_result*100, "%"
                    if val_result > best_val_accuracy:
                        print "new best! test on test set..."
                        best_val_accuracy = val_result
                        self.saving_model('val', best_val_accuracy)
                        pickle.dump(val_result_list, open('./prediction/BLSTMLP_'+self.blstmmlp_setting+'_bilinear_'+\
                                        'dropout'+str(self.dropout_rate)+'_batch_'+str(self.batchsize)+'_best_val.pkl','wb'))

                        test_accuracy, test_result_list = self.test_set_test()
                        print "test set accuracy: ", test_accuracy * 100, "%"
                        if test_accuracy > best_test_accuracy:
                            best_test_accuracy = test_accuracy
                            print "saving model..."
                            self.saving_model('test', best_test_accuracy)
                            pickle.dump(test_result_list, open('./prediction/BLSTMLP_'+self.blstmmlp_setting+'_bilinear_'+\
                                        'dropout'+str(self.dropout_rate)+'_batch_'+str(self.batchsize)+'_best_test.pkl','wb'))

                batch_count += 1
                total_cost += cost
            accuracy = 1.0 - (total_err_count/(max_batch*N_BATCH))
            speed = max_batch * N_BATCH / (time.time() - start_time)
            print "======================================="
            print "epoch summary:"
            print "average speed: ", speed, "instances/sec"

            print ""
            print "total cost in this epoch: ", total_cost
            print "accuracy in this epoch: ", accuracy * 100, "%"
            print "======================================="


        print "reload best model for testing on test set"
        self.reload_model('val')

        print "test on test set..."
        test_result = self.test_set_test()
        print "accuracy is: ", test_result * 100, "%"
Example #37
0
 def encryptPK(self, data, password, sharedKey):
     # Encrypt a private key with AES-256-CBC; the key is derived from
     # sharedKey + password via PBKDF2, with the random IV used as the salt.
     iv = os.urandom(16)
     data = padding(data)  # pad the plaintext to the 16-byte AES block size
     key = PBKDF2(sharedKey + password, iv, iterations=10).read(32)
     cipher = AES.new(key, AES.MODE_CBC, iv)
     # prepend the IV so decryption can recover it, then base64-encode
     return (iv + cipher.encrypt(data)).encode('base64')
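The padding helper called above (and again in the next example) is not shown in these snippets. AES in CBC mode needs the plaintext length to be a multiple of the 16-byte block size, so one plausible implementation is sketched here as an assumption: PKCS#7-style padding for Python 2 byte strings.

BS = 16  # AES block size in bytes

def padding(data):
    # append N bytes, each of value N, so that len(data) becomes a multiple of 16
    pad_len = BS - (len(data) % BS)
    return data + chr(pad_len) * pad_len

def unpadding(data):
    # strip the padding added above (the last byte encodes the pad length)
    return data[:-ord(data[-1])]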
Example #38
0
 def encrypt(self, key, cipherdata):
     # AES-256-CBC encryption with a PBKDF2-derived key; the random IV
     # doubles as the PBKDF2 salt and is prepended to the ciphertext.
     iv = os.urandom(16)
     cipherdata = padding(cipherdata)  # pad the plaintext to the AES block size
     key = PBKDF2(key, iv, iterations=10).read(32)
     cipher = AES.new(key, AES.MODE_CBC, iv)
     return iv + cipher.encrypt(cipherdata)
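The original snippet only encrypts. For completeness, a hypothetical decryption counterpart is sketched below; it mirrors the derivation above (IV in the first 16 bytes, also used as the PBKDF2 salt) and assumes the PKCS#7-style unpadding helper from the previous sketch.

 def decrypt(self, key, data):
     # the first 16 bytes are the IV, which also served as the PBKDF2 salt
     iv, ciphertext = data[:16], data[16:]
     key = PBKDF2(key, iv, iterations=10).read(32)
     cipher = AES.new(key, AES.MODE_CBC, iv)
     return unpadding(cipher.decrypt(ciphertext))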
Example #39
0
    def begin_train(self):
        N_EPOCHS = 30
        N_BATCH = self.batchsize
        N_TRAIN_INS = len(self.val_answer)
        best_val_accuracy = 0
        best_test_accuracy = 0
        test_threshold = 1000/N_BATCH
        prev_percetage = 0.0
        speed = 0.0
        batch_count = 0.0
        start_batch = 0.0

        for epoch in range(N_EPOCHS):
            print "epoch ", epoch,":"
            shuffled_index_list = utils.shuffle_index(N_TRAIN_INS)

            max_batch = N_TRAIN_INS/N_BATCH

            start_time = time.time()

            total_cost = 0.0
            total_err_count = 0.0

            for batch in range(max_batch):
                batch_index_list = [shuffled_index_list[i] for i in range(batch * N_BATCH, (batch+1) * N_BATCH)]
                train_story = [[self.val_story[index][i] for index in batch_index_list] for i in range(self.story_nsent)]
                train_ending = [self.val_ending1[index] for index in batch_index_list]

                # neg_end_index_list = np.random.randint(N_TRAIN_INS, size = (N_BATCH,))
                # while np.any((np.asarray(batch_index_list) - neg_end_index_list) == 0):
                #     neg_end_index_list = np.random.randint(N_TRAIN_INS, size = (N_BATCH,))
                # neg_end1 = [self.train_ending[index] for index in neg_end_index_list]
                neg_end1 = [self.val_ending2[index] for index in batch_index_list]
                # answer = np.random.randint(2, size = N_BATCH)
                # target1 = 1 - answer
                # target2 = 1 - target1
                answer = np.asarray([self.val_answer[index] for index in batch_index_list])

                target1 = 1 - answer
                target2 = answer
                # answer_vec = np.concatenate(((1 - answer).reshape(-1,1), answer.reshape(-1,1)),axis = 1)
                end1 = []
                end2 = []

                # for i in range(N_BATCH):
                #     if answer[i] == 0:
                #         end1.append(train_ending[i])
                #         end2.append(neg_end1[i])
                #     else:
                #         end1.append(neg_end1[i])
                #         end2.append(train_ending[i])

                for i in range(N_BATCH):
                    end1.append(train_ending[i])
                    end2.append(neg_end1[i])

                train_story_matrices = [utils.padding(batch_sent) for batch_sent in train_story]
                train_end1_matrix = utils.padding(end1)
                train_end2_matrix = utils.padding(end2)

                train_story_mask = [utils.mask_generator(batch_sent) for batch_sent in train_story]
                train_end1_mask = utils.mask_generator(end1)
                train_end2_mask = utils.mask_generator(end2)
                

                cost, prediction1, prediction2 = self.train_func(train_story_matrices[0], train_story_matrices[1], train_story_matrices[2],
                                                               train_story_matrices[3], train_end1_matrix, train_end2_matrix,
                                                               train_story_mask[0], train_story_mask[1], train_story_mask[2],
                                                               train_story_mask[3], train_end1_mask, train_end2_mask, target1, target2)



                prediction = np.argmax(np.concatenate((prediction1, prediction2), axis = 1), axis = 1)
                predict_answer = np.zeros((N_BATCH, ))
                for i in range(N_BATCH):
                    if prediction[i] == 0:
                        predict_answer[i] = 1
                    elif prediction[i] == 1:
                        predict_answer[i] = 0
                    elif prediction[i] == 2:
                        predict_answer[i] = 0
                    else:
                        predict_answer[i] = 1

                total_err_count += (abs(predict_answer - answer)).sum()



                # peek on val set every 1000 instances
                if batch_count % test_threshold == 0:
                    if batch_count == 0:
                        print "initial test"
                    else:
                        if batch >= 0.0:
                            print" "
                            accuracy = 1.0 - (total_err_count/((batch+1)*N_BATCH))
                            print "training set accuracy: ", accuracy * 100, "%"

                    print"test on valid set..."
                    val_result = self.val_set_test()
                    print "accuracy is: ", val_result*100, "%"
                    if val_result > best_val_accuracy:
                        print "new best! test on test set..."
                        best_val_accuracy = val_result

                        test_accuracy = self.test_set_test()
                        print "test set accuracy: ", test_accuracy * 100, "%"
                        if test_accuracy > best_test_accuracy:
                            best_test_accuracy = test_accuracy

                batch_count += 1.0
                '''master version print'''
                percetage = ((batch_count % test_threshold )*1.0) / test_threshold * 100
                if percetage - prev_percetage >= 1.0:
                    speed = N_BATCH * (batch_count- start_batch) / (time.time() - start_time)
                    start_time = time.time()
                    start_batch = batch_count
                    utils.progress_bar(percetage, speed)
                    prev_percetage = percetage
                    if prev_percetage >= 99:
                        prev_percetage = 0.0

                '''end of print'''
                total_cost += cost

            print "======================================="
            print "epoch summary:"
            print "average cost in this epoch: ", total_cost
            print "======================================="