示例#1
0
    def __init__(self,
            data_dir,
            frword2vec,
            fqembed,
            frembed,
            qmax_length=20,
            rmax_length=30,
            ref_method='max_min',
            gru_units=128, mlp_units=None
        ):
        """Build the referenced and unreferenced sub-models of the metric.

        Args:
            data_dir: directory containing the embedding files.
            frword2vec: word2vec file used by the referenced metric.
            fqembed: query embedding file name (joined onto data_dir).
            frembed: reply embedding file name (joined onto data_dir).
            qmax_length: maximum query length in tokens.
            rmax_length: maximum reply length in tokens.
            ref_method: pooling method for the referenced metric.
            gru_units: GRU hidden size of the unreferenced model.
            mlp_units: MLP layer sizes; defaults to [256, 512, 128].
        """
        # Mutable-default fix: build the default list per call instead of
        # sharing one list object across all calls at def time.
        if mlp_units is None:
            mlp_units = [256, 512, 128]

        self.ref = Referenced(data_dir, frword2vec, ref_method)
        # NOTE(review): `train_dir` is neither a parameter nor a local here;
        # this only works if a module-level `train_dir` exists — confirm
        # (NameError otherwise).
        self.unref = Unreferenced(qmax_length, rmax_length,
                os.path.join(data_dir, fqembed),
                os.path.join(data_dir, frembed),
                gru_units, mlp_units,
                train_dir=train_dir)
示例#2
0
    def __init__(self,
                 word2vec_file,
                 query_w2v_file,
                 reply_w2v_file,
                 train_dir,
                 query_max_len=20,
                 reply_max_len=30,
                 pooling_type='max_min',
                 gru_units=128,
                 mlp_units=None):
        """Create the referenced and unreferenced sub-models."""
        # Resolve the default here so the list is fresh per call.
        mlp_units = [256, 512, 128] if mlp_units is None else mlp_units

        logger.info('creating ref model')
        self.ref = Referenced(word2vec_file, pooling_type)

        logger.info('creating unref model')
        self.unref = Unreferenced(query_max_len,
                                  reply_max_len,
                                  query_w2v_file,
                                  reply_w2v_file,
                                  gru_units,
                                  mlp_units,
                                  train_dir=train_dir)
示例#3
0
 def __init__(
     self,
     data_dir,
     frword2vec,
     fqembed,
     frembed,
     qmax_length=20,
     rmax_length=30,
     ref_method='max_min',
     gru_num_units=512,
     mlp_units=None,
     init_learning_rate=0.001,
     margin=0.5,
     batch_norm=False,
     is_training=True,
     train_dataset='',
     log_dir="training",
     scramble=False,
     additional_negative_samples='',
 ):
     """Build the referenced and unreferenced metric models.

     Args:
         data_dir: directory containing the embedding files.
         frword2vec: word2vec file used by the referenced metric.
         fqembed / frembed: query / reply embedding file names.
         qmax_length / rmax_length: maximum query / reply lengths.
         ref_method: pooling method for the referenced metric.
         gru_num_units: GRU hidden size of the unreferenced model.
         mlp_units: MLP layer sizes; defaults to [256, 512, 128].
         init_learning_rate, margin, batch_norm, is_training,
         train_dataset, log_dir, scramble, additional_negative_samples:
             forwarded unchanged to Unreferenced.
     """
     # Mutable-default fix: create the default list per call rather than
     # sharing one list object across every instantiation.
     if mlp_units is None:
         mlp_units = [256, 512, 128]
     print("Initializing referenced model")
     self.ref = Referenced(data_dir, frword2vec, ref_method)
     print("Initializing unreferenced model with log_dir " + log_dir +
           " and ref method " + ref_method)
     self.unref = Unreferenced(
         qmax_length,
         rmax_length,
         os.path.join(data_dir, fqembed),
         os.path.join(data_dir, frembed),
         gru_num_units=gru_num_units,
         mlp_units=mlp_units,
         init_learning_rate=init_learning_rate,
         margin=margin,
         is_training=is_training,
         batch_norm=batch_norm,
         train_dataset=train_dataset,
         log_dir=log_dir,
         scramble=scramble,
         additional_negative_samples=additional_negative_samples)
示例#4
0
 def __init__(self,
              data_dir,
              frword2vec,
              fqembed,
              frembed,
              qmax_length=20,
              rmax_length=30,
              ref_method='max_min',
              gru_units=128,
              mlp_units=None,
              is_training=True):
     """Build the referenced and unreferenced metric models.

     Args:
         data_dir: directory containing the embedding files.
         frword2vec: word2vec file used by the referenced metric.
         fqembed / frembed: query / reply embedding file names.
         qmax_length / rmax_length: maximum query / reply lengths.
         ref_method: pooling method for the referenced metric.
         gru_units: GRU hidden size of the unreferenced model.
         mlp_units: MLP layer sizes; defaults to [256, 512, 128].
         is_training: forwarded to Unreferenced.
     """
     # Mutable-default fix: build the default list per call instead of
     # sharing one list object across all calls.
     if mlp_units is None:
         mlp_units = [256, 512, 128]
     print("Initializing referenced model")
     self.ref = Referenced(data_dir, frword2vec, ref_method)
     print("Initializing unreferenced model")
     # NOTE(review): `train_dir` is neither a parameter nor a local — this
     # only works if a module-level `train_dir` exists; confirm.
     self.unref = Unreferenced(qmax_length,
                               rmax_length,
                               os.path.join(data_dir, fqembed),
                               os.path.join(data_dir, frembed),
                               gru_units,
                               mlp_units,
                               train_dir=train_dir,
                               is_training=is_training)
示例#5
0
class Hybrid(object):
    """RUBER-style hybrid metric.

    Combines a referenced score (similarity of a generated reply to the
    ground-truth reply) with an unreferenced score (learned query/reply
    relatedness) by taking the per-sample minimum of the two normalized
    scores.
    """

    def __init__(self,
                 word2vec_file,
                 query_w2v_file,
                 reply_w2v_file,
                 train_dir,
                 query_max_len=20,
                 reply_max_len=30,
                 pooling_type='max_min',
                 gru_units=128,
                 mlp_units=None):
        """Create the referenced and unreferenced sub-models."""
        if mlp_units is None:
            mlp_units = [256, 512, 128]
        logger.info('creating ref model')
        self.ref = Referenced(word2vec_file, pooling_type)

        logger.info('creating unref model')
        self.unref = Unreferenced(query_max_len, reply_max_len,
                                  query_w2v_file,
                                  reply_w2v_file,
                                  gru_units, mlp_units,
                                  train_dir=train_dir)

    def train_unref(self, query_file, reply_file):
        """Train the unreferenced sub-model on (query, reply) files."""
        logger.info('training unref model')
        self.unref.train(query_file, reply_file)

    def _normalize(self, scores):
        """Min-max normalize `scores` into [0, 1].

        Returns all zeros when the scores are constant; the original
        formulation divided by zero in that case.
        """
        smin = min(scores)
        smax = max(scores)
        diff = smax - smin
        if diff == 0:
            # Constant scores carry no ranking information.
            return [0.0 for _ in scores]
        return [(s - smin) / diff for s in scores]

    def get_ref_scores(self, reply_file, generated_file):
        """Normalized referenced scores of generated vs. ground-truth replies."""
        logger.info('computing ref_scores')
        ref_scores = self.ref.get_scores(reply_file, generated_file)
        ref_scores = self._normalize(ref_scores)
        return ref_scores

    def get_unref_scores(self, generated_file, query_file, query_vocab_file, reply_vocab_file):
        """Normalized unreferenced scores of generated replies given queries."""
        logger.info('computing unref_scores')
        unref_scores = self.unref.get_scores(query_file, generated_file, query_vocab_file, reply_vocab_file)
        unref_scores = self._normalize(unref_scores)
        return unref_scores

    def get_scores(self, query_file, reply_file, generated_file, query_vocab_file, reply_vocab_file):
        """Per-sample hybrid score: min(ref, unref)."""
        ref_scores = self.get_ref_scores(reply_file, generated_file)
        unref_scores = self.get_unref_scores(generated_file, query_file, query_vocab_file, reply_vocab_file)
        # min() combiner.
        return [min(a, b) for a, b in zip(ref_scores, unref_scores)]
示例#6
0
class Hybrid():
    """RUBER-style hybrid metric: per-sample minimum of the normalized
    referenced and unreferenced scores."""

    def __init__(self,
                 data_dir,
                 frword2vec,
                 fqembed,
                 frembed,
                 qmax_length=20,
                 rmax_length=30,
                 ref_method='max_min',
                 gru_units=128,
                 mlp_units=None):
        """Create the referenced and unreferenced sub-models."""
        # Mutable-default fix: create the default list per call.
        if mlp_units is None:
            mlp_units = [256, 512, 128]

        self.ref = Referenced(data_dir, frword2vec, ref_method)
        # NOTE(review): `train_dir` is not defined in this scope; this relies
        # on a module-level global — confirm it exists.
        self.unref = Unreferenced(qmax_length,
                                  rmax_length,
                                  os.path.join(data_dir, fqembed),
                                  os.path.join(data_dir, frembed),
                                  gru_units,
                                  mlp_units,
                                  train_dir=train_dir)

    def train_unref(self, data_dir, fquery, freply):
        """Train the unreferenced sub-model."""
        self.unref.train(data_dir, fquery, freply)

    def normalize(self, scores):
        """Min-max normalize `scores` into [0, 1]; returns all zeros when the
        scores are constant (the original divided by zero in that case)."""
        smin = min(scores)
        smax = max(scores)
        diff = smax - smin
        if diff == 0:
            return [0.0 for _ in scores]
        return [(s - smin) / diff for s in scores]

    def scores(self, data_dir, fquery, freply, fgenerated, fqvocab, frvocab):
        """Per-sample hybrid score: min(normalized ref, normalized unref)."""
        ref_scores = self.ref.scores(data_dir, freply, fgenerated)
        ref_scores = self.normalize(ref_scores)

        unref_scores = self.unref.scores(data_dir, fquery, fgenerated, fqvocab,
                                         frvocab)
        # Bug fix: the original assigned the normalized values to a
        # misspelled `unref_socres` and returned the UNnormalized scores.
        unref_scores = self.normalize(unref_scores)

        return [min(a, b) for a, b in zip(ref_scores, unref_scores)]
示例#7
0
class Hybrid():
    """RUBER-style hybrid metric.

    Averages the normalized referenced and unreferenced scores (each
    rescaled onto [1, 5] to match RUBER human-score ranges) and can export
    per-sample validation results to CSV.
    """

    def __init__(
        self,
        data_dir,
        frword2vec,
        fqembed,
        frembed,
        qmax_length=20,
        rmax_length=30,
        ref_method='max_min',
        gru_num_units=512,
        mlp_units=None,
        init_learning_rate=0.001,
        margin=0.5,
        batch_norm=False,
        is_training=True,
        train_dataset='',
        log_dir="training",
        scramble=False,
        additional_negative_samples='',
    ):
        """Build the referenced and unreferenced sub-models.

        mlp_units defaults to [256, 512, 128]; the list is created per call
        (the original used a shared mutable default argument).
        """
        if mlp_units is None:
            mlp_units = [256, 512, 128]
        print("Initializing referenced model")
        self.ref = Referenced(data_dir, frword2vec, ref_method)
        print("Initializing unreferenced model with log_dir " + log_dir +
              " and ref method " + ref_method)
        self.unref = Unreferenced(
            qmax_length,
            rmax_length,
            os.path.join(data_dir, fqembed),
            os.path.join(data_dir, frembed),
            gru_num_units=gru_num_units,
            mlp_units=mlp_units,
            init_learning_rate=init_learning_rate,
            margin=margin,
            is_training=is_training,
            batch_norm=batch_norm,
            train_dataset=train_dataset,
            log_dir=log_dir,
            scramble=scramble,
            additional_negative_samples=additional_negative_samples)

    def train_unref(self, data_dir, fquery, freply, validation_fquery,
                    validation_freply_true):
        """Train the unreferenced metric, validating on the given files."""
        print("training unreferenced metric")
        self.unref.train(data_dir, fquery, freply, validation_fquery,
                         validation_freply_true)

    def normalize(self,
                  scores,
                  smin=None,
                  smax=None,
                  coefficient=None,
                  smallest_value=0):
        """Affine-rescale `scores`.

        Each score s maps to smallest_value + c * (s - smin) / (smax - smin)
        with c = coefficient (1 when not given). When smin/smax are omitted
        they are taken from the data itself. E.g. coefficient=4 with
        smallest_value=1 rescales onto [1, 5] to fit RUBER human scores.
        """
        # Bug fix: use `is None` rather than truthiness so 0 / 0.0 count as
        # explicitly-supplied bounds (the original's `not smin` treated an
        # explicit smin=0 as "not given").
        if smin is None or smax is None:
            smin = min(scores)
            smax = max(scores)
        diff = smax - smin
        if diff == 0:
            # Constant input: pin everything to the bottom of the range
            # instead of dividing by zero.
            return [smallest_value for _ in scores]
        scale = 1 if coefficient is None else coefficient
        return [smallest_value + scale * (s - smin) / diff for s in scores]

    def scores(self, data_dir, fquery, freply, fgenerated, fqvocab, frvocab,
               checkpoint_dir):
        """Return (hybrid, ref, norm_ref, unref, norm_unref) score lists.

        The hybrid score is the per-sample mean of the two normalized
        scores, each rescaled onto [1, 5].
        """
        ref_scores = self.ref.scores(data_dir, freply, fgenerated)
        norm_ref_scores = self.normalize(ref_scores,
                                         coefficient=4,
                                         smallest_value=1)

        unref_scores = self.unref.scores(data_dir,
                                         fquery,
                                         fgenerated,
                                         fqvocab,
                                         frvocab,
                                         checkpoint_dir,
                                         init=False)
        norm_unref_scores = self.normalize(unref_scores,
                                           coefficient=4,
                                           smallest_value=1)

        return [
            np.mean([a, b]) for a, b in zip(norm_ref_scores, norm_unref_scores)
        ], ref_scores, norm_ref_scores, unref_scores, norm_unref_scores

    def validate_to_csv(self, checkpoint_dir, data_dir, validation_fquery,
                        validation_freply_generated, validation_freply_true,
                        training_fquery, qmax_length, training_freply,
                        rmax_length, train_dataset, validation_dataset):
        """Score the validation set and write per-sample results to a CSV."""
        print("Starting validation")
        scores, ref_scores, norm_ref_scores, unref_scores, norm_unref_scores \
                = self.scores(data_dir, validation_fquery,
                              validation_freply_true,
                              validation_freply_generated,
                              '%s.vocab%d' % (training_fquery, qmax_length),
                              '%s.vocab%d' % (training_freply, rmax_length),
                              checkpoint_dir)

        csv_dir = os.path.join('./results', checkpoint_dir, validation_dataset)
        print(csv_dir)

        reply_file = os.path.basename(validation_freply_generated)
        print(reply_file)
        # Bug fix: rstrip(".txt") strips any trailing '.', 't' or 'x'
        # characters (e.g. "output.txt" -> "outpu"); splitext removes
        # exactly the extension.
        base_name, _ = os.path.splitext(reply_file)
        csv_title = os.path.join(csv_dir, base_name + ".csv")
        print("Csv title: ")
        print(csv_title)
        if not os.path.exists(csv_dir):
            os.makedirs(csv_dir)

        # Write results to CSV. The `with` block closes the file; the
        # original also called csvfile.close() redundantly afterwards.
        with open(csv_title, 'w+') as csvfile:
            writer = csv.writer(csvfile, delimiter=',')
            writer.writerow([
                "Query", "Scored reply", "Ground truth reply", "Score",
                "Ref score", "Normed ref score", "Unref score",
                "Normed unref score"
            ])

            with open(os.path.join(data_dir, validation_fquery), "r") as queries, \
                    open(os.path.join(data_dir, validation_freply_generated), "r") as scored_replies, \
                        open(os.path.join(data_dir, validation_freply_true), "r") as true_replies:
                for query, scored_reply, true_reply, score, ref_score, norm_ref_score, unref_score, norm_unref_score in zip(
                        queries, scored_replies, true_replies, scores,
                        ref_scores, norm_ref_scores, unref_scores,
                        norm_unref_scores):
                    writer.writerow([
                        query.rstrip(), scored_reply.rstrip(),
                        true_reply.rstrip(), score, ref_score,
                        norm_ref_score, unref_score, norm_unref_score
                    ])

        # Bug fix: the original called .format() on the return value of
        # print() (None), raising AttributeError; format the string itself.
        print(
            "max score: {}, min score: {}, median score: {}, mean score: {}, median norm ref: {}, min unnorm ref: {}, max unnorm ref: {}, median norm unref: {}, min unnorm unref: {}, max unnorm unref: {}"
            .format(max(scores), min(scores), median(scores), mean(scores),
                    median(norm_ref_scores), min(ref_scores), max(ref_scores),
                    median(norm_unref_scores), min(unref_scores),
                    max(unref_scores)))

        print("Wrote  model results to " + csv_title)
示例#8
0
class Hybrid():
    """RUBER-style hybrid metric: per-sample mean of the normalized
    referenced and unreferenced scores, each rescaled onto [1, 5]."""

    def __init__(self,
                 data_dir,
                 frword2vec,
                 fqembed,
                 frembed,
                 qmax_length=20,
                 rmax_length=30,
                 ref_method='max_min',
                 gru_units=128,
                 mlp_units=None,
                 is_training=True):
        """Build the referenced and unreferenced sub-models.

        mlp_units defaults to [256, 512, 128]; the list is created per call
        (the original used a shared mutable default argument).
        """
        if mlp_units is None:
            mlp_units = [256, 512, 128]
        print("Initializing referenced model")
        self.ref = Referenced(data_dir, frword2vec, ref_method)
        print("Initializing unreferenced model")
        # NOTE(review): `train_dir` is not defined in this scope; this relies
        # on a module-level global — confirm it exists.
        self.unref = Unreferenced(qmax_length,
                                  rmax_length,
                                  os.path.join(data_dir, fqembed),
                                  os.path.join(data_dir, frembed),
                                  gru_units,
                                  mlp_units,
                                  train_dir=train_dir,
                                  is_training=is_training)

    def train_unref(self, data_dir, fquery, freply):
        """Train the unreferenced metric."""
        print("training unreferenced metric")
        self.unref.train(data_dir, fquery, freply)

    def normalize(self,
                  scores,
                  smin=None,
                  smax=None,
                  coefficient=None,
                  smallest_value=0):
        """Affine-rescale `scores`.

        Each score s maps to smallest_value + c * (s - smin) / (smax - smin)
        with c = coefficient (1 when not given). When smin/smax are omitted
        they are taken from the data itself. E.g. coefficient=4 with
        smallest_value=1 rescales onto [1, 5] to fit RUBER human scores.
        """
        # Bug fix: use `is None` rather than truthiness so 0 / 0.0 count as
        # explicitly-supplied bounds (the original's `not smin` treated an
        # explicit smin=0 as "not given").
        if smin is None or smax is None:
            smin = min(scores)
            smax = max(scores)
        diff = smax - smin
        if diff == 0:
            # Constant input: pin everything to the bottom of the range
            # instead of dividing by zero.
            return [smallest_value for _ in scores]
        scale = 1 if coefficient is None else coefficient
        return [smallest_value + scale * (s - smin) / diff for s in scores]

    def scores(self, data_dir, fquery, freply, fgenerated, fqvocab, frvocab):
        """Return (hybrid, ref, norm_ref, unref, norm_unref) score lists."""
        print("training dir is ")
        # NOTE(review): `train_dir` is read from module scope here and below
        # — confirm a module-level `train_dir` exists.
        print(train_dir)
        ref_scores = self.ref.scores(data_dir,
                                     freply,
                                     fgenerated,
                                     train_dir=train_dir)
        norm_ref_scores = self.normalize(ref_scores,
                                         coefficient=4,
                                         smallest_value=1)

        unref_scores = self.unref.scores(data_dir,
                                         fquery,
                                         fgenerated,
                                         fqvocab,
                                         frvocab,
                                         init=False,
                                         train_dir=train_dir)
        norm_unref_scores = self.normalize(unref_scores,
                                           coefficient=4,
                                           smallest_value=1)

        return [
            np.mean([a, b]) for a, b in zip(norm_ref_scores, norm_unref_scores)
        ], ref_scores, norm_ref_scores, unref_scores, norm_unref_scores