Example #1
    def __init__(self, config):
        self.config = config

        self.src_vocab = utils.Dict()
        self.src_vocab.loadFile(os.path.join(config.data, "src.vocab"))
        self.tgt_vocab = utils.Dict()
        self.tgt_vocab.loadFile(os.path.join(config.data, "tgt.vocab"))

        self.num_tags = self.tgt_vocab.size() + 2

        if config.model == 'bilstm_crf':
            self.model = BiLSTM_CRF(self.src_vocab.size(),
                                    self.tgt_vocab.size(), config)
        elif config.model == 'reslstm_crf':
            self.model = ResLSTM_CRF(self.src_vocab.size(),
                                     self.tgt_vocab.size(), config)
        elif config.model == 'transformer_crf':
            self.model = TransformerCRF(self.src_vocab.size(),
                                        self.tgt_vocab.size(), config)
        else:
            self.model = None
            raise NotImplementedError(config.model + " not implemented!")
        checkpoint = torch.load(config.restore, lambda storage, loc: storage)
        print(self.model.state_dict().keys())
        print(checkpoint['model'].keys())
        self.model.load_state_dict(checkpoint['model'])
        self.model.eval()

        self.model.to(config.device)

        self.oovs = {0: 'B', 1: 'B'}
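
The two print calls above are presumably there to compare the model's parameter names with the checkpoint's before load_state_dict is called. A minimal self-contained sketch of that check, using a toy nn.Linear and a hypothetical checkpoint file rather than the project's CRF models:

import torch
import torch.nn as nn

# Toy stand-in for BiLSTM_CRF / ResLSTM_CRF / TransformerCRF.
model = nn.Linear(4, 2)

# Save and reload a checkpoint in the same {'model': state_dict} layout as above.
torch.save({'model': model.state_dict()}, 'toy_checkpoint.pt')
checkpoint = torch.load('toy_checkpoint.pt',
                        map_location=lambda storage, loc: storage)

# Comparing the two key sets makes renamed or missing parameters obvious.
model_keys = set(model.state_dict().keys())
ckpt_keys = set(checkpoint['model'].keys())
print('missing:', model_keys - ckpt_keys, 'unexpected:', ckpt_keys - model_keys)

model.load_state_dict(checkpoint['model'])
model.eval()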
Example #2
    def receiveHumanSignal(self, signal):
        """ receive human signal and update the weights """
        # do nothing when the signal is 0 or it's not in training
        if signal == 0 or not self.isTraining:
            return

        # clear stale data
        current_time = time.time()
        while len(self.experiences) > 0:
            experience = self.experiences[0]
            if experience['time'] < current_time - self.window_size:
                self.experiences.pop(0)
            else:
                break

        # update weights using Algorithm 1 in paper
        n_experiences = len(self.experiences)
        if n_experiences == 0:
            # every experience fell outside the window; avoid a division by zero
            return
        weight_per_experience = 1.0 / n_experiences
        cred_features = utils.Dict()

        for experience in self.experiences:
            exp_features = experience['state']['feature']
            exp_features.multiplyAll(weight_per_experience)
            cred_features += exp_features

        error = signal - self.weights * cred_features
        cred_features.multiplyAll(self.learning_rate * error)
        self.weights += cred_features

        if IS_DEBUG:
            print('Tamer Agent - updated weights using human signal: %s' %
                  self.weights)
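
The update above is a TAMER-style credit assignment: every experience inside the time window receives an equal share of the human signal, the error is the signal minus the current linear prediction, and the weights move by learning_rate * error * features. A small stand-alone sketch of the same arithmetic with plain Python dicts (feature names are made up; utils.Dict is assumed to behave like a numeric dictionary here):

# Two recent experiences, each with a hypothetical feature vector.
experiences = [
    {'dist-food': 0.2, 'bias': 1.0},
    {'dist-food': 0.5, 'bias': 1.0},
]
weights = {'dist-food': 0.0, 'bias': 0.0}
learning_rate, signal = 0.01, 1.0  # positive human feedback

# Equal credit per experience: average the feature vectors over the window.
credited = {}
for exp in experiences:
    for k, v in exp.items():
        credited[k] = credited.get(k, 0.0) + v / len(experiences)

# error = signal - w . credited, then w += lr * error * credited
error = signal - sum(weights[k] * v for k, v in credited.items())
for k, v in credited.items():
    weights[k] += learning_rate * error * v

print(weights)  # roughly {'dist-food': 0.0035, 'bias': 0.01}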
Example #3
def main():

    dicts = {}

    train_src, train_tgt = opt.load_data + 'train.' + opt.src_suf, opt.load_data + 'train.' + opt.tgt_suf
    valid_src, valid_tgt = opt.load_data + 'valid.' + opt.src_suf, opt.load_data + 'valid.' + opt.tgt_suf
    test_src, test_tgt = opt.load_data + 'test.' + opt.src_suf, opt.load_data + 'test.' + opt.tgt_suf

    save_train_src, save_train_tgt = opt.save_data + 'train.' + opt.src_suf, opt.save_data + 'train.' + opt.tgt_suf
    save_valid_src, save_valid_tgt = opt.save_data + 'valid.' + opt.src_suf, opt.save_data + 'valid.' + opt.tgt_suf
    save_test_src, save_test_tgt = opt.save_data + 'test.' + opt.src_suf, opt.save_data + 'test.' + opt.tgt_suf

    src_dict, tgt_dict = opt.save_data + 'src.dict', opt.save_data + 'tgt.dict'

    if opt.share:
        assert opt.src_vocab_size == opt.tgt_vocab_size
        print('Building source and target vocabulary...')
        dicts['src'] = dicts['tgt'] = utils.Dict([utils.PAD_WORD, utils.UNK_WORD, utils.BOS_WORD, utils.EOS_WORD])
        dicts['src'] = makeVocabulary(train_src, opt.src_trun, opt.src_filter, opt.src_char, dicts['src'], opt.src_vocab_size)
        dicts['src'] = dicts['tgt'] = makeVocabulary(train_tgt, opt.tgt_trun, opt.tgt_filter, opt.tgt_char, dicts['tgt'], opt.tgt_vocab_size)
    else:
        print('Building source vocabulary...')
        dicts['src'] = utils.Dict([utils.PAD_WORD, utils.UNK_WORD, utils.BOS_WORD, utils.EOS_WORD])
        dicts['src'] = makeVocabulary(train_src, opt.src_trun, opt.src_filter, opt.src_char, dicts['src'], opt.src_vocab_size)
        print('Building target vocabulary...')
        dicts['tgt'] = utils.Dict([utils.PAD_WORD, utils.UNK_WORD, utils.BOS_WORD, utils.EOS_WORD])
        dicts['tgt'] = makeVocabulary(train_tgt, opt.tgt_trun, opt.tgt_filter, opt.tgt_char, dicts['tgt'], opt.tgt_vocab_size)

    print('Preparing training ...')
    train = makeData(train_src, train_tgt, dicts['src'], dicts['tgt'], save_train_src, save_train_tgt)

    print('Preparing validation ...')
    valid = makeData(valid_src, valid_tgt, dicts['src'], dicts['tgt'], save_valid_src, save_valid_tgt)

    print('Preparing test ...')
    test = makeData(test_src, test_tgt, dicts['src'], dicts['tgt'], save_test_src, save_test_tgt)

    print('Saving source vocabulary to \'' + src_dict + '\'...')
    dicts['src'].writeFile(src_dict)

    print('Saving target vocabulary to \'' + tgt_dict + '\'...')
    dicts['tgt'].writeFile(tgt_dict)

    data = {'train': train, 'valid': valid,
             'test': test, 'dict': dicts}
    pickle.dump(data, open(opt.save_data+'data.pkl', 'wb'))
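
The resulting data.pkl bundles the three splits together with both vocabularies, so the training script can presumably restore everything in one read. A minimal sketch of loading it back (the key layout follows the dump above; the path is hypothetical):

import pickle

with open('save/data.pkl', 'rb') as f:  # hypothetical opt.save_data directory
    data = pickle.load(f)

train, valid, test = data['train'], data['valid'], data['test']
src_dict, tgt_dict = data['dict']['src'], data['dict']['tgt']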
Example #4
def main():
    dicts = {}
    embbs = {}
    domains = ['books', 'dvd', 'electronics', 'kitchen', 'video']
    files = []
    for r, d, f in os.walk(opt.load_path):
        for file in f:
            files.append(os.path.join(r, file))
    print("Done reading all files")
    if opt.model_path is None:
        model = trainW2V(files, freq=opt.freq, dim=opt.dim)
    elif opt.model_path == "google":
        model = gensim.models.KeyedVectors.load_word2vec_format(
            '../GoogleNews-vectors-negative300.bin.gz', binary=True)
    else:
        model = gensim.models.Word2Vec.load(opt.model_path)
    print("Done training/loading the W2V")
    print(model.wv.vectors.shape)
    for src_domain in domains:
        dicts[src_domain] = {}
        embbs[src_domain] = {}
        for tgt_domain in domains:
            if src_domain != tgt_domain:
                print("Start handling the files for %s to %s" %
                      (src_domain, tgt_domain))
                files = []
                for r, d, f in os.walk(os.path.join(opt.load_path,
                                                    src_domain)):
                    for file in f:
                        files.append(os.path.join(r, file))
                for r, d, f in os.walk(os.path.join(opt.load_path,
                                                    tgt_domain)):
                    for file in f:
                        files.append(os.path.join(r, file))
                dicts[src_domain][tgt_domain] = makeVocab(files,
                                                          size=opt.size,
                                                          vocab=utils.Dict(
                                                              None,
                                                              lower=opt.lower),
                                                          freq=opt.freq,
                                                          lower=opt.lower)
                embbs[src_domain][tgt_domain] = makeEmbb(
                    dicts[src_domain][tgt_domain], model, dim=opt.dim)
                print("Done handling the files for %s to %s" %
                      (src_domain, tgt_domain))

    for key, value in dicts.items():
        print(key, value)
    for key, value in embbs.items():
        print(key, value)

    saveVocab('dict', dicts,
              os.path.join(opt.save_path, "vocab%d.pkl" % (opt.dim)))
    saveVocab('embb', embbs,
              os.path.join(opt.save_path, "embb%d.pkl" % (opt.dim)))
Example #5
 def getStateFeatures(self, state, action):
     # get pacman pos
     pacman_pos = utils.getPacmanPos(state)
     if pacman_pos is None:
         print('pacman not found')
         return utils.Dict()
     else:
         (x, y) = pacman_pos
     # compute the next pos
     (next_x, next_y) = utils.getNextPos(x, y, action)
     # use bfs to get features
     return self.bfs_features(state, next_x, next_y)
Example #6
def knowStr2Id(knowstr):
    LOWER = 0
    CHAR = 0
    dicts = {}
    # dicts['tgt'] = utils.Dict(data='core/dataloading/tgt.dict', lower=LOWER)
    dicts['tgt'] = utils.Dict(
        data='data/aspect-user/preprocessed/short_dataset/tgt.dict',
        lower=LOWER)
    knowstr = knowstr.replace(' ', '')
    s = ""
    for char in knowstr:
        s = s + char + " "
    srcWords = s.split() if not CHAR else list(s)
    knowStrIds = dicts['tgt'].convertToIdx(srcWords, utils.UNK_WORD)
    return knowStrIds
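
knowStr2Id strips the spaces, re-splits the string into single characters, and maps each character to an id, falling back to the unknown token. The same behavior with a plain dict, just to make the convertToIdx call concrete (toy vocabulary, not the project's tgt.dict):

def chars_to_ids(text, vocab, unk_id=1):
    return [vocab.get(ch, unk_id) for ch in text.replace(' ', '')]

toy_vocab = {'<pad>': 0, '<unk>': 1, '口': 2, '红': 3}
print(chars_to_ids('口 红 x', toy_vocab))  # [2, 3, 1]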
Example #7
 def __init__(self,
              index=0,
              isTraining=True,
              epsilon=0.5,
              alpha=0.5,
              gamma=1,
              window_size=2,
              max_n_experiences=1000,
              learning_rate=0.01):
     """
         window_size: use the experiences within 2 seconds to update the weights
         max_n_experiences: maximum number of experiences stored in the history list
     """
     PythonReinforcementAgent.__init__(self, index, isTraining, epsilon,
                                       alpha, gamma, learning_rate)
     self.weights = utils.Dict()
     self.weights['bias'] = 0
     self.window_size = window_size
     self.experiences = list()
     self.max_n_experiences = max_n_experiences
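
The docstring pins down the two buffer parameters: experiences older than window_size seconds are dropped (as receiveHumanSignal does in Example #2), and at most max_n_experiences are kept. A tiny stand-alone illustration of that sliding-window pruning with a plain list and made-up timestamps:

import time

window_size = 2
experiences = []

def add_experience(features, t):
    experiences.append({'time': t, 'feature': features})
    # discard entries that fell out of the window, oldest first
    while experiences and experiences[0]['time'] < t - window_size:
        experiences.pop(0)

now = time.time()
add_experience({'bias': 1.0}, now - 3)  # too old; dropped by the next call
add_experience({'bias': 1.0}, now)
print(len(experiences))  # 1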
Example #8
def main():
    dicts = {}
    dicts['src'] = utils.Dict(data=opt.src_dict, lower=opt.lower)
    dicts['tgt'] = utils.Dict(data=opt.tgt_dict, lower=opt.lower)
    data = makeData(opt.src_str, opt.tgt_str, dicts['src'], dicts['tgt'],
                    opt.save)
Example #9
                here = g.locations[node]
                def distance_to_node(n):
                    if n is node or g.get(node,n): return infinity
                    return distance(g.locations[n], here)
                neighbor = argmin(nodes, distance_to_node)
                d = distance(g.locations[neighbor], here) * curvature()
                g.connect(node, neighbor, int(d))
    return g

romania = UndirectedGraph(utils.Dict(
    A=utils.Dict(Z=75, S=140, T=118),
    B=utils.Dict(U=85, P=101, G=90, F=211),
    C=utils.Dict(D=120, R=146, P=138),
    D=utils.Dict(M=75),
    E=utils.Dict(H=86),
    F=utils.Dict(S=99),
    H=utils.Dict(U=98),
    I=utils.Dict(V=92, N=87),
    L=utils.Dict(T=111, M=70),
    O=utils.Dict(Z=71, S=151),
    P=utils.Dict(R=97),
    R=utils.Dict(S=80),
    U=utils.Dict(V=142)))
romania.locations = utils.Dict(
    A=( 91, 492),    B=(400, 327),    C=(253, 288),   D=(165, 299),
    E=(562, 293),    F=(305, 449),    G=(375, 270),   H=(534, 350),
    I=(473, 506),    L=(165, 379),    M=(168, 339),   N=(406, 537),
    O=(131, 571),    P=(320, 368),    R=(233, 410),   S=(207, 457),
    T=( 94, 410),    U=(456, 350),    V=(509, 444),   Z=(108, 531))

australia = UndirectedGraph(utils.Dict(
    T=utils.Dict(),
Example #10
    LABEL = data.Field(sequential=True, use_vocab=False, batch_first=True, unk_token=utils.UNK,
                       include_lengths=True, pad_token=utils.PAD, preprocessing=to_int, )
    # init_token=utils.BOS, eos_token=utils.EOS)

    fields = [("text", TEXT), ("label", LABEL)]
    validDataset = datasets.SequenceTaggingDataset(path=os.path.join(config.data, 'valid.txt'),
                                                   fields=fields)
    valid_iter = data.Iterator(validDataset,
                               batch_size=config.batch_size,
                               sort_key=lambda x: len(x.text),  # field sorted by len
                               sort=True,
                               sort_within_batch=True,
                               repeat=False
                               )

    src_vocab = utils.Dict()
    src_vocab.loadFile(os.path.join(config.data, "src.vocab"))
    tgt_vocab = utils.Dict()
    tgt_vocab.loadFile(os.path.join(config.data, "tgt.vocab"))

    if config.model == 'bilstm_crf':
        model = BiLSTM_CRF(src_vocab.size(), tgt_vocab.size(), config)
    elif config.model == 'reslstm_crf':
        model = ResLSTM_CRF(src_vocab.size(), tgt_vocab.size(), config)
    elif config.model == 'transformer_crf':
        model = TransformerCRF(src_vocab.size(), tgt_vocab.size(), config)
    else:
        model = None
        raise NotImplementedError(config.model + " not implemented!")
    model.to(device)
Example #11
    def bfs_features(state, next_x, next_y):
        """
            extract features using BFS
            In state, 0: nothing, 1: wall, 2: path, 3: pacman, 4: scared ghost, 5: ghost, 6: food, 7: capsule
        """
        # init features
        features = utils.Dict()
        features['dist-food'] = None
        features['dist-capsule'] = None
        features['dist-ghost'] = None
        features['dist-scared-ghost'] = None
        features['bias'] = 1.0

        legal_moves = [(1, 1), (1, 0), (1, -1), (0, 1), (0, -1), (-1, 0),
                       (-1, -1), (-1, 1)]
        dim_state = state.shape

        # queue for BFS
        que_start = 0
        que_end = 0
        max_depth = 160 * 210
        queue_bfs = [[0, 0, 0]
                     for _ in range(0, max_depth)]  # format [x,y,depth]
        expanded = set()

        # push initial state
        queue_bfs[que_start][0] = int(next_x)
        queue_bfs[que_start][1] = int(next_y)
        queue_bfs[que_start][2] = 0
        que_end += 1
        expanded.add((next_x, next_y))

        # slots [que_start, que_end) are filled; stop when the queue is empty
        while que_start < que_end:
            x = queue_bfs[que_start][0]
            y = queue_bfs[que_start][1]
            depth = queue_bfs[que_start][2]
            que_start += 1

            # if it's not wall
            if state[x, y] != 1:
                # if it's food
                if state[x, y] == 6 and features['dist-food'] is None:
                    features['dist-food'] = depth
                # if it's capsule
                if state[x, y] == 7 and features['dist-capsule'] is None:
                    features['dist-capsule'] = depth
                # if it's ghost
                if state[x, y] == 5 and features['dist-ghost'] is None:
                    features['dist-ghost'] = depth
                # if it's scared ghost
                if state[x, y] == 4 and features['dist-scared-ghost'] is None:
                    features['dist-scared-ghost'] = depth

                # spread out from the location to its neighbours
                neighbours = []
                for legal_move in legal_moves:
                    new_x = x + legal_move[0]
                    new_y = y + legal_move[1]
                    if 0 <= new_x < dim_state[0] and 0 <= new_y < dim_state[1]:
                        neighbours.append((new_x, new_y))

                for new_x, new_y in neighbours:
                    if que_end >= max_depth:
                        break
                    if (new_x, new_y) in expanded:
                        continue
                    else:
                        expanded.add((new_x, new_y))

                    new_depth = depth + 1
                    queue_bfs[que_end][0] = new_x
                    queue_bfs[que_end][1] = new_y
                    queue_bfs[que_end][2] = new_depth
                    que_end += 1

        # compute the features
        if features['dist-food'] is not None:
            features['dist-food'] = features['dist-food'] / float(
                160 * 170 * 10)
        else:
            features['dist-food'] = 0

        if features['dist-capsule'] is not None:
            features['dist-capsule'] = features['dist-capsule'] / float(
                160 * 170 * 10)
        else:
            features['dist-capsule'] = 0

        if features['dist-ghost'] is not None:
            features['dist-ghost'] = features['dist-ghost'] / float(
                160 * 170 * 10)
        else:
            features['dist-ghost'] = 0

        if features['dist-scared-ghost'] is not None:
            features['dist-scared-ghost'] = features[
                'dist-scared-ghost'] / float(160 * 170 * 10)
        else:
            features['dist-scared-ghost'] = 0

        return features
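
The loop above is a breadth-first search over the screen grid that records the depth at which each object type is first encountered, then rescales those depths into small feature values. The same idea in a compact, self-contained form using collections.deque (cell codes follow the docstring; the original expands eight neighbours, four are used here for brevity, and the grid is made up):

import numpy as np
from collections import deque

def nearest_distances(state, start, targets=(6, 7, 5, 4)):
    # BFS from `start`; return the first-hit depth per target code (None if never seen).
    dist = {t: None for t in targets}
    queue, seen = deque([(start, 0)]), {start}
    while queue:
        (x, y), depth = queue.popleft()
        if state[x, y] == 1:  # walls are not expanded
            continue
        if state[x, y] in dist and dist[state[x, y]] is None:
            dist[state[x, y]] = depth
        for dx, dy in ((1, 0), (-1, 0), (0, 1), (0, -1)):
            nx, ny = x + dx, y + dy
            if 0 <= nx < state.shape[0] and 0 <= ny < state.shape[1] and (nx, ny) not in seen:
                seen.add((nx, ny))
                queue.append(((nx, ny), depth + 1))
    return dist

grid = np.zeros((5, 5), dtype=int)
grid[0, 4] = 6  # one piece of food in a corner
print(nearest_distances(grid, (4, 0)))  # {6: 8, 7: None, 5: None, 4: None}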
Example #12
#        gpu="0",
#        restore=False,
#        pretrain="experiments/aspect-user/best_bleu_checkpoint.pt",
#        mode="eval",
#        batch_size=1,
#        beam_size=10,
#        # refactor issue; workaround; delete afterwards:
#        scale=1,
#        char=False,
#        use_cuda=True,
#        seed=1234,
#        model="tensor2tensor",
#        num_workers=0
#    )
dicts = {}
dicts['src'] = utils.Dict(data='./dataloading/src.dict', lower=LOWER)

# Banner: "Welcome to the 爱文案 AI copywriting service"
print('*' * 5 + "欢迎使用爱文案AI文案生成服务" + '*' * 5)
key = ''
while key != 'quit':
    key = ''
    inputstr = ''
    aspect = ''
    srcIds = []
    srcWords = []

    # Prompt: "Enter keywords, separated by spaces."
    key = input("请输入关键词,以空格分开。\n>>>")
    if key == 'quit':
        break
    keystr = key.replace(' ', '')
    for char in keystr:
Example #13
def main():
    dicts = {}
    # load_data holds the input directory path; six files are read from it (train.src, train.tgt, ...)
    train_src, train_tgt = opt.load_data + 'train.' + opt.src_suf, opt.load_data + 'train.' + opt.tgt_suf
    valid_src, valid_tgt = opt.load_data + 'valid.' + opt.src_suf, opt.load_data + 'valid.' + opt.tgt_suf
    test_src, test_tgt = opt.load_data + 'test.' + opt.src_suf, opt.load_data + 'test.' + opt.tgt_suf

    # output file names and paths
    save_train_src, save_train_tgt = opt.save_data + 'train.' + opt.src_suf, opt.save_data + 'train.' + opt.tgt_suf
    save_valid_src, save_valid_tgt = opt.save_data + 'valid.' + opt.src_suf, opt.save_data + 'valid.' + opt.tgt_suf
    save_test_src, save_test_tgt = opt.save_data + 'test.' + opt.src_suf, opt.save_data + 'test.' + opt.tgt_suf

    # paths for saving the vocabularies
    src_dict, tgt_dict = opt.save_data + 'src.dict', opt.save_data + 'tgt.dict'

    # check whether source and target share one vocabulary (default: False)
    if opt.share:
        assert opt.src_vocab_size == opt.tgt_vocab_size
        print('Building source and target vocabulary...')
        # Build one vocabulary shared by source and target, using the Dict class from utils/dict_helper.py, seeded with [<blank>, <unk>, <s>, </s>].
        dicts['src'] = dicts['tgt'] = utils.Dict(
            [utils.PAD_WORD, utils.UNK_WORD, utils.BOS_WORD, utils.EOS_WORD])
        # Build the vocabulary with makeVocabulary.
        dicts['src'] = makeVocabulary(train_src, opt.src_trun, opt.src_filter,
                                      opt.src_char, dicts['src'],
                                      opt.src_vocab_size)
        # Add the target side to the shared vocabulary as well.
        dicts['src'] = dicts['tgt'] = makeVocabulary(train_tgt, opt.tgt_trun,
                                                     opt.tgt_filter,
                                                     opt.tgt_char,
                                                     dicts['tgt'],
                                                     opt.tgt_vocab_size)
    else:
        print('Building source vocabulary...')
        dicts['src'] = utils.Dict(
            [utils.PAD_WORD, utils.UNK_WORD, utils.BOS_WORD, utils.EOS_WORD])
        dicts['src'] = makeVocabulary(train_src, opt.src_trun, opt.src_filter,
                                      opt.src_char, dicts['src'],
                                      opt.src_vocab_size)
        print('Building target vocabulary...')
        dicts['tgt'] = utils.Dict(
            [utils.PAD_WORD, utils.UNK_WORD, utils.BOS_WORD, utils.EOS_WORD])
        dicts['tgt'] = makeVocabulary(train_tgt, opt.tgt_trun, opt.tgt_filter,
                                      opt.tgt_char, dicts['tgt'],
                                      opt.tgt_vocab_size)

    print('Preparing training ...')
    train = makeData(train_src, train_tgt, dicts['src'], dicts['tgt'],
                     save_train_src, save_train_tgt)

    print('Preparing validation ...')
    valid = makeData(valid_src, valid_tgt, dicts['src'], dicts['tgt'],
                     save_valid_src, save_valid_tgt)

    print('Preparing test ...')
    test = makeData(test_src, test_tgt, dicts['src'], dicts['tgt'],
                    save_test_src, save_test_tgt)

    print('Saving source vocabulary to \'' + src_dict + '\'...')
    dicts['src'].writeFile(src_dict)

    print('Saving target vocabulary to \'' + tgt_dict + '\'...')
    dicts['tgt'].writeFile(tgt_dict)

    data = {'train': train, 'valid': valid, 'test': test, 'dict': dicts}
    pickle.dump(data, open(opt.save_data + 'data.pkl', 'wb'))
Example #14
    'original_tgtF'] = './data/aspect-user/preprocessed/short_dataset/valid.tgt.str'
data['valid'][
    'know'] = './data/aspect-user/preprocessed/short_dataset/valid.supporting_facts.id'
data['valid'][
    'origin_know'] = './data/aspect-user/preprocessed/short_dataset/valid.supporting_facts_str'

data['test'][
    'srcF'] = './data/aspect-user/preprocessed/short_dataset/test.src.id'
data['test'][
    'tgtF'] = './data/aspect-user/preprocessed/short_dataset/test.tgt.id'
data['test'][
    'original_srcF'] = './data/aspect-user/preprocessed/short_dataset/test.src.str'
data['test'][
    'original_tgtF'] = './data/aspect-user/preprocessed/short_dataset/test.tgt.str'
data['test'][
    'know'] = './data/aspect-user/preprocessed/short_dataset/test.supporting_facts.id'
data['test'][
    'origin_know'] = './data/aspect-user/preprocessed/short_dataset/test.supporting_facts_str'

data['dict']

# codehuotengfeiKOBE  KOBE    data    aspect-user   preprocessed    short_dataset
dicts_new = {}
dicts_new['src'] = utils.Dict(
    data='./data/aspect-user/preprocessed/short_dataset/src.dict')
dicts_new['tgt'] = utils.Dict(
    data='./data/aspect-user/preprocessed/short_dataset/tgt.dict')

data['dict'] = dicts_new
pickle.dump(
    data, open('./data/aspect-user/preprocessed/short_dataset/data.pkl', 'wb'))
Example #15
            assert len(src) == len(tgt), 'unequal length of src and tgt'
            srcList.append(src)
            tgtList.append(tgt)

    # srcList = srcList[:100]
    # tgtList = tgtList[:100]
    # dump raw files
    # with io.open(opt.outdir, 'w+', encoding='utf-8') as fout:
    #     for src, tgt in zip(srcList, tgtList):
    #         for ch, label in zip(src, tgt):
    #             fout.write(ch +'\t'+ label +'\n')
    #         fout.write('\n')

    print('building dictionary...')
    srcDict = utils.Dict(
        [utils.PAD_WORD, utils.UNK_WORD, utils.BOS_WORD, utils.EOS_WORD],
        lower=True)
    for stses in srcList:
        for ch in stses:
            srcDict.add(ch)
    srcDict = srcDict.prune(srcDict.size(), opt.min_freq)
    srcDict.writeFile(os.path.join(opt.outdir, 'src.vocab'))

    tgtDict = utils.Dict([utils.PAD_WORD], lower=False)
    tgtDict.add('B')
    tgtDict.add('I')
    tgtDict.add('E')
    tgtDict.writeFile(os.path.join(opt.outdir, 'tgt.vocab'))

    logger.info("convert to idx...")
    srcIdList = []
Example #16
       mode="eval",
       batch_size=1,
       beam_size=10,
       # refactor issue; workaround; delete afterwards:
       scale=1,
       char=False,
       use_cuda=True,
       seed=1234,
       model="tensor2tensor",
       num_workers=0
   )


dicts = {}
# dicts['src'] = utils.Dict(data='core/dataloading/src.dict', lower=LOWER)
dicts['src'] = utils.Dict(data='data/aspect-user/preprocessed/short_dataset/src.dict', lower=LOWER)



def lengthCutter(length, inputstr):
    if length == 'a':
        lenlimit = 60
    elif length == 'b':
        lenlimit = 110
    elif length == 'c':
        lenlimit = 9999

    pos = lenlimit
    outputstr = output[:lenlimit + 1]
    while not bool(re.match(r'[,.!? ,。!?]', output[pos])):
        pos += 1