Example #1
def gen_word_vector_mapping(
        # alternatives: model_name='googlenews', model_name='glove'
        model_name='fasttext'):
    """
    generate word vec mapping
    :return:
    """
    files = {
        'googlenews': ('GoogleNews-vectors-negative300.bin', True),
        'glove': ('glove.840B.300d.txt', False),
        'fasttext': ('wiki.en.model.vec', False)
    }
    f = files[model_name]
    model = gensim.models.Word2Vec.load_word2vec_format(
        data_path(f[0]), binary=f[1], unicode_errors='strict')
    word_list = json.load(open(data_path('f_question_words.json'),
                               'r'))['words']
    word_dict = dict()
    no_match_words = []
    for word in word_list:
        try:
            v = model[word]
            word_dict[word] = v
        except KeyError:
            # word not present in the embedding vocabulary
            no_match_words.append(word)
    json.dump({'missing': no_match_words},
              open(data_path('missing_words.json'), 'w'),
              indent=2)
    cPickle.dump(word_dict, open(data_path('word_dict.pkl'), 'wb'))
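Every example on this page goes through a project-local `data_path` helper whose definition is never shown. The call sites (joining several components, `data_path('')` for the directory itself, `data_path()` for a bare prefix) suggest a thin wrapper over `os.path.join`. A minimal sketch under that assumption; `DATA_DIR` and its default are hypothetical:

import os

# hypothetical base directory; each real project defines its own
DATA_DIR = os.environ.get('DATA_DIR', 'data')

def data_path(*parts):
    # join the base data directory with any path components,
    # e.g. data_path('large', 'dawg.dawg') -> 'data/large/dawg.dawg'
    return os.path.join(DATA_DIR, *parts)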
Example #2
File: brain.py Project: gregaw/ml
    def load_model_with_prefix(self, model_prefix):
        model_files = [x for x in os.listdir(data_path('')) if x.startswith(model_prefix) and x.endswith(".hdf5")]
        if len(model_files) > 1:
            raise Exception(
                "There's more than one model file with the prefix '{}': {}".format(model_prefix, model_files))
        elif not model_files:
            raise Exception("Didn't find anything with prefix: {} in folder {}".format(model_prefix, data_path('')))
        else:
            model = load_model(data_path(model_files[0]))

        return model
Example #3
File: brain.py Project: gregaw/ml
    def find_last_model(self, runid):
        saves = [x for x in os.listdir(data_path("")) if x.startswith(runid) and x.endswith('.hdf5')]
        if saves:
            print saves
            return sorted(map(lambda x: (int(x[len(runid) + 1:].split('_')[0]), x), saves), reverse=True)[0]
        else:
            return 0, None
Example #4
class Test_Mp_Login:

    titles = time.strftime("%H%M%S")

    def setup_class(self):
        self.driver = DriverUtils.open_driver()
        self.driver.get("http://ttmp.research.itcast.cn/")

    def teardown_class(self):
        DriverUtils.close_driver()

    @pytest.mark.parametrize("username,code,message,title,content,zhuanti",
                             data_path(BAS_URL + '/data/mp.json', 'login'))
    def test_01_login(self, username, code, message, title, content, zhuanti):
        self.title_name = title + self.titles
        try:
            # Log in
            Page.get_login_page().mp_login_login(username, code)
            # Assert
            is_exists_element(message)
            # Write to the log
            logging.info("----------------> login succeeded")
            # Click content management, then publish an article
            Page.get_home_page().mp_home_ca()
            # Confirm the article content and publish
            Page.get_pusair_page().mp_pusair_contant(self.title_name, content,
                                                     self.driver, zhuanti)
            logging.info("----------------> publish succeeded")
        except Exception:
            print("Operation failed; check whether the page has an issue")
            # Screenshot on failure
            DriverUtils().screen_image()
            raise
        config.TITLE = self.title_name
        print(config.TITLE)
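In the UI-test examples (#4, #5, #18, #19), `data_path` plays a different role: it is called with a JSON file and a scenario key, and must return rows for `pytest.mark.parametrize`. A minimal sketch of such a loader, assuming the JSON file maps scenario names to lists of argument rows; that layout is a guess from the call sites, not confirmed by the source:

import json

def data_path(json_file, key):
    # load the test-data file and return the rows stored under `key`
    # as tuples, one per parametrized test case (hypothetical layout)
    with open(json_file, encoding='utf-8') as f:
        data = json.load(f)
    return [tuple(row) for row in data[key]]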
Example #5
class Test_Login:
    def setup_class(self):
        DriverUtils.open_driver().get("http://127.0.0.1/")

    def teardown_class(self):
        DriverUtils.close_driver()

    @pytest.mark.parametrize("username,pwd,code,nick_name,msg",
                             data_path(BAS_URL + '/data/tpshop.json', 'login'))
    def test_01_login(self, username, pwd, code, nick_name, msg):
        # From the home page, click login to open the login page
        message = Page.get_home_page().home_login()
        try:
            # Assert
            assert message == msg
            Page.get_login_page().login_login(username, pwd, code, nick_name)
            # Write to the log
            logging.info("username: %s ... password: %s ... captcha: %s ... new nickname: %s" %
                         (username, pwd, code, nick_name))
            logging.info("----------------> login and nickname change succeeded")
        except Exception:
            print("Login failed; check whether the page has an issue")
            # Screenshot on failure
            DriverUtils().screen_image()
            raise
Example #6
File: tweeterator.py Project: gregaw/ml
def compare_iteration(model_prefix, iterations, diversities, training_text, seed_sentence=None):
    result = {}
    index = 0
    for requested_iteration in iterations:
        for file_name in [x for x in os.listdir(data_path('')) if x.startswith(model_prefix)]:
            try:
                (runid, maxlen, step, lstm_size, rest) = file_name.split('-')
                (dropout, iteration, rest) = rest.split('_')
                if str(iteration) != str(requested_iteration):
                    continue
                (maxlen, step, lstm_size, dropout) = (int(maxlen), int(step), int(lstm_size), float(dropout))
                brain = Brain(maxlen=maxlen, lstm_size=lstm_size, dropout=dropout,
                              training_text=training_text)
                seed_sentence = seed_sentence or brain.random_seed_sentence()
                print 'sentence: ' + seed_sentence
                print '---- loading model: ' + file_name
                model = brain.load_model_with_prefix(file_name)

                length = 340

                for diversity in diversities:
                    generated = brain.generate_full(
                        model=model,
                        n=length,
                        diversity=diversity,
                        seed_sentence=seed_sentence)
                    result[(index, file_name, diversity)] = generated
                    index += 1
                    print generated
            except:
                print "Unexpected error with {}: {}".format(file_name, sys.exc_info()[1])
                raise

        for (ix, name, div), generated in sorted(result.iteritems()):
            print "ix={}, model={}, div={}| {}".format(ix, name, div, generated.encode('utf-8'))
Example #7
def gen_question_word_id_vec():
    """
    generate question word to vector
    :return:
    """
    word_dict = cPickle.load(open(data_path('word_dict.pkl'), 'rb'))
    word_vec = [numpy.zeros(300), numpy.zeros(300)]
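    # the two zero vectors reserve ids 0 and 1 (presumably padding and
    # out-of-vocabulary words), so real words are numbered from 2 below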
    word_id_mapping = dict()
    for index, (word, vec) in enumerate(word_dict.items(), start=2):
        word_vec.append(vec)
        word_id_mapping[word] = index

    cPickle.dump(word_vec,
                 open(data_path('word_vec.pkl'), 'wb'),
                 protocol=cPickle.HIGHEST_PROTOCOL)
    json.dump(word_id_mapping,
              open(data_path('f_word_id_map.json'), 'w'),
              indent=2)
Example #8
def gen_image_id_feature():
    """
    generate image id to feature mapping
    :return:
    """
    image_id_path_mapping = json.load(
        open(data_path('f_image_id_path_map.json'), 'r'))
    get_vgg16_dense = VGG16_dense(include_top=True, weights='imagenet')
    image_id_feature_mapping = dict()
    bar = tqdm(total=len(image_id_path_mapping))
    for image_id, image_path in image_id_path_mapping.items():
        bar.update()
        x = preprocess_image(image_path)
        y = get_vgg16_dense([x])
        image_id_feature_mapping[image_id] = y[0][0]
    bar.close()
    cPickle.dump(image_id_feature_mapping,
                 open(data_path('image_id_feature_map.pkl'), 'wb'),
                 protocol=cPickle.HIGHEST_PROTOCOL)
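Example #8 relies on two helpers that are not shown: `VGG16_dense`, which apparently wraps VGG16 to expose a dense-layer output, and `preprocess_image`. A plausible sketch of the latter for the Keras-1-era API this project uses; the 224x224 input size is the standard VGG16 assumption, not something the source confirms:

import numpy as np
from keras.preprocessing import image
from keras.applications.vgg16 import preprocess_input

def preprocess_image(image_path):
    # load, resize to VGG16's expected input, and normalize channels
    img = image.load_img(image_path, target_size=(224, 224))
    x = image.img_to_array(img)
    x = np.expand_dims(x, axis=0)  # add the batch dimension
    return preprocess_input(x)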
Example #9
File: interface.py Project: ittner/sked
    def __init__(self, parent):
        self.parent = parent
        self.ui_init("about-dialog.ui")
        self.dlg = self.ui.get_object("dlgAbout")
        self.dlg.set_version(libsked.VERSION)
        try:
            icon = gdk.pixbuf_new_from_file(utils.data_path("sked.png"))
            self.dlg.set_logo(icon)
        except Exception:
            pass  # the logo is optional
        self.ui.connect_signals(self)
Example #10
def gen_word_vector_mapping_glove():
    """
    generate word vec mapping
    :return:
    """
    word_list = json.load(open(data_path('f_question_words.json'),
                               'r'))['words']
    word_set = set(word_list)
    glove_file = codecs.open(data_path('glove.840B.300d.txt'),
                             'r',
                             encoding='utf8')
    word_dict = dict()
    for line in glove_file:
        seg = line.rstrip().split(' ')
        word = seg[0]
        if word in word_set:
            # parse the coefficients as floats rather than keeping strings
            word_dict[word] = numpy.asarray(seg[1:], dtype=numpy.float32)
            word_set.remove(word)
    json.dump({'missing': list(word_set)},
              open(data_path('missing_words.json'), 'w'),
              indent=2)
    cPickle.dump(word_dict, open(data_path('word_dict.pkl'), 'wb'))
Example #11
def get_matrix(m='train'):
    images_list, questions_list, answers_list = cPickle.load(
        open(data_path('{}_matrix.pkl'.format(m)), 'rb'))
    images_list = numpy.asarray(images_list)
    questions_list = numpy.asarray(questions_list)
    questions_list = sequence.pad_sequences(questions_list,
                                            maxlen=QUESTION_LENGTH)
    if m == 'train':
        answers_list = numpy.asarray(answers_list)
        answers_list = np_utils.to_categorical(answers_list, MAX_ANSWER)
    return images_list, questions_list, answers_list
Example #12
def val_result(p_answers=None, val_answers=None):
    """
    evaluate predict result and accuracy
    :return:
    """

    if p_answers is None:
        p_answers = cPickle.load(open(data_path('predict.pkl'), 'rb'))
    if val_answers is None:
        val_images, val_questions, val_answers = get_matrix('val')
    assert len(p_answers) == len(val_answers)
    total = len(p_answers)
    count = 0
    for predict, val in zip(p_answers, val_answers):
        if predict in val:
            count += 1
    print(count, total, float(count) / float(total))
Example #13
File: interface.py Project: ittner/sked
    def _load_interface(self):
        self.ui_init("password-dialog.ui")
        self.dlg = self.ui.get_object("dlgPassword")
        self.lbGeneral = self.ui.get_object("lbGeneral")
        self.lbPassword = self.ui.get_object("lbPassword")
        self.lbNewPassword = self.ui.get_object("lbNewPassword")
        self.lbConfirmPassword = self.ui.get_object("lbConfirmPassword")
        self.lbPasswordQuality = self.ui.get_object("lbPasswordQuality")
        self.txPassword = self.ui.get_object("txPassword")
        self.txNewPassword = self.ui.get_object("txNewPassword")
        self.txConfirmPassword = self.ui.get_object("txConfirmPassword")
        self.pgPasswordQuality = self.ui.get_object("pgPasswordQuality")
        self.txPassword.set_visibility(False)
        self.txNewPassword.set_visibility(False)
        self.txConfirmPassword.set_visibility(False)
        try:
            self.dlg.set_icon_from_file(utils.data_path("sked.png"))
        except Exception:
            pass  # the window icon is optional
Example #14
    def run(self):
        """
        record is a string line
        """
        val_maps = []
        for val_idx in xrange(TRAIN_SET_NUM):
            # use the i-th dataset as the validation set
            self.val_idx = val_idx
            set_indexs = set(range(TRAIN_SET_NUM))
            set_indexs.discard(val_idx)

            self.train(set_indexs)
            val_res = self.validate()
            show_status(".. get map: " + str(val_res))
            val_maps.append(val_res)
        map_res = sum(val_maps) / TRAIN_SET_NUM
        show_status(".. get avage map: " + str(map_res))
        self.model.dataspace.tofile(data_path('models', str(map_res)))
Example #16
def VQA():
    word_vec_list = cPickle.load(open(data_path('word_vec.pkl'), 'rb'))
    word_vec_len = len(word_vec_list)
    word_vec_list = np.asarray(word_vec_list)
    img_input = Input(shape=(4096,), name='input_img')
    x_img = Dense(1024, activation='tanh', name='fc1')(img_input)
    question_input = Input(shape=(QUESTION_LENGTH,), name='input_question')
    x_str = Embedding(word_vec_len, 300, input_length=QUESTION_LENGTH, mask_zero=True, weights=[word_vec_list])(
        question_input)
    x_str = LSTM(2048, dropout_W=0.5, consume_less='gpu')(x_str)
    x_str = Dense(1024, activation='tanh', name='fc4')(x_str)
    x_f = merge([x_img, x_str], mode='mul', name='merge1')
    x_f = Dense(MAX_ANSWER, activation='tanh', name='fc5')(x_f)
    x_f = Dropout(0.5)(x_f)
    x_f = Dense(MAX_ANSWER, activation='tanh', name='fc6')(x_f)
    x_f = Dropout(0.5)(x_f)
    x_f = Dense(MAX_ANSWER, activation='softmax', name='predictions')(x_f)
    model = Model(input=[img_input, question_input], output=x_f)

    return model
Example #17
File: brain.py Project: gregaw/ml
    def train(self, runid, iterations, step):
        if '-' in runid or '_' in runid:
            raise Exception("runid can't contain '-', nor '_'")

        epoch, last_run = self.find_last_model(self.output_prefix(runid, step))
        if last_run:
            print 'starting with: {} on epoch: {}'.format(last_run, epoch)
            model = load_model(data_path(last_run))
        else:
            print 'starting with a newly built model'
            model = self.build_model()

        sentences = []
        next_chars = []
        for i in range(0, len(self._text) - self._maxlen, step):
            sentences.append(self._text[i: i + self._maxlen])
            next_chars.append(self._text[i + self._maxlen])
        print('nb sentences:', len(sentences))

        print('Vectorization...')
        X = np.zeros((len(sentences), self._maxlen, len(self._chars)), dtype=np.bool)
        y = np.zeros((len(sentences), len(self._chars)), dtype=np.bool)
        for i, sentence in enumerate(sentences):
            for t, char in enumerate(sentence):
                X[i, t, self._char_indices[char]] = 1
            y[i, self._char_indices[next_chars[i]]] = 1

        # train the model, output generated text after each iteration
        history = LossHistory(self.output_prefix(runid, step), model, epoch)

        for iteration in range(epoch + 1, iterations):
            print()
            print('-' * 50)
            print('Iteration', iteration)

            model.fit(X, y, batch_size=128, nb_epoch=1, callbacks=[history], validation_split=0.1)

            self.generate_show(model, 140, [0.1, 0.4], self.random_seed_sentence())
Example #18
class Test_Settlement:
    def setup_class(self):
        DriverUtils.open_driver().get("http://127.0.0.1/")

    def teardown_class(self):
        DriverUtils.close_driver()

    @pytest.mark.parametrize("msg",
                             data_path(BAS_URL + '/data/tpshop.json',
                                       'settlement'))
    def test_03_settlement(self, msg):
        try:
            # Open the cart from the home page and go to checkout
            Page.get_home_page().home_cart()
            # Check out from the cart page and submit the order
            message = Page.get_addcart_page().addcart_settlement()
            # Assert
            if message == msg:
                print("Order submitted successfully")
            logging.info("-------------------> order submitted")
        except Exception:
            # Screenshot on failure
            DriverUtils().screen_image()
            raise
Example #19
class Test_Add_Cart:
    def setup_class(self):
        DriverUtils.open_driver().get("http://127.0.0.1/")

    def teardown_class(self):
        DriverUtils.close_driver()

    @pytest.mark.parametrize("product_name,msg",
                             data_path(BAS_URL + '/data/tpshop.json',
                                       'product'))
    def test_02_add_cart(self, product_name, msg):
        try:
            # Search from the home page
            Page.get_home_page().home_search(product_name)
            # Open the product detail page and add the product to the cart
            message = Page.get_product_page().product_add_cart()
            # Assert
            assert msg == message
            logging.info("-------------------> %s" % msg)
        except Exception:
            print("Add to cart failed")
            # Screenshot on failure
            DriverUtils().screen_image()
            raise
Example #20
def load_record_dawg():
    return dawg_python.RecordDAWG(str('<H')).load(
        data_path('large', 'record_dawg.dawg'))
Example #21
def load_dawg():
    return dawg_python.DAWG().load(data_path('large', 'dawg.dawg'))
Example #22
def load_int_dawg():
    return dawg_python.IntDAWG().load(data_path('large', 'int_dawg.dawg'))
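Examples #20-#22 and #28 each just load a prebuilt DAWG from the data directory. Once loaded, the dawg_python objects behave as read-only mappings; a short usage sketch (the key and prefix here are made up for illustration):

record = load_record_dawg()
# RecordDAWG values are lists of tuples unpacked with the '<H' format
if 'example-key' in record:
    print(record['example-key'])   # e.g. [(42,)]
# completion-style DAWGs can also enumerate keys under a prefix
print(record.keys('example-'))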
Example #23
if __name__ == '__main__':
    # prepare_all()
    batch_size = 500
    epoch = 1
    train_images, train_questions, train_answers = get_matrix('train')
    val_images, val_questions, val_answers = get_matrix('val')
    m = VQA()
    rmsprop = RMSprop(lr=3e-4)
    m.compile(loss='categorical_crossentropy', optimizer=rmsprop, metrics=['accuracy'])
    for i in range(100):
        print(i)
        m.fit([train_images, train_questions], train_answers, batch_size=batch_size, nb_epoch=epoch)
        # m.save('vqa.h5')
        # m = load_model('vqa.h5')

        p = m.predict([val_images, val_questions], batch_size=batch_size, verbose=1)
        p_answers = p.argmax(axis=-1)
        cPickle.dump(p_answers, open(data_path('predict.pkl'), 'wb'), cPickle.HIGHEST_PROTOCOL)
        val_result(p_answers, val_answers)
Example #24
parser.add_argument('--debug',       action='store_true', help='debug mode: no saving or tensorboard')
parser.add_argument('--tensorboard', action='store_true', help='use TensorBoard')

# save path
parser.add_argument('--model_path', type=str, default="./models/") # /misc/vlgscratch2/ChoGroup/mansimov/
parser.add_argument('--log_path', type=str, default="./logs/") # /misc/vlgscratch2/ChoGroup/mansimov/
parser.add_argument('--event_path', type=str, default="./events/") # /misc/vlgscratch2/ChoGroup/mansimov/

parser.add_argument('--model_str', type=str, default="") # /misc/vlgscratch2/ChoGroup/mansimov/

# ----------------------------------------------------------------------------------------------------------------- #

args = parser.parse_args()
if args.prefix == '[time]':
    args.prefix = strftime("%m.%d_%H.%M.", gmtime())
args.data_prefix = data_path()

if args.train_repeat_dec > 1:
    if args.num_shared_dec == -1:
        args.num_shared_dec = args.train_repeat_dec
else:
    args.num_shared_dec = 1
assert args.num_shared_dec <= args.train_repeat_dec
assert args.num_shared_dec != -1

# get the language pairs:
args.src = args.language[:2]  # source language
args.trg = args.language[2:]  # target language

if args.params == 'normal':
    hparams = {'d_model': 278, 'd_hidden': 507,
Example #25
File: interface.py Project: ittner/sked
    def ui_init(self, fname):
        self.ui = gtk.Builder()
        self.ui.add_from_file(utils.data_path(fname))
Example #26
def get_answers_map():
    answers = g.get('_answers', None)
    if answers is None:
        answers_mapping = g._answers = json.load(open(data_path('f_answers_id_map.json'), 'r'))
        answers = {answer_id: answer_str for answer_str, answer_id in answers_mapping.items()}
    return answers
Example #27
File: brain.py Project: gregaw/ml
    def on_epoch_end(self, epoch, logs={}):
        self.epoch_count += 1
        save_model(
            self.model,
            data_path("{}_{}_{:.2f}.hdf5".format(self.prefix, self.epoch_count, logs.get('loss'))))
Example #28
def load_bytes_dawg():
    return dawg_python.BytesDAWG().load(data_path('large', 'bytes_dawg.dawg'))
Example #32
import os

import pandas as pd
from utils import data_path

ratings_dest = data_path('ratings.pkl')

all_ratings = None

for i, filename in enumerate(os.listdir(data_path('training_set'))):
    print("\rParsing {:4} of 1000".format(i), end=' ')
    with open(data_path('training_set', filename)) as f:
        movie_id = int(f.readline().replace(':', ''))
        ratings = pd.read_csv(f,
                              header=0,
                              names=['cust_id', 'stars', 'date'],
                              parse_dates=['date'])
        ratings['movie_id'] = movie_id

        if all_ratings is None:
            all_ratings = ratings
        else:
            all_ratings = pd.concat([all_ratings, ratings])
    if i == 1000:
        break

all_ratings.to_pickle(ratings_dest)
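Two implementation notes on this loop, offered as suggestions rather than fixes to the original: calling pd.concat inside the loop re-copies the accumulated frame on every iteration, so collecting per-file frames in a list and concatenating once scales better; and if the rating files carry no header row after the movie-id line (a guess about the data layout), header=None is what preserves the first rating, whereas header=0 would drop it. A sketch under those assumptions:

import os

import pandas as pd
from utils import data_path

frames = []
for i, filename in enumerate(os.listdir(data_path('training_set'))):
    with open(data_path('training_set', filename)) as f:
        # the first line holds the movie id, e.g. "123:"
        movie_id = int(f.readline().replace(':', ''))
        ratings = pd.read_csv(f, header=None,
                              names=['cust_id', 'stars', 'date'],
                              parse_dates=['date'])
        ratings['movie_id'] = movie_id
        frames.append(ratings)
    if i == 1000:
        break

# one concatenation at the end instead of one per file
pd.concat(frames).to_pickle(data_path('ratings.pkl'))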
Example #33
def get_word_id_map():
    words = g.get('_words', None)
    if words is None:
        words = g._words = json.load(open(data_path('f_word_id_map.json'), 'r'))
    return words
Example #35
File: xmlio.py Project: ittner/sked
    def resolveEntity(self, publicId, systemId):
        if systemId == "sked.dtd":
            return utils.data_path(systemId)
        return None