def gen_word_vector_mapping(
        # model_name='googlenews',
        # model_name='glove',
        model_name='fasttext',
):
    """
    generate word vec mapping
    :return:
    """
    files = {
        'googlenews': ('GoogleNews-vectors-negative300.bin', True),
        'glove': ('glove.840B.300d.txt', False),
        'fasttext': ('wiki.en.model.vec', False)
    }
    f = files[model_name]
    model = gensim.models.Word2Vec.load_word2vec_format(
        data_path(f[0]), binary=f[1], unicode_errors='strict')
    word_list = json.load(open(data_path('f_question_words.json'), 'r'))['words']
    word_dict = dict()
    no_match_words = []
    for word in word_list:
        try:
            v = model[word]
            word_dict[word] = v
        except KeyError:
            # word is not in the embedding vocabulary
            no_match_words.append(word)
    json.dump({'missing': no_match_words},
              open(data_path('missing_words.json'), 'w'), indent=2)
    cPickle.dump(word_dict, open(data_path('word_dict.pkl'), 'wb'))
def load_model_with_prefix(self, model_prefix):
    model_files = [x for x in os.listdir(data_path(''))
                   if x.startswith(model_prefix) and x.endswith(".hdf5")]
    if len(model_files) > 1:
        raise Exception(
            "There's more than one model file with the prefix '{}': {}".format(model_prefix, model_files))
    elif not model_files:
        raise Exception("Didn't find anything with prefix: {} in folder {}".format(model_prefix, data_path('')))
    else:
        model = load_model(data_path(model_files[0]))
        return model
def find_last_model(self, runid):
    saves = [x for x in os.listdir(data_path(""))
             if x.startswith(runid) and x.endswith('.hdf5')]
    if saves:
        print saves
        return sorted(map(lambda x: (int(x[len(runid) + 1:].split('_')[0]), x), saves),
                      reverse=True)[0]
    else:
        return 0, None
class Test_Mp_Login:
    titles = time.strftime("%H%M%S")

    def setup_class(self):
        self.driver = DriverUtils.open_driver()
        self.driver.get("http://ttmp.research.itcast.cn/")

    def teardown_class(self):
        DriverUtils.close_driver()

    @pytest.mark.parametrize("username,code,message,title,content,zhuanti",
                             data_path(BAS_URL + '/data/mp.json', 'login'))
    def test_01_login(self, username, code, message, title, content, zhuanti):
        self.title_name = title + self.titles
        try:
            # log in
            Page.get_login_page().mp_login_login(username, code)
            # assert
            is_exists_element(message)
            # write to the log
            logging.info("----------------> login succeeded")
            # open content management and publish an article
            Page.get_home_page().mp_home_ca()
            # fill in and confirm the article content
            Page.get_pusair_page().mp_pusair_contant(self.title_name, content, self.driver, zhuanti)
            logging.info("----------------> publish succeeded")
        except Exception:
            print("Operation failed, please check whether the page has a problem")
            # screenshot on failure
            DriverUtils().screen_image()
            raise
        config.TITLE = self.title_name
        print(config.TITLE)
class Test_Login:
    def setup_class(self):
        DriverUtils.open_driver().get("http://127.0.0.1/")

    def teardown_class(self):
        DriverUtils.close_driver()

    @pytest.mark.parametrize("username,pwd,code,nick_name,msg",
                             data_path(BAS_URL + '/data/tpshop.json', 'login'))
    def test_01_login(self, username, pwd, code, nick_name, msg):
        # click login on the home page to open the login page
        message = Page.get_home_page().home_login()
        try:
            # assert
            assert message == msg
            Page.get_login_page().login_login(username, pwd, code, nick_name)
            # write to the log
            logging.info("username: %s....password: %s...captcha: %s...new nickname: %s" % (username, pwd, code, nick_name))
            logging.info("----------------> login and nickname change succeeded")
        except Exception:
            print("Login failed, please check whether the page has a problem")
            # screenshot on failure
            DriverUtils().screen_image()
            raise
def compare_iteration(model_prefix, iterations, diversities, training_text, seed_sentence=None):
    result = {}
    index = 0
    for requested_iteration in iterations:
        for file_name in [x for x in os.listdir(data_path('')) if x.startswith(model_prefix)]:
            try:
                (runid, maxlen, step, lstm_size, rest) = file_name.split('-')
                (dropout, iteration, rest) = rest.split('_')
                if str(iteration) != str(requested_iteration):
                    continue
                (maxlen, step, lstm_size, dropout) = (int(maxlen), int(step), int(lstm_size), float(dropout))
                brain = Brain(maxlen=maxlen, lstm_size=lstm_size, dropout=dropout, training_text=training_text)
                seed_sentence = seed_sentence or brain.random_seed_sentence()
                print 'sentence: ' + seed_sentence
                print '---- loading model: ' + file_name
                model = brain.load_model_with_prefix(file_name)
                length = 340
                for diversity in diversities:
                    generated = brain.generate_full(
                        model=model, n=length, diversity=diversity, seed_sentence=seed_sentence)
                    result[(index, file_name, diversity)] = generated
                    index += 1
                    print generated
            except:
                print "Unexpected error with {}: {}".format(file_name, sys.exc_info()[1])
                raise
    for (ix, name, div), generated in sorted(result.iteritems()):
        print "ix={}, model={}, div={}| {}".format(ix, name, div, generated.encode('utf-8'))
def gen_question_word_id_vec():
    """
    generate question word to vector
    :return:
    """
    word_dict = cPickle.load(open(data_path('word_dict.pkl'), 'rb'))
    word_vec = [numpy.zeros(300), numpy.zeros(300)]
    word_id_mapping = dict()
    for index, (word, vec) in enumerate(word_dict.items(), start=2):
        word_vec.append(vec)
        word_id_mapping[word] = index
    cPickle.dump(word_vec, open(data_path('word_vec.pkl'), 'wb'),
                 protocol=cPickle.HIGHEST_PROTOCOL)
    json.dump(word_id_mapping, open(data_path('f_word_id_map.json'), 'w'), indent=2)
def gen_image_id_feature():
    """
    generate image id to feature mapping
    :return:
    """
    image_id_path_mapping = json.load(
        open(data_path('f_image_id_path_map.json'), 'r'))
    get_vgg16_dense = VGG16_dense(include_top=True, weights='imagenet')
    image_id_feature_mapping = dict()
    bar = tqdm(total=len(image_id_path_mapping))
    for image_id, image_path in image_id_path_mapping.items():
        bar.update()
        x = preprocess_image(image_path)
        y = get_vgg16_dense([x])
        image_id_feature_mapping[image_id] = y[0][0]
    bar.close()
    cPickle.dump(image_id_feature_mapping,
                 open(data_path('image_id_feature_map.pkl'), 'wb'),
                 protocol=cPickle.HIGHEST_PROTOCOL)
def __init__(self, parent):
    self.parent = parent
    self.ui_init("about-dialog.ui")
    self.dlg = self.ui.get_object("dlgAbout")
    self.dlg.set_version(libsked.VERSION)
    try:
        icon = gdk.pixbuf_new_from_file(utils.data_path("sked.png"))
        self.dlg.set_logo(icon)
    except:
        pass
    self.ui.connect_signals(self)
def gen_word_vector_mapping_glove():
    """
    generate word vec mapping
    :return:
    """
    word_list = json.load(open(data_path('f_question_words.json'), 'r'))['words']
    word_set = set(word_list)
    glove_file = codecs.open(data_path('glove.840B.300d.txt'), 'r', encoding='utf8')
    word_dict = dict()
    for line in glove_file:
        seg = line.split(' ')
        word = seg[0]
        if word in word_set:
            # seg[1:] are strings, so parse the vector components as floats
            word_dict[word] = numpy.asarray(seg[1:], dtype='float32')
            word_set.remove(word)
    json.dump({'missing': list(word_set)},
              open(data_path('missing_words.json'), 'w'), indent=2)
    cPickle.dump(word_dict, open(data_path('word_dict.pkl'), 'wb'))
def get_matrix(m='train'):
    images_list, questions_list, answers_list = cPickle.load(
        open(data_path('{}_matrix.pkl'.format(m)), 'rb'))
    images_list = numpy.asarray(images_list)
    questions_list = numpy.asarray(questions_list)
    questions_list = sequence.pad_sequences(questions_list, maxlen=QUESTION_LENGTH)
    if m == 'train':
        answers_list = numpy.asarray(answers_list)
        answers_list = np_utils.to_categorical(answers_list, MAX_ANSWER)
    return images_list, questions_list, answers_list
def val_result(p_answers=None, val_answers=None):
    """
    evaluate predict result and accuracy
    :return:
    """
    if p_answers is None:
        p_answers = cPickle.load(open(data_path('predict.pkl'), 'rb'))
    if val_answers is None:
        val_images, val_questions, val_answers = get_matrix('val')
    assert len(p_answers) == len(val_answers)
    total = len(p_answers)
    count = 0
    for predict, val in zip(p_answers, val_answers):
        if predict in val:
            count += 1
    print(count, total, float(count) / float(total))
def _load_interface(self):
    self.ui_init("password-dialog.ui")
    self.dlg = self.ui.get_object("dlgPassword")
    self.lbGeneral = self.ui.get_object("lbGeneral")
    self.lbPassword = self.ui.get_object("lbPassword")
    self.lbNewPassword = self.ui.get_object("lbNewPassword")
    self.lbConfirmPassword = self.ui.get_object("lbConfirmPassword")
    self.lbPasswordQuality = self.ui.get_object("lbPasswordQuality")
    self.txPassword = self.ui.get_object("txPassword")
    self.txNewPassword = self.ui.get_object("txNewPassword")
    self.txConfirmPassword = self.ui.get_object("txConfirmPassword")
    self.pgPasswordQuality = self.ui.get_object("pgPasswordQuality")
    self.txPassword.set_visibility(False)
    self.txNewPassword.set_visibility(False)
    self.txConfirmPassword.set_visibility(False)
    try:
        self.dlg.set_icon_from_file(utils.data_path("sked.png"))
    except:
        pass
def run(self):
    """
    record is a string line
    """
    val_maps = []
    for val_idx in xrange(TRAIN_SET_NUM):
        # use the i-th dataset as the validation dataset
        self.val_idx = val_idx
        set_indexs = set(range(TRAIN_SET_NUM))
        set_indexs.discard(val_idx)
        self.train(set_indexs)
        val_res = self.validate()
        show_status(".. get map: " + str(val_res))
        val_maps.append(val_res)
    map_res = sum(val_maps) / TRAIN_SET_NUM
    show_status(".. get average map: " + str(map_res))
    self.model.dataspace.tofile(data_path('models', str(map_res)))
def VQA():
    word_vec_list = cPickle.load(open(data_path('word_vec.pkl'), 'rb'))
    word_vec_len = len(word_vec_list)
    word_vec_list = np.asarray(word_vec_list)

    img_input = Input(shape=(4096,), name='input_img')
    x_img = Dense(1024, activation='tanh', name='fc1')(img_input)

    question_input = Input(shape=(QUESTION_LENGTH,), name='input_question')
    x_str = Embedding(word_vec_len, 300, input_length=QUESTION_LENGTH, mask_zero=True,
                      weights=[word_vec_list])(question_input)
    x_str = LSTM(2048, dropout_W=0.5, consume_less='gpu')(x_str)
    x_str = Dense(1024, activation='tanh', name='fc4')(x_str)

    x_f = merge([x_img, x_str], mode='mul', name='merge1')
    x_f = Dense(MAX_ANSWER, activation='tanh', name='fc5')(x_f)
    x_f = Dropout(0.5)(x_f)
    x_f = Dense(MAX_ANSWER, activation='tanh', name='fc6')(x_f)
    x_f = Dropout(0.5)(x_f)
    x_f = Dense(MAX_ANSWER, activation='softmax', name='predictions')(x_f)

    model = Model(input=[img_input, question_input], output=x_f)
    return model
def train(self, runid, iterations, step):
    if '-' in runid or '_' in runid:
        raise Exception("runid can't contain '-', nor '_'")
    epoch, last_run = self.find_last_model(self.output_prefix(runid, step))
    if last_run:
        print 'starting with: {} on epoch: {}'.format(last_run, epoch)
        model = load_model(data_path(last_run))
    else:
        print 'starting with a newly built model'
        model = self.build_model()

    sentences = []
    next_chars = []
    for i in range(0, len(self._text) - self._maxlen, step):
        sentences.append(self._text[i: i + self._maxlen])
        next_chars.append(self._text[i + self._maxlen])
    print('nb sentences:', len(sentences))

    print('Vectorization...')
    X = np.zeros((len(sentences), self._maxlen, len(self._chars)), dtype=np.bool)
    y = np.zeros((len(sentences), len(self._chars)), dtype=np.bool)
    for i, sentence in enumerate(sentences):
        for t, char in enumerate(sentence):
            X[i, t, self._char_indices[char]] = 1
        y[i, self._char_indices[next_chars[i]]] = 1

    # train the model, output generated text after each iteration
    history = LossHistory(self.output_prefix(runid, step), model, epoch)
    for iteration in range(epoch + 1, iterations):
        print()
        print('-' * 50)
        print('Iteration', iteration)
        model.fit(X, y, batch_size=128, nb_epoch=1, callbacks=[history], validation_split=0.1)
        self.generate_show(model, 140, [0.1, 0.4], self.random_seed_sentence())
class Test_Settlement:
    def setup_class(self):
        DriverUtils.open_driver().get("http://127.0.0.1/")

    def teardown_class(self):
        DriverUtils.close_driver()

    @pytest.mark.parametrize("msg", data_path(BAS_URL + '/data/tpshop.json', 'settlement'))
    def test_03_settlement(self, msg):
        try:
            # open the cart from the home page and go to checkout
            Page.get_home_page().home_cart()
            # settle and submit the order on the cart page
            message = Page.get_addcart_page().addcart_settlement()
            # assert
            if message == msg:
                print("Order submitted successfully")
                logging.info("-------------------> order submitted successfully")
        except Exception:
            # screenshot on failure
            DriverUtils().screen_image()
            raise
class Test_Add_Cart:
    def setup_class(self):
        DriverUtils.open_driver().get("http://127.0.0.1/")

    def teardown_class(self):
        DriverUtils.close_driver()

    @pytest.mark.parametrize("product_name,msg", data_path(BAS_URL + '/data/tpshop.json', 'product'))
    def test_02_add_cart(self, product_name, msg):
        try:
            # search from the home page
            Page.get_home_page().home_search(product_name)
            # open the product detail page and add the product to the cart
            message = Page.get_product_page().product_add_cart()
            # assert
            assert msg == message
            logging.info("------------------->%s" % msg)
        except Exception:
            print("Failed to add to cart")
            # screenshot on failure
            DriverUtils().screen_image()
            raise
def load_record_dawg():
    return dawg_python.RecordDAWG(str('<H')).load(
        data_path('large', 'record_dawg.dawg'))
def load_dawg():
    return dawg_python.DAWG().load(data_path('large', 'dawg.dawg'))
def load_int_dawg():
    return dawg_python.IntDAWG().load(data_path('large', 'int_dawg.dawg'))
if __name__ == '__main__':
    # prepare_all()
    batch_size = 500
    epoch = 1
    train_images, train_questions, train_answers = get_matrix('train')
    val_images, val_questions, val_answers = get_matrix('val')
    m = VQA()
    rmsprop = RMSprop(lr=3e-4)
    m.compile(loss='categorical_crossentropy', optimizer=rmsprop, metrics=['accuracy'])
    for i in range(100):
        print(i)
        m.fit([train_images, train_questions], train_answers, batch_size=batch_size, nb_epoch=epoch)
    # m.save('vqa.h5')
    # m = load_model('vqa.h5')
    p = m.predict([val_images, val_questions], batch_size=batch_size, verbose=1)
    p_answers = p.argmax(axis=-1)
    cPickle.dump(p_answers, open(data_path('predict.pkl'), 'wb'), cPickle.HIGHEST_PROTOCOL)
    val_result(p_answers, val_answers)
parser.add_argument('--debug', action='store_true', help='debug mode: no saving or tensorboard')
parser.add_argument('--tensorboard', action='store_true', help='use TensorBoard')

# save path
parser.add_argument('--model_path', type=str, default="./models/")  # /misc/vlgscratch2/ChoGroup/mansimov/
parser.add_argument('--log_path', type=str, default="./logs/")  # /misc/vlgscratch2/ChoGroup/mansimov/
parser.add_argument('--event_path', type=str, default="./events/")  # /misc/vlgscratch2/ChoGroup/mansimov/
parser.add_argument('--model_str', type=str, default="")  # /misc/vlgscratch2/ChoGroup/mansimov/

# ----------------------------------------------------------------------------------------------------------------- #
args = parser.parse_args()

if args.prefix == '[time]':
    args.prefix = strftime("%m.%d_%H.%M.", gmtime())

args.data_prefix = data_path()

if args.train_repeat_dec > 1:
    if args.num_shared_dec == -1:
        args.num_shared_dec = args.train_repeat_dec
else:
    args.num_shared_dec = 1

assert args.num_shared_dec <= args.train_repeat_dec
assert args.num_shared_dec != -1

# get the language pairs:
args.src = args.language[:2]  # source language
args.trg = args.language[2:]  # target language

if args.params == 'normal':
    hparams = {'d_model': 278, 'd_hidden': 507,
def ui_init(self, fname):
    self.ui = gtk.Builder()
    self.ui.add_from_file(utils.data_path(fname))
def get_answers_map():
    answers = g.get('_answers', None)
    if answers is None:
        answers_mapping = json.load(open(data_path('f_answers_id_map.json'), 'r'))
        # cache the inverted mapping (answer id -> answer string) so repeated calls return the same dict
        answers = g._answers = {answer_id: answer_str for answer_str, answer_id in answers_mapping.items()}
    return answers
def on_epoch_end(self, epoch, logs={}):
    self.epoch_count += 1
    save_model(
        self.model,
        data_path("{}_{}_{:.2f}.hdf5".format(self.prefix, self.epoch_count, logs.get('loss'))))
def load_bytes_dawg():
    return dawg_python.BytesDAWG().load(data_path('large', 'bytes_dawg.dawg'))
import os

import pandas as pd

from utils import data_path

ratings_dest = data_path('ratings.pkl')

all_ratings = None
for i, filename in enumerate(os.listdir(data_path('training_set'))):
    print("\rParsing {:4} of 1000".format(i), end=' ')
    with open(data_path('training_set', filename)) as f:
        # first line holds the movie id ("<id>:"), the remaining lines are
        # "cust_id,stars,date" rows with no header row
        movie_id = int(f.readline().replace(':', ''))
        ratings = pd.read_csv(f, header=None, names=['cust_id', 'stars', 'date'], parse_dates=['date'])
        ratings['movie_id'] = movie_id
    if all_ratings is None:
        all_ratings = ratings
    else:
        all_ratings = pd.concat([all_ratings, ratings])
    if i == 1000:
        break

all_ratings.to_pickle(ratings_dest)
def get_word_id_map():
    words = g.get('_words', None)
    if words is None:
        words = g._words = json.load(open(data_path('f_word_id_map.json'), 'r'))
    return words
def resolveEntity(self, publicId, systemId):
    if systemId == "sked.dtd":
        return utils.data_path(systemId)
    return None