def prepare_training_data(self):
    """Build the training sample file from the current message data.

    Steps, in order:
      1. ``self._dump2pickle('tmp/message.pickle')``
      2. ``self._preprocess()``
      3. ``self._tag_mapping()``
      4. load ``tmp/workspace.pickle``, pass it through ``select_samples``
         and ``compute_order``, and hand the result to ``save_samples``
         with the target path ``tmp/samples.pickle``.

    Returns:
        The literal string ``"done"``.
    """
    self._dump2pickle('tmp/message.pickle')
    self._preprocess()
    self._tag_mapping()

    # Imported lazily, after the steps above, exactly as before.
    from analysis.select_samples import select_samples
    from analysis.select_samples import compute_order
    from analysis.select_samples import save_samples

    # Use a context manager so the file handle is closed deterministically
    # instead of being left to the garbage collector.
    with open('tmp/workspace.pickle') as workspace_file:
        message = Serialize.loads(workspace_file.read())

    samples = select_samples(message)
    order = compute_order(samples)
    save_samples(samples, order, 'tmp/samples.pickle')
    return "done"
def train(self, step=100000):
    """Train weights from ``tmp/samples.pickle`` and reload them into the scorer.

    NOTE(review): a second ``train`` definition with the same body follows
    this one in the file. If both live in the same class, this earlier one
    is shadowed by the later one -- confirm and remove one of them.

    Args:
        step: Value forwarded to ``AutoWeight.sgd`` (presumably the number
            of SGD iterations -- TODO confirm against ``AutoWeight``).

    Returns:
        The literal string ``"done"``.
    """
    from analysis.autoweight import AutoWeight
    from analysis.autoweight import load_weights
    from analysis.autoweight import save_weights
    from analysis.autoweight import LearnerSigmoid

    # Context manager closes the sample file as soon as it has been read.
    with open('tmp/samples.pickle') as samples_file:
        data = Serialize.loads(samples_file.read())

    samples = data['samples']
    order = data['order']
    initial_weights = load_weights()

    trainer = AutoWeight(samples, order, initial_weights, LearnerSigmoid())
    trainer.sgd(step)
    save_weights(trainer)

    # Make the scorer pick up the weights that were just saved.
    self.score.load_weight()
    return "done"
def train(self, step=100000):
    """Train weights from ``tmp/samples.pickle`` and reload them into the scorer.

    NOTE(review): an earlier ``train`` definition with the same body
    precedes this one in the file. If both live in the same class, this
    later one is the definition that takes effect -- confirm and remove
    the duplicate.

    Args:
        step: Value forwarded to ``AutoWeight.sgd`` (presumably the number
            of SGD iterations -- TODO confirm against ``AutoWeight``).

    Returns:
        The literal string ``"done"``.
    """
    from analysis.autoweight import AutoWeight
    from analysis.autoweight import load_weights
    from analysis.autoweight import save_weights
    from analysis.autoweight import LearnerSigmoid

    # Read the serialized samples and close the file right away rather
    # than leaking the handle until garbage collection.
    with open('tmp/samples.pickle') as samples_file:
        payload = Serialize.loads(samples_file.read())

    learner = AutoWeight(
        payload['samples'],
        payload['order'],
        load_weights(),
        LearnerSigmoid(),
    )
    learner.sgd(step)
    save_weights(learner)

    # Refresh the scorer so it uses the weights that were just saved.
    self.score.load_weight()
    return "done"
def _preprocess(self):
    """Index the dumped message data and save it as the workspace file.

    Reads ``tmp/message.pickle``, adds the following keys to the loaded
    mapping, and writes the result to ``tmp/workspace.pickle``:

      * ``dict_msg2tag``: ``{msg_id: {tag_id: 1}}`` built from ``tag_list``
      * ``dict_tag2msg``: ``{tag_id: {msg_id: 1}}`` built from ``tag_list``
      * ``dict_msg``: ``{msg_id: message}`` built from ``message_list``
      * ``seen_list``: messages whose ``flag`` equals ``"seen"``

    Every message in ``message_list`` also gets a ``tags`` attribute: its
    entry from ``dict_msg2tag``, or an empty dict when it has no tags.

    Load and save durations are printed to stdout.
    """
    import time

    # --- load ---------------------------------------------------------
    begin = time.time()
    # `with` closes the handle deterministically (previously leaked).
    with open('tmp/message.pickle') as source_file:
        message = Serialize.loads(source_file.read())
    end = time.time()
    # Parenthesised single-argument form prints the same text on Python 2
    # and is also valid syntax on Python 3.
    print("Load finish. Time elapsed: %.3f" % (end - begin))

    # --- tag <-> message lookup tables --------------------------------
    msg_to_tags = {}
    tag_to_msgs = {}
    for (msg_id, tag_id) in message['tag_list']:
        msg_to_tags.setdefault(msg_id, {})[tag_id] = 1
        tag_to_msgs.setdefault(tag_id, {})[msg_id] = 1
    message['dict_msg2tag'] = msg_to_tags
    message['dict_tag2msg'] = tag_to_msgs

    # --- per-message pass ----------------------------------------------
    # 1. attach the tag dict to every message
    # 2. build the msg_id -> message dict
    # 3. collect the messages flagged as "seen"
    msg_by_id = {}
    seen_list = []
    for msg in message['message_list']:
        if msg.flag == "seen":
            seen_list.append(msg)
        # A tagged message shares its dict with dict_msg2tag; an untagged
        # message gets a fresh empty dict.
        msg.tags = msg_to_tags.get(msg.msg_id, {})
        msg_by_id[msg.msg_id] = msg
    message['dict_msg'] = msg_by_id
    message['seen_list'] = seen_list

    # --- save -----------------------------------------------------------
    begin = time.time()
    # The context manager guarantees the data is flushed and the file is
    # closed before the elapsed time is reported.
    # NOTE(review): text mode 'w' is kept as-is; switch to 'wb'/'rb' if
    # Serialize emits a binary format -- TODO confirm.
    with open('tmp/workspace.pickle', 'w') as workspace_file:
        workspace_file.write(Serialize.dumps(message))
    end = time.time()
    print("Save finish. Time elapsed: %.3f" % (end - begin))
def _str2pyobj(self, message):
    """Turn a base64-encoded serialized string back into a Python object.

    Args:
        message: Base64 text to decode and pass to ``Serialize.loads``.

    Returns:
        Whatever ``Serialize.loads`` returns for the decoded payload.
    """
    # NOTE(review): if Serialize is pickle-based, only feed this trusted
    # input -- TODO confirm.
    raw_payload = base64.decodestring(message)
    return Serialize.loads(raw_payload)