Пример #1
0
 def prepare_training_data(self):
     """Regenerate the training-sample file from the current message data.

     Steps, in order:

     1. ``self._dump2pickle('tmp/message.pickle')``
     2. ``self._preprocess()``
     3. ``self._tag_mapping()``
     4. load ``tmp/workspace.pickle``, pass it through ``select_samples`` and
        ``compute_order``, and hand both results to ``save_samples`` with the
        target path ``tmp/samples.pickle``.

     Returns:
         The string ``"done"``.
     """
     self._dump2pickle('tmp/message.pickle')
     self._preprocess()
     self._tag_mapping()
     # Imported at call time, after the preparation steps, as in the original.
     from analysis.select_samples import select_samples
     from analysis.select_samples import compute_order
     from analysis.select_samples import save_samples
     # Close the file deterministically instead of leaving it to the garbage
     # collector.
     # NOTE(review): the file is opened in text mode; if Serialize emits a
     # binary format this should probably be 'rb' -- confirm.
     with open('tmp/workspace.pickle') as workspace_file:
         message = Serialize.loads(workspace_file.read())
     samples = select_samples(message)
     order = compute_order(samples)
     save_samples(samples, order, 'tmp/samples.pickle')
     return "done"
Пример #2
0
 def prepare_training_data(self):
     """Regenerate the training-sample file from the current message data.

     Calls ``self._dump2pickle('tmp/message.pickle')``, ``self._preprocess()``
     and ``self._tag_mapping()`` in that order, then loads
     ``tmp/workspace.pickle``, runs ``select_samples`` and ``compute_order``
     on it, and passes both results to ``save_samples`` with the target path
     ``tmp/samples.pickle``.

     Returns:
         The string ``"done"``.
     """
     self._dump2pickle('tmp/message.pickle')
     self._preprocess()
     self._tag_mapping()
     # Imported at call time, after the preparation steps, as in the original.
     from analysis.select_samples import select_samples
     from analysis.select_samples import compute_order
     from analysis.select_samples import save_samples
     # The context manager guarantees the handle is closed even if
     # deserialization raises.
     # NOTE(review): text mode is kept from the original; switch to 'rb' if
     # Serialize produces binary data -- confirm.
     with open('tmp/workspace.pickle') as workspace_file:
         message = Serialize.loads(workspace_file.read())
     samples = select_samples(message)
     order = compute_order(samples)
     save_samples(samples, order, 'tmp/samples.pickle')
     return "done"
Пример #3
0
 def train(self, step=100000):
     """Train weights on the saved samples and reload them into the scorer.

     Loads ``tmp/samples.pickle``, reads its ``'samples'`` and ``'order'``
     entries, builds an ``AutoWeight`` from them together with the result of
     ``load_weights()`` and a ``LearnerSigmoid`` instance, runs
     ``aw.sgd(step)``, saves the result with ``save_weights`` and finally
     calls ``self.score.load_weight()``.

     Args:
         step: Value forwarded unchanged to ``AutoWeight.sgd`` (presumably
             the number of SGD iterations -- confirm against AutoWeight).

     Returns:
         The string ``"done"``.
     """
     from analysis.autoweight import AutoWeight
     from analysis.autoweight import load_weights
     from analysis.autoweight import save_weights
     from analysis.autoweight import LearnerSigmoid
     # Close the file deterministically instead of leaving it to the garbage
     # collector.
     # NOTE(review): text mode is kept from the original; switch to 'rb' if
     # Serialize produces binary data -- confirm.
     with open('tmp/samples.pickle') as samples_file:
         data = Serialize.loads(samples_file.read())
     samples = data['samples']
     order = data['order']
     iweight = load_weights()
     aw = AutoWeight(samples, order, iweight, LearnerSigmoid())
     aw.sgd(step)
     save_weights(aw)
     self.score.load_weight()
     return "done"
Пример #4
0
 def train(self, step=100000):
     """Train weights on the saved samples and reload them into the scorer.

     Loads ``tmp/samples.pickle``, takes its ``'samples'`` and ``'order'``
     entries, constructs ``AutoWeight(samples, order, load_weights(),
     LearnerSigmoid())``, runs ``sgd(step)`` on it, saves it through
     ``save_weights`` and then calls ``self.score.load_weight()``.

     Args:
         step: Value forwarded unchanged to ``AutoWeight.sgd`` (presumably
             the number of SGD iterations -- confirm against AutoWeight).

     Returns:
         The string ``"done"``.
     """
     from analysis.autoweight import AutoWeight
     from analysis.autoweight import load_weights
     from analysis.autoweight import save_weights
     from analysis.autoweight import LearnerSigmoid
     # The context manager guarantees the handle is closed even if
     # deserialization raises.
     # NOTE(review): text mode is kept from the original; switch to 'rb' if
     # Serialize produces binary data -- confirm.
     with open('tmp/samples.pickle') as samples_file:
         data = Serialize.loads(samples_file.read())
     samples = data['samples']
     order = data['order']
     iweight = load_weights()
     aw = AutoWeight(samples, order, iweight, LearnerSigmoid())
     aw.sgd(step)
     save_weights(aw)
     self.score.load_weight()
     return "done"
Пример #5
0
    def _preprocess(self):
        """Build lookup tables from the dumped message data and save them.

        Reads ``tmp/message.pickle``, then adds these keys to the loaded
        mapping:

        * ``dict_msg2tag`` -- ``{msg_id: {tag_id: 1}}``
        * ``dict_tag2msg`` -- ``{tag_id: {msg_id: 1}}``
        * ``dict_msg``     -- ``{msg_id: message object}``
        * ``seen_list``    -- messages whose ``flag`` equals ``"seen"``

        Every message object additionally gets a ``tags`` attribute. The
        enriched mapping is written to ``tmp/workspace.pickle``. Load and
        save durations are printed to stdout.
        """
        import time

        # Load. The context manager closes the handle deterministically.
        # NOTE(review): text mode is kept from the original; use 'rb'/'wb'
        # here and below if Serialize produces binary data -- confirm.
        begin = time.time()
        with open('tmp/message.pickle') as source:
            message = Serialize.loads(source.read())
        end = time.time()
        # Parenthesised single-argument form: same output under the Python 2
        # print statement, and also valid as a Python 3 call.
        print("Load finish. Time elapsed: %.3f" % (end - begin))

        # msg -> tags and tag -> msgs indexes. The inner dicts are used as
        # sets: only the keys matter, the value is always 1.
        msg2tag = {}
        tag2msg = {}
        for (msg_id, tag_id) in message['tag_list']:
            msg2tag.setdefault(msg_id, {})[tag_id] = 1
            tag2msg.setdefault(tag_id, {})[msg_id] = 1
        message['dict_msg2tag'] = msg2tag
        message['dict_tag2msg'] = tag2msg

        # 1. attach the tags to each message
        # 2. index messages by id
        # 3. collect the messages flagged as seen
        msg_by_id = {}
        seen_list = []
        for m in message['message_list']:
            if m.flag == "seen":
                seen_list.append(m)
            # A tagged message shares its inner dict with dict_msg2tag;
            # an untagged one gets a fresh empty dict.
            m.tags = msg2tag.get(m.msg_id, {})
            msg_by_id[m.msg_id] = m
        message['dict_msg'] = msg_by_id
        message['seen_list'] = seen_list

        # Save. Closing explicitly guarantees the data is flushed to disk
        # before any caller re-opens tmp/workspace.pickle.
        begin = time.time()
        with open('tmp/workspace.pickle', 'w') as target:
            target.write(Serialize.dumps(message))
        end = time.time()
        print("Save finish. Time elapsed: %.3f" % (end - begin))
Пример #6
0
    def _preprocess(self):
        """Build lookup tables from the dumped message data and save them.

        Reads ``tmp/message.pickle``, then adds these keys to the loaded
        mapping:

        * ``dict_msg2tag`` -- ``{msg_id: {tag_id: 1}}``
        * ``dict_tag2msg`` -- ``{tag_id: {msg_id: 1}}``
        * ``dict_msg``     -- ``{msg_id: message object}``
        * ``seen_list``    -- messages whose ``flag`` equals ``"seen"``

        Every message object additionally gets a ``tags`` attribute. The
        enriched mapping is written to ``tmp/workspace.pickle``. Load and
        save durations are printed to stdout.
        """
        import time

        # Load. The context manager closes the handle deterministically.
        # NOTE(review): text mode is kept from the original; use 'rb'/'wb'
        # here and below if Serialize produces binary data -- confirm.
        begin = time.time()
        with open('tmp/message.pickle') as source:
            message = Serialize.loads(source.read())
        end = time.time()
        # Parenthesised single-argument form: same output under the Python 2
        # print statement, and also valid as a Python 3 call.
        print("Load finish. Time elapsed: %.3f" % (end - begin))

        # msg -> tags and tag -> msgs indexes. The inner dicts are used as
        # sets: only the keys matter, the value is always 1.
        msg2tag = {}
        tag2msg = {}
        for (msg_id, tag_id) in message['tag_list']:
            msg2tag.setdefault(msg_id, {})[tag_id] = 1
            tag2msg.setdefault(tag_id, {})[msg_id] = 1
        message['dict_msg2tag'] = msg2tag
        message['dict_tag2msg'] = tag2msg

        # 1. attach the tags to each message
        # 2. index messages by id
        # 3. collect the messages flagged as seen
        msg_by_id = {}
        seen_list = []
        for m in message['message_list']:
            if m.flag == "seen":
                seen_list.append(m)
            # A tagged message shares its inner dict with dict_msg2tag;
            # an untagged one gets a fresh empty dict.
            m.tags = msg2tag.get(m.msg_id, {})
            msg_by_id[m.msg_id] = m
        message['dict_msg'] = msg_by_id
        message['seen_list'] = seen_list

        # Save. Closing explicitly guarantees the data is flushed to disk
        # before any caller re-opens tmp/workspace.pickle.
        begin = time.time()
        with open('tmp/workspace.pickle', 'w') as target:
            target.write(Serialize.dumps(message))
        end = time.time()
        print("Save finish. Time elapsed: %.3f" % (end - begin))
Пример #7
0
 def _str2pyobj(self, message):
     """Base64-decode ``message`` and deserialize the result.

     Args:
         message: Base64-encoded text of a serialized object.

     Returns:
         Whatever ``Serialize.loads`` produces for the decoded payload.
     """
     # NOTE(review): if Serialize is pickle-based, ``message`` must come from
     # a trusted source -- confirm.
     payload = base64.decodestring(message)
     return Serialize.loads(payload)
Пример #8
0
 def _str2pyobj(self, message):
     """Turn a base64-encoded serialized string back into a Python object.

     Args:
         message: Base64-encoded text of a serialized object.

     Returns:
         The value returned by ``Serialize.loads`` for the decoded bytes.
     """
     # NOTE(review): if Serialize is pickle-based, only feed it trusted
     # input -- confirm.
     decoded = base64.decodestring(message)
     obj = Serialize.loads(decoded)
     return obj