Example #1
    def __init__(self, thread_name, event):
        super(Controller, self).__init__()
        self.name = thread_name
        self.threadEvent = event
        self.logger_info = utils.initlog('Console', 'Console.log')
        self.schedule = sched.scheduler(time.time, time.sleep)

        # Per-topic buffers for related and pushed tweets, plus the sets used
        # to deduplicate by tweet id and by Simhash fingerprint.
        self.profiles_name, profiles = utils.load_profiles('profiles')
        self.related_tweets = [[] for _ in range(len(profiles))]
        self.pushed_tweets = [[] for _ in range(len(profiles))]
        self.pushed_tweets_ids = set()
        self.related_tweets_hash = set()

        self.classifier = Classifier()
        self.ranker = self.load_ranker()
        self.pusher = Pusher()
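The constructor above depends on project-specific helpers (utils.initlog, utils.load_profiles, Classifier, Pusher) that are not shown. As a minimal, self-contained sketch of the same pattern (a threading.Thread subclass that receives a name and a threading.Event in its constructor), the snippet below uses only the standard library; the Worker class and its printed message are hypothetical stand-ins, not part of the original project.

import sched
import threading
import time

class Worker(threading.Thread):
    """Hypothetical stand-in mirroring the Controller constructor's shape."""

    def __init__(self, thread_name, event):
        super(Worker, self).__init__()
        self.name = thread_name
        self.threadEvent = event  # set by the main thread when work should start
        self.schedule = sched.scheduler(time.time, time.sleep)

    def run(self):
        self.threadEvent.wait()  # block until the main thread signals the event
        print('%s started' % self.name)

start_event = threading.Event()
worker = Worker('console-worker', start_event)
worker.start()
start_event.set()  # release the worker thread
worker.join()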
Example #2
    def process(self):
        # Process tweet dump files from the directory given on the command line, in sorted order.
        data_file_path = sys.argv[1]
        files = os.listdir(data_file_path)
        files.sort()
        for f in files:
            filename = os.path.join(data_file_path, f)
            logging.info(filename)
            count = 0
            for line in open(filename, 'rb'):
                start = time.perf_counter()  # time.clock() was removed in Python 3.8
                tweet_text, tid_origin, tid_retweet, timestamp, tweet_json = utils.extract_text(line)
                simhash_value = Simhash(tweet_text).value
                # Skip near-duplicates and tweets whose ids were already pushed.
                if (simhash_value in self.related_tweets_hash
                        or tid_origin in self.pushed_tweets_ids
                        or tid_retweet in self.pushed_tweets_ids):
                    continue

                topic_id, similarity = self.classifier.classify(tweet_text)
                if topic_id == '':
                    continue

                count += 1
                if count % 10000 == 0:
                    logging.info('%d' % count)

                tweet_json['similarity'] = similarity
                evaluate_score = self.ranker.predict(json.dumps(tweet_json))
                total_score = (evaluate_score ** 0.5) * similarity
                # if total_score < 0.15:
                #     continue

                timestruct = time.gmtime(int(timestamp[:-3]))
                is_pushed = self.pusher.push(total_score, topic_id, timestruct)
                if is_pushed:
                    delivery_time = float(timestamp) / 1000.0 + (time.perf_counter() - start)
                    self.pushed_tweets[topic_id].append([tid_origin, str(delivery_time)[:10], similarity, total_score, tweet_text])

                utc_time = time.strftime('%Y%m%d', timestruct)
                self.related_tweets[topic_id].append([utc_time, tid_origin, total_score, tid_retweet, timestamp[:-3], tweet_text])

                self.related_tweets_hash.add(simhash_value)
                self.pushed_tweets_ids.add(tid_retweet)
            self.dump_result(f)
            self.pusher = Pusher()
        self.logger_info.info('\n=======finished!=======\n')
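The filter at the top of process() treats two tweets as duplicates when their Simhash fingerprints are identical. Below is a minimal sketch of that check in isolation, assuming the simhash package (which provides Simhash(text).value); the is_near_duplicate helper is a hypothetical name.

from simhash import Simhash

seen_fingerprints = set()

def is_near_duplicate(text):
    """Return True if this text's Simhash fingerprint was already seen."""
    value = Simhash(text).value
    if value in seen_fingerprints:
        return True
    seen_fingerprints.add(value)
    return False

print(is_near_duplicate('breaking news about the match'))  # False, first occurrence
print(is_near_duplicate('breaking news about the match'))  # True, identical fingerprint

Exact membership on the fingerprint value, as in the original loop, only catches texts that hash to the same 64-bit value; detecting looser near-duplicates would additionally require a Hamming-distance comparison between fingerprints.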
Example #3
last_layer_optimizer_specs = [{
    'params': ppnet.last_layer.parameters(),
    'lr': last_layer_optimizer_lr
}]
last_layer_optimizer = torch.optim.Adam(last_layer_optimizer_specs)

# train the model
log('start training')
pusher = Pusher(
    train_push_loader,
    prototype_network_parallel=ppnet_multi,
    bank_size=bank_size,
    class_specific=class_specific,
    preprocess_input_function=preprocess_input_function,  # normalize if needed
    prototype_layer_stride=1,
    dir_for_saving_prototypes=img_dir,  # if not None, prototypes will be saved here
    prototype_img_filename_prefix=prototype_img_filename_prefix,
    prototype_self_act_filename_prefix=prototype_self_act_filename_prefix,
    proto_bound_boxes_filename_prefix=proto_bound_boxes_filename_prefix,
    save_prototype_class_identity=True,
    log=log)

if do_initial_push:
    perform_push(pusher, 0)

for epoch in range(num_train_epochs):
    log('epoch: \t{0}'.format(epoch))

    if epoch < num_warm_epochs:
        tnt.warm_only(model=ppnet_multi, log=log)
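
last_layer_optimizer_specs follows PyTorch's per-parameter-group convention: torch.optim.Adam accepts a list of dicts, each carrying its own params and lr. A minimal sketch of the same pattern on a toy model follows; the layer split and learning rates are illustrative, not taken from the original training script.

import torch
import torch.nn as nn

model = nn.Sequential(nn.Linear(8, 16), nn.ReLU(), nn.Linear(16, 2))

# One parameter group per stage: the feature layer trains slowly, the head faster.
optimizer = torch.optim.Adam([
    {'params': model[0].parameters(), 'lr': 1e-4},
    {'params': model[2].parameters(), 'lr': 1e-3},
])

x, y = torch.randn(4, 8), torch.randint(0, 2, (4,))
loss = nn.functional.cross_entropy(model(x), y)
optimizer.zero_grad()
loss.backward()
optimizer.step()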