示例#1
0
    def setUp(self):
        """Write the classifier config, start a server process, and connect.

        The configuration is kept on ``self.config``, the handle returned by
        ``TestUtil.fork_process`` on ``self.srv`` and the client on
        ``self.cli``.
        """
        wildcard_string_rule = {
            "key": "*",
            "type": "str",
            "sample_weight": "bin",
            "global_weight": "bin"
        }
        wildcard_num_rule = {"key": "*", "type": "num"}
        converter = {
            "string_filter_types": {},
            "string_filter_rules": [],
            "num_filter_types": {},
            "num_filter_rules": [],
            "string_types": {},
            "string_rules": [wildcard_string_rule],
            "num_types": {},
            "num_rules": [wildcard_num_rule]
        }
        self.config = {
            "method": "AROW",
            "converter": converter,
            "parameter": {"regularization_weight": 1.001}
        }

        config_path = 'config_classifier.json'
        TestUtil.write_file(config_path, json.dumps(self.config))
        self.srv = TestUtil.fork_process('classifier', port, config_path)
        try:
            self.cli = Classifier(host, port, "name")
        except:
            # Broad on purpose: whatever went wrong, stop the server that was
            # just started, then let the original error propagate.
            TestUtil.kill_process(self.srv)
            raise
示例#2
0
 def run(self):
     """Save the model ``self.count`` times under names built from ``self.name``.

     Each save uses ``self.name`` followed by the iteration index as the
     model name.
     """
     start_message = 'Start running with name: {0}, count: {1}'.format(
         self.name, self.count)
     logging.debug(start_message)

     client = Classifier('127.0.0.1', 9199, 'test')
     for index in range(self.count):
         model_name = self.name + str(index)
         client.save(model_name)

     logging.debug('Finished running')
示例#3
0
文件: do_mix.py 项目: rimms/misc
def main():
  """Trigger 10000 mixes and print each node's RSS every 100 iterations.

  The port comes from ``parse_options()``. The trailing ``0`` passed to
  ``Classifier`` is presumably the client timeout -- confirm against the
  Jubatus client API.
  """
  args = parse_options()

  client = Classifier('127.0.0.1', args.port, 'test', 0)

  for i in range(10000):
    client.do_mix()

    # Report on iterations 0, 100, 200, ... only.
    if i % 100 == 0:
      status = client.get_status()
      for node in status:
        # Parenthesised single-argument print runs on Python 2 and 3 alike.
        print('\t'.join([str(i), node, status[node]['RSS']]))
示例#4
0
def main():
    """Trigger 10000 mixes and print each node's RSS every 100 iterations.

    The port comes from ``parse_options()``. The trailing ``0`` passed to
    ``Classifier`` is presumably the client timeout -- confirm against the
    Jubatus client API.
    """
    args = parse_options()

    client = Classifier('127.0.0.1', args.port, 'test', 0)

    for i in range(10000):
        client.do_mix()

        # Report on iterations 0, 100, 200, ... only.
        if i % 100 == 0:
            status = client.get_status()
            for node in status:
                # Parenthesised single-argument print runs on Python 2 and 3.
                print('\t'.join([str(i), node, status[node]['RSS']]))
示例#5
0
文件: predict.py 项目: rimms/misc
def main():
  """Classify one randomly generated datum and print the result and labels.

  The port comes from ``parse_options()``.
  """
  args = parse_options()

  client = Classifier('127.0.0.1', args.port, 'test', 0)

  d = Datum()

  # Build the query datum: 'key' is 1.0 or 2.0, picked at random.
  rand = random.randint(0, 1)
  d.add_number('key', 1.0 if rand else 2.0)

  # Parenthesised single-argument print runs on Python 2 and 3 alike.
  print(client.classify([d]))
  print(client.get_labels())
示例#6
0
    def setUp(self):
        """Write the classifier config, start a server process, and connect.

        Stores the configuration on ``self.config``, the handle returned by
        ``TestUtil.fork_process`` on ``self.srv`` and the client on
        ``self.cli``.
        """
        config_file = 'config_classifier.json'

        string_rule = {
            "key": "*",
            "type": "str",
            "sample_weight": "bin",
            "global_weight": "bin"
        }
        num_rule = {
            "key": "*",
            "type": "num"
        }
        self.config = {
            "method": "AROW",
            "converter": {
                "string_filter_types": {},
                "string_filter_rules": [],
                "num_filter_types": {},
                "num_filter_rules": [],
                "string_types": {},
                "string_rules": [string_rule],
                "num_types": {},
                "num_rules": [num_rule]
            },
            "parameter": {"regularization_weight": 1.001}
        }

        TestUtil.write_file(config_file, json.dumps(self.config))
        self.srv = TestUtil.fork_process('classifier', port, config_file)
        try:
            self.cli = Classifier(host, port, "name")
        except:
            # Broad on purpose: stop the freshly started server on any
            # failure, then re-raise the original error.
            TestUtil.kill_process(self.srv)
            raise
示例#7
0
文件: train.py 项目: rimms/misc
def main():
    """Train the classifier with 1000000 random single-feature samples.

    Each sample has one numeric feature ``'key'``: 1.0 with label ``'Pos'``
    or 2.0 with label ``'Neg'``, chosen at random. Progress is printed every
    10000 samples. The port comes from ``parse_options()``.
    """
    args = parse_options()

    client = Classifier('127.0.0.1', args.port, 'test', 0)

    for i in range(1000000):
        d = Datum()

        # The same two (value, label) pairs are learned over and over.
        rand = random.randint(0, 1)
        d.add_number('key', 1.0 if rand else 2.0)
        ld = LabeledDatum('Pos' if rand else 'Neg', d)

        client.train([ld])

        if i % 10000 == 0:
            # Parenthesised single-argument print runs on Python 2 and 3.
            print('train ' + str(i) + ' data')
示例#8
0
文件: train.py 项目: rimms/misc
def main():
  """Train the classifier with 1000000 random single-feature samples.

  Each sample has one numeric feature ``'key'``: 1.0 with label ``'Pos'``
  or 2.0 with label ``'Neg'``, chosen at random. Progress is printed every
  10000 samples. The port comes from ``parse_options()``.
  """
  args = parse_options()

  client = Classifier('127.0.0.1', args.port, 'test', 0)

  for i in range(1000000):
    d = Datum()

    # The same two (value, label) pairs are learned over and over.
    rand = random.randint(0, 1)
    d.add_number('key', 1.0 if rand else 2.0)
    ld = LabeledDatum('Pos' if rand else 'Neg', d)

    client.train([ld])

    if i % 10000 == 0:
      # Parenthesised single-argument print runs on Python 2 and 3 alike.
      print('train ' + str(i) + ' data')
示例#9
0
文件: predict.py 项目: rimms/misc
def main():
    """Classify one randomly generated datum and print the result and labels.

    The port comes from ``parse_options()``.
    """
    args = parse_options()

    client = Classifier('127.0.0.1', args.port, 'test', 0)

    d = Datum()

    # Build the query datum: 'key' is 1.0 or 2.0, picked at random.
    rand = random.randint(0, 1)
    d.add_number('key', 1.0 if rand else 2.0)

    # Parenthesised single-argument print runs on Python 2 and 3 alike.
    print(client.classify([d]))
    print(client.get_labels())
示例#10
0
def get_classify_data(usr):
    """Train a classifier for one user and split fresh RSS items by its verdict.

    Training uses the rows returned by ``select_Interrest_Blog`` (label taken
    from ``CATEGORY``, text from ``TITLE``) plus RSS titles from four
    categories, all labelled ``'no'``. RSS items from four other categories
    are then classified one by one.

    Args:
        usr: mapping with at least a ``'display_name'`` entry; it is also
            passed through to ``get_rss_data_from_catlist``.

    Returns:
        A tuple ``(ret1, ret2)``: the items whose most likely label is
        ``'yes'``, and all other classified items.
    """
    user = usr['display_name']
    print(user)

    options, _ = parse_args()
    classifier = Classifier(options.server_ip, options.server_port,
                            options.name, 10.0)

    # Train on the user's own rows.
    # NOTE(review): items are later tested against the label 'yes', so the
    # CATEGORY column presumably carries 'yes' -- confirm.
    for row in select_Interrest_Blog(user):
        label = row['CATEGORY']
        datum = Datum({"message": row['TITLE']})
        classifier.train([LabeledDatum(label, datum)])

    # Titles from these categories are all trained with the label 'no'.
    negative_items = get_rss_data_from_catlist(
        usr, ['social', 'fun', 'entertainment', 'game'])
    for data in negative_items:
        datum = Datum({"message": data["title"]})
        classifier.train([LabeledDatum('no', datum)])

    ret1 = []
    ret2 = []
    candidates = get_rss_data_from_catlist(
        usr, ['it', 'popular', 'life', 'knowledge'])
    for data in candidates:
        datum = Datum({"message": data["title"]})
        ans = classifier.classify([datum])
        if ans is not None:
            # ans holds one result list per submitted datum; only one was sent.
            estm = get_most_likely(ans[0])
            if estm[0] == 'yes':
                ret1.append(data)
            else:
                ret2.append(data)

    print(ret1)
    print("")
    print(ret2)

    return ret1, ret2
        x_vector = numpy.array(dat)
        if first_flag == 1:
            train_data = numpy.hstack((train_data, x_vector))
            train_label = numpy.array(y_vector)
            first_flag = 0
        else:
            train_data = numpy.vstack((train_data, x_vector))
            train_label = numpy.array(y_vector)
    train_list = [train_data, train_label]
    return train_list


if __name__ == '__main__':
    # Server address and instance name come from the command line.
    options, remainder = parse_args()

    # 10.0 is presumably the client timeout in seconds -- TODO confirm.
    classifier = Classifier(options.server_ip,options.server_port, options.name, 10.0)


    # Element 0 of train_list is split as the data, element 1 as the labels.
    train_list = cross_validation_python()
    data_train, data_test, label_train, label_test = train_test_split(train_list[0], train_list[1])

    # Train one sample at a time; dat[0] is parsed as JSON to build the Datum.
    for label, dat in izip(label_train, data_train):
        data_dict = json.loads(dat[0])
        datum = Datum(data_dict)
        classifier.train([LabeledDatum(label, datum)])





    # Counter for code that continues beyond this excerpt.
    count_ok = 0
示例#12
0
#!/usr/bin/env python

from jubatus.classifier.client import Classifier

# 49 rounds, each with a fresh client issuing 10000 mixes; every 1000th mix
# prints one tab-separated line per node: counter, node name, RSS.
for idx in xrange(1, 50):
    client = Classifier('127.0.0.1', 9199, 'test')
    for i in xrange(1, 10001):
        client.do_mix()
        if i % 1000:
            continue
        status = client.get_status()
        for node in status.keys():
            fields = [str((idx * 10000) + i), node, status[node]['RSS']]
            print('\t'.join(fields))
示例#13
0
文件: test.py 项目: rimms/misc
#!/usr/bin/env python

import random
import time

from jubatus.classifier.client import Classifier
from jubatus.classifier.types import LabeledDatum
from jubatus.common import Datum

# Build 100000 labelled datums, each with 20 numeric features whose keys are
# unique per datum ("<j>-<i>") and whose values lie in [1.0, 2.0).
data = []
for i in xrange(0, 100000):
    d = Datum()
    for j in xrange(0, 20):
        feature_key = str(j) + "-" + str(i)
        d.add_number(feature_key, random.random() + 1.0)

    label = "Pos" if random.randint(0, 1) else "Neg"
    data.append(LabeledDatum(label, d))

client = Classifier("127.0.0.1", 9199, "test", 0)

# Time a single train() call that carries the whole batch.
start_time = time.time()
client.train(data)
elapsed_msec = (time.time() - start_time) * 1000

print(str(len(data)) + " ... " + str(elapsed_msec) + " msec")
示例#14
0
文件: get_status.py 项目: rimms/misc
#!/usr/bin/env python

from jubatus.classifier.client import Classifier
import time


# 49 rounds, each with a fresh client issuing 10000 get_status calls; every
# 1000th call prints one tab-separated line per node: counter, node name, RSS.
for idx in xrange(1, 50):
    client = Classifier("127.0.0.1", 9199, "test")
    for i in xrange(1, 10001):
        status = client.get_status()
        if i % 1000:
            continue
        for node in status.keys():
            fields = [str((idx * 10000) + i), node, status[node]["RSS"]]
            print("\t".join(fields))
示例#15
0
文件: test.py 项目: rimms/misc
#!/usr/bin/env python

import random
import time

from jubatus.classifier.client import Classifier
from jubatus.classifier.types import LabeledDatum
from jubatus.common import Datum

# Build 100000 labelled datums, each with 20 numeric features whose keys are
# unique per datum ('<j>-<i>') and whose values lie in [1.0, 2.0).
data = []
for i in xrange(0, 100000):
    d = Datum()
    for j in xrange(0, 20):
        feature_key = str(j) + '-' + str(i)
        d.add_number(feature_key, random.random() + 1.0)

    label = 'Pos' if random.randint(0, 1) else 'Neg'
    data.append(LabeledDatum(label, d))

client = Classifier('127.0.0.1', 9199, 'test', 0)

# Time a single train() call that carries the whole batch.
start_time = time.time()
client.train(data)
elapsed_msec = (time.time() - start_time) * 1000

print(str(len(data)) + ' ... ' + str(elapsed_msec) + ' msec')
示例#16
0
class ClassifierTest(unittest.TestCase):
    """Tests for the Classifier client, run against a server started per test."""

    def setUp(self):
        """Write the config file, start a classifier server, and connect a client."""
        self.config = {
            "method": "AROW",
            "converter": {
                "string_filter_types": {},
                "string_filter_rules": [],
                "num_filter_types": {},
                "num_filter_rules": [],
                "string_types": {},
                "string_rules": [{"key": "*", "type": "str", "sample_weight": "bin", "global_weight": "bin"}],
                "num_types": {},
                "num_rules": [{"key": "*", "type": "num"}],
            },
            "parameter": {"regularization_weight": 1.001},
        }

        TestUtil.write_file("config_classifier.json", json.dumps(self.config))
        self.srv = TestUtil.fork_process("classifier", port, "config_classifier.json")
        try:
            self.cli = Classifier(host, port, "name")
        except:
            # Broad on purpose: stop the freshly started server on any failure,
            # then re-raise the original error.
            TestUtil.kill_process(self.srv)
            raise

    def tearDown(self):
        """Close the client connection and stop the server process."""
        if self.cli:
            self.cli.get_client().close()
        TestUtil.kill_process(self.srv)

    def test_get_client(self):
        # assertIsInstance reports the actual type when the check fails.
        self.assertIsInstance(self.cli.get_client(), msgpackrpc.client.Client)

    def test_get_config(self):
        # Compare canonical (sorted-key) JSON so key order does not matter.
        config = self.cli.get_config()
        self.assertEqual(json.dumps(json.loads(config), sort_keys=True), json.dumps(self.config, sort_keys=True))

    def test_train(self):
        d = Datum({"skey1": "val1", "skey2": "val2", "nkey1": 1.0, "nkey2": 2.0})
        data = [["label", d]]
        self.assertEqual(self.cli.train(data), 1)

    def test_classify(self):
        d = Datum({"skey1": "val1", "skey2": "val2", "nkey1": 1.0, "nkey2": 2.0})
        data = [d]
        result = self.cli.classify(data)
        # classify is documented to return one result list per submitted datum.
        self.assertEqual(len(result), len(data))

    def test_set_label(self):
        self.assertEqual(self.cli.set_label("label"), True)

    def test_get_labels(self):
        self.cli.set_label("label")
        self.assertEqual(self.cli.get_labels(), {"label": 0})

    def test_delete_label(self):
        self.cli.set_label("label")
        self.assertEqual(self.cli.delete_label("label"), True)

    def test_save(self):
        self.assertEqual(len(self.cli.save("classifier.save_test.model")), 1)

    def test_load(self):
        # A model has to be saved before it can be loaded back.
        model_name = "classifier.load_test.model"
        self.cli.save(model_name)
        self.assertEqual(self.cli.load(model_name), True)

    def test_get_status(self):
        # Smoke test: only checks that the call does not raise.
        self.cli.get_status()

    def test_str(self):
        self.assertEqual("estimate_result{label: label, score: 1.0}", str(EstimateResult("label", 1.0)))
示例#17
0
                client.train(train_data)

    result = client.classify([predict_data[0]])
    predicted = max(result[0], key=lambda x: x.score).label
    if answer == predicted:
        print('correct', end="\t")
    else:
        print('wrong', end="\t")
    print(answer, predicted, result, sep="\t")


if __name__ == '__main__':
    # Expected arguments: port_number training.tsv exclude [name]
    try:
        exclude = sys.argv[3]
        training = sys.argv[2]
        port = int(sys.argv[1])
    except (IndexError, ValueError):
        # IndexError: too few arguments; ValueError: port is not an integer.
        # Anything else is a real error and should not be hidden behind the
        # usage message.
        sys.stderr.write(
            "Usage: jubatus.py port_number training.tsv exclude name\n")
        sys.exit(7)

    localhost = '127.0.0.1'
    # The instance name is optional and falls back to a fixed default.
    if len(sys.argv) > 4:
        name = sys.argv[4]
    else:
        name = 'Coded by Kohji'

    client = Classifier(localhost, port, name)  # connect to Jubatus
    train_and_predict(client, training)
示例#18
0
#!/usr/bin/env python

from jubatus.classifier.client import Classifier


# 49 rounds, each with a fresh client issuing 10000 mixes; every 1000th mix
# prints one tab-separated line per node: counter, node name, RSS.
for idx in xrange(1, 50):
  client = Classifier('127.0.0.1', 9199, 'test')
  for i in xrange(1, 10001):
    client.do_mix()
    if i % 1000:
      continue
    status = client.get_status()
    for node in status.keys():
      fields = [str((idx * 10000) + i), node, status[node]['RSS']]
      print('\t'.join(fields))
示例#19
0
import argparse
import socket
from jubatus.classifier.client import Classifier

# Command-line tool: ask a running classifier server to save its model.
parser = argparse.ArgumentParser()
parser.add_argument("-n", "--name", help="set the name of the file to be saved")
parser.add_argument("--host", help="set the host address")
# Parse the port as an integer: without type=int a user-supplied port reaches
# the client as a string while the built-in default (9199) is an int.
parser.add_argument("--port", type=int, help="set the port number")

args = parser.parse_args()
print(args)
# Fall back to this machine's own address and the port 9199.
host_ip = args.host if args.host else socket.gethostbyname(socket.gethostname())
port = args.port if args.port else 9199

client = Classifier(host_ip, port, '')
if args.name:
    client.save(args.name)
    print("file saved at /tmp of the "+host_ip+" unless you specified output path with -d/--datadir when you started server process.")
else:
    print("[Error] specify the model's name to be saved!")
示例#20
0
#!/usr/bin/env python

from jubatus.classifier.client import Classifier

client = Classifier('127.0.0.1', 9199, 'test')

# Issue 10000 consecutive mix requests on the same connection.
mix_count = 10000
for i in xrange(mix_count):
    client.do_mix()
def main():
    """Send one classify request with an empty datum list to the "sleeping" instance.

    ``port`` and ``timeout`` are module-level names defined outside this
    function.
    """
    sleeping_client = Classifier("127.0.0.1", port, "sleeping", timeout)
    sleeping_client.classify([])
示例#22
0
class ClassifierTest(unittest.TestCase):
    """Tests for the Classifier client, run against a server started per test."""

    def setUp(self):
        """Write the config file, start a classifier server, and connect."""
        self.config = {
            "method": "AROW",
            "converter": {
                "string_filter_types": {},
                "string_filter_rules": [],
                "num_filter_types": {},
                "num_filter_rules": [],
                "string_types": {},
                "string_rules": [{
                    "key": "*",
                    "type": "str",
                    "sample_weight": "bin",
                    "global_weight": "bin"
                }],
                "num_types": {},
                "num_rules": [{
                    "key": "*",
                    "type": "num"
                }]
            },
            "parameter": {
                "regularization_weight": 1.001
            }
        }

        TestUtil.write_file('config_classifier.json', json.dumps(self.config))
        self.srv = TestUtil.fork_process('classifier', port,
                                         'config_classifier.json')
        try:
            self.cli = Classifier(host, port, "name")
        except:
            # Broad on purpose: stop the freshly started server on any
            # failure, then re-raise the original error.
            TestUtil.kill_process(self.srv)
            raise

    def tearDown(self):
        """Close the client connection and stop the server process."""
        if self.cli:
            self.cli.get_client().close()
        TestUtil.kill_process(self.srv)

    def test_get_client(self):
        # assertIsInstance reports the actual type when the check fails.
        self.assertIsInstance(self.cli.get_client(),
                              msgpackrpc.client.Client)

    def test_get_config(self):
        # Compare canonical (sorted-key) JSON so key order does not matter.
        config = self.cli.get_config()
        self.assertEqual(json.dumps(json.loads(config), sort_keys=True),
                         json.dumps(self.config, sort_keys=True))

    def test_train(self):
        d = Datum({
            "skey1": "val1",
            "skey2": "val2",
            "nkey1": 1.0,
            "nkey2": 2.0
        })
        data = [["label", d]]
        self.assertEqual(self.cli.train(data), 1)

    def test_classify(self):
        d = Datum({
            "skey1": "val1",
            "skey2": "val2",
            "nkey1": 1.0,
            "nkey2": 2.0
        })
        data = [d]
        result = self.cli.classify(data)
        # classify is documented to return one result list per datum.
        self.assertEqual(len(result), len(data))

    def test_set_label(self):
        self.assertEqual(self.cli.set_label("label"), True)

    def test_get_labels(self):
        self.cli.set_label("label")
        self.assertEqual(self.cli.get_labels(), {"label": 0})

    def test_delete_label(self):
        self.cli.set_label("label")
        self.assertEqual(self.cli.delete_label("label"), True)

    def test_save(self):
        self.assertEqual(len(self.cli.save("classifier.save_test.model")), 1)

    def test_load(self):
        # A model has to be saved before it can be loaded back.
        model_name = "classifier.load_test.model"
        self.cli.save(model_name)
        self.assertEqual(self.cli.load(model_name), True)

    def test_get_status(self):
        # Smoke test: only checks that the call does not raise.
        self.cli.get_status()

    def test_str(self):
        self.assertEqual("estimate_result{label: label, score: 1.0}",
                         str(EstimateResult("label", 1.0)))
    result = {}
    result[0] = ''
    result[1] = 0
    for res in estm:
        if prob == None or res.score > prob:
            ans = res.label
            prob = res.score
            result[0] = ans
            result[1] = prob
    return result


if __name__ == '__main__':
    # Server address and instance name come from the command line.
    options, remainder = parse_args()

    classifier = Classifier(options.server_ip, options.server_port,
                            options.name, 10.0)

    print(classifier.get_config())
    print(classifier.get_status())

    # Each line of train.dat is "<label>,<path>"; the content of the file at
    # <path> is the training text for <label>.
    with open('train.dat') as train_index:
        for line in train_index:
            # Strip only the line terminator, so a final line without a
            # trailing newline does not lose its last character.
            entry = line.rstrip('\r\n')
            if not entry:
                continue  # tolerate blank lines
            label, path = entry.split(',')
            with open(path) as document:
                dat = document.read()
            datum = Datum({"message": dat})
            classifier.train([LabeledDatum(label, datum)])

    print(classifier.get_status())

    print(classifier.save("tutorial"))

    print(classifier.load("tutorial"))
    result[0] = ''
    result[1] = 0
    for res in estm:
        if prob == None or res.score > prob :
            ans = res.label
            prob = res.score
            result[0] = ans
            result[1] = prob
    return result



if __name__ == '__main__':
    # Server address and instance name come from the command line.
    options, remainder = parse_args()

    classifier = Classifier(options.server_ip, options.server_port, options.name, 10.0)

    print(classifier.get_config())
    print(classifier.get_status())

    # Each line of train.dat is "<label>,<path>"; the content of the file at
    # <path> is the training text for <label>.
    with open('train.dat') as train_index:
        for line in train_index:
            # Strip only the line terminator, so a final line without a
            # trailing newline does not lose its last character.
            entry = line.rstrip('\r\n')
            if not entry:
                continue  # tolerate blank lines
            label, path = entry.split(',')
            with open(path) as document:
                dat = document.read()
            datum = Datum({"message": dat})
            classifier.train([LabeledDatum(label, datum)])

    print(classifier.get_status())

    print(classifier.save("tutorial"))
示例#25
0
文件: do_mix.py 项目: rimms/misc
#!/usr/bin/env python

from jubatus.classifier.client import Classifier

# Connect to the 'test' instance on 127.0.0.1:9000 and trigger a single mix.
# The trailing 0 is presumably the client timeout -- TODO confirm.
client = Classifier('127.0.0.1', 9000, 'test', 0)
client.do_mix()
示例#26
0
文件: test.py 项目: rimms/misc
 def run(self):
   """Save the model ``self.count`` times under names built from ``self.name``.

   Each save uses ``self.name`` followed by the iteration index as the
   model name.
   """
   start_message = 'Start running with name: {0}, count: {1}'.format(
       self.name, self.count)
   logging.debug(start_message)

   client = Classifier('127.0.0.1', 9199, 'test')
   for index in range(self.count):
     model_name = self.name + str(index)
     client.save(model_name)

   logging.debug('Finished running')
import argparse
import socket
from jubatus.classifier.client import Classifier

# Command-line tool: ask a running classifier server to load a saved model.
parser = argparse.ArgumentParser()
parser.add_argument("-n", "--name", help="set the name of the file to load")
parser.add_argument("--host", help="set the host address")
# Parse the port as an integer: without type=int a user-supplied port reaches
# the client as a string while the built-in default (9199) is an int.
parser.add_argument("--port", type=int, help="set the port number")

args = parser.parse_args()
print(args)
# Fall back to this machine's own address and the port 9199.
host_ip = args.host if args.host else socket.gethostbyname(
    socket.gethostname())
port = args.port if args.port else 9199

client = Classifier(host_ip, port, "")

if args.name:
    print(args.name)
    client.load(args.name)
    print("model " + args.name + " has been loaded")
else:
    print("[Error] specify the model's name to be loaded!")