Exemplo n.º 1
0
def evaluate(X_train, X_test, y_train, y_test, n_trials=4):
    """Return the mean test accuracy of each classifier over ``n_trials`` runs.

    Every trial reshuffles the training set, refits the TF-IDF vectorizer and
    then trains and scores each Jubatus and scikit-learn method once.  Each
    individual score is also printed as a tab-separated ``score, method`` line.

    Args:
        X_train: Training samples; fed to ``TfidfVectorizer`` and, one by one,
            used as the ``'message'`` value of a ``Datum``.
        X_test: Test samples, used the same way as ``X_train``.
        y_train: Labels aligned with ``X_train``.
        y_test: Labels aligned with ``X_test``.
        n_trials: Number of runs to average over.

    Returns:
        A dict mapping every method name to its accuracy averaged over
        ``n_trials``.
    """
    jubatus_methods = ['perceptron', 'PA', 'PA1', 'PA2', 'CW', 'AROW', 'NHERD']
    sklearn_methods = [
        'Perceptron(sk)', 'PA1(sk)', 'PA2(sk)', 'LSVM(sk)', 'LR(sk)'
    ]
    results = dict.fromkeys(jubatus_methods + sklearn_methods, 0)
    vectorizer = TfidfVectorizer()
    # X_test never changes, so its Datum form is built once rather than once
    # per method per trial.
    test_data = [Datum({'message': xi}) for xi in X_test]
    for _ in range(n_trials):
        X_train, y_train = shuffle(X_train, y_train, random_state=42)
        vec_X_train = vectorizer.fit_transform(X_train)
        vec_X_test = vectorizer.transform(X_test)
        # The shuffled training set is identical for every Jubatus method in
        # this trial, so convert it to Datum form a single time.
        train_data = [(yi, Datum({'message': xi}))
                      for (xi, yi) in zip(X_train, y_train)]
        for method in jubatus_methods:
            clf = linear_classifier(method=method)
            clf.train(train_data)
            predictions = clf.classify(test_data)
            # Keep only the highest-scoring label of each prediction.
            y_pred = [
                max(pred, key=lambda x: x.score).label for pred in predictions
            ]
            test_score = accuracy_score(y_test, y_pred)
            print('{0:.3f}\t{1}'.format(test_score, method))
            results[method] += test_score
        for method in sklearn_methods:
            clf = sklearn_linear_classifier(method=method)
            clf.fit(vec_X_train, y_train)
            test_score = accuracy_score(y_test, clf.predict(vec_X_test))
            print('{0:.3f}\t{1}'.format(test_score, method))
            results[method] += test_score
    # Turn the accumulated sums into per-method means.
    results = {k: v / n_trials for k, v in results.items()}
    return results
Exemplo n.º 2
0
def test():
    """Exercise the Recommender API: updates, completion, similarity, reset."""

    def _valid_result(result):
        # A completed/decoded row must be a Datum holding both numeric keys.
        assert isinstance(result, Datum)
        numbers = dict(result.num_values)
        assert numbers.get('x') is not None and numbers.get('y') is not None

    recommender = Recommender(RECOMMENDER_CONFIG)
    rows = [
        ('0', {'x': 0.9, 'y': 4.9}),
        ('1', {'x': 1, 'y': 5}),
        ('2', {'x': 1.15, 'y': 5.15}),
        ('3', {'x': 1.2, 'y': 5.1}),
        ('4', {'x': 1.05}),
    ]
    for row_id, values in rows:
        recommender.update_row(row_id, Datum(values))

    _valid_result(recommender.complete_row_from_datum(Datum({'x': 1.1})))
    _valid_result(recommender.complete_row_from_id('4'))

    similar = recommender.similar_row_from_id('2', 3)
    assert isinstance(similar, list)
    assert isinstance(similar[0], IdWithScore)

    similar = recommender.similar_row_from_datum(Datum({'y': 5.05}), 3)
    assert isinstance(similar, list)
    assert isinstance(similar[0], IdWithScore)

    _valid_result(recommender.decode_row('0'))
    assert set(recommender.get_all_rows()) == {str(i) for i in range(5)}

    similarity = recommender.calc_similarity(Datum({'x': 1}), Datum({'y': 5}))
    assert isinstance(similarity, float)
    norm = recommender.calc_l2norm(Datum({'x': 1, 'y': 5}))
    assert isinstance(norm, float)

    recommender.clear()
    assert len(recommender.get_all_rows()) == 0
    assert json.loads(recommender.get_config())

    # Only confirm that dump/load run without raising.
    model = recommender.dump()
    recommender.load(model)
Exemplo n.º 3
0
 def test_get_nearest_members_light(self):
     """Push 100 points, then check the element type of the light lookup."""
     for idx in range(100):
         point = IndexedPoint(str(idx), Datum({"nkey1": idx, "nkey2": -idx}))
         self.cli.push([point])
     query = Datum({"nkey1": 2.0, "nkey2": 1.0})
     nearest = self.cli.get_nearest_members_light(query)
     self.assertIsInstance(nearest[0], WeightedIndex)
    def test_num(self):
        """Train on numeric features and verify classify, labels and reload."""
        clf = Classifier(CONFIG)
        training = [
            ('Y', Datum({'x': 1})),
            ('N', Datum({'x': -1})),
        ]
        self.assertEqual(2, clf.train(training))

        def _test_classify(model):
            # The top-scoring label must follow the sign of the feature.
            results = model.classify([Datum({'x': 1}), Datum({'x': -1})])
            top_labels = []
            for scores in results:
                ranked = sorted(scores, key=lambda s: s.score, reverse=True)
                top_labels.append(ranked[0].label)
            self.assertEqual(['Y', 'N'], top_labels)
            self.assertEqual(model.get_labels(), {'N': 1, 'Y': 1})

        _test_classify(clf)
        saved = clf.save_bytes()

        # After clear() the label table is empty and can be edited by hand.
        clf.clear()
        self.assertEqual({}, clf.get_labels())
        for label in ('Y', 'N'):
            clf.set_label(label)
        self.assertEqual({'N': 0, 'Y': 0}, clf.get_labels())
        clf.delete_label(u'Y')
        self.assertEqual({'N': 0}, clf.get_labels())

        # A fresh instance loaded from the saved bytes behaves like the original.
        clf = Classifier(CONFIG)
        clf.load_bytes(saved)
        _test_classify(clf)
        self.assertEqual(CONFIG, json.loads(clf.get_config()))
    def test(self):
        """Check Weight feature output and that it survives save/load."""
        weight = Weight(CONFIG)
        sample = Datum({'n0': 1, 'n1': 2, 'n2': 3, 't0': 'hello world'})
        # Both calls run before any assertion, as a list literal would do.
        outputs = [weight.update(sample), weight.calc_weight(sample)]
        for features in outputs:
            self.assertEqual(5, len(features))
            for feature in features:
                self.assertIsInstance(feature, Feature)
            by_key = {f.key: f.value for f in features}
            self.assertEqual(5, len(by_key))
            self.assertEqual(1.0, by_key['n0@num'])
            self.assertAlmostEqual(math.log(2), by_key['n1@log'])
            self.assertEqual(1.0, by_key['n2@str$3'])

        for text in ('hello world', 'foo bar', 'hello'):
            weight.update(Datum({'t1': text}))
        query = Datum({'t1': 'foo bar hello world hoge'})
        before = {f.key: f.value for f in weight.calc_weight(query)}

        # Weights computed by a reloaded model must match the original ones.
        model = weight.save_bytes()
        weight = Weight(CONFIG)
        weight.load_bytes(model)
        self.assertEqual(CONFIG, json.loads(weight.get_config()))
        after = {f.key: f.value for f in weight.calc_weight(query)}
        self.assertEqual(before, after)
Exemplo n.º 6
0
 def test_get_nearest_center(self):
     """Push 100 points, then check that the nearest center is a Datum."""
     for idx in range(100):
         point = IndexedPoint(str(idx), Datum({"nkey1": idx, "nkey2": -idx}))
         self.cli.push([point])
     query = Datum({"nkey1": 2.0, "nkey2": 1.0})
     center = self.cli.get_nearest_center(query)
     self.assertIsInstance(center, Datum)
    def test_loadsave(self):
        """Save a trained model to disk, reload it and classify with the copy."""
        clf = Classifier(CONFIG)
        clf.train([
            LabeledDatum('Y', Datum({'x': 'y'})),
            LabeledDatum('N', Datum({'x': 'n'})),
        ])
        path = '/tmp/127.0.0.1_0_classifier_hoge.jubatus'

        def _remove_model():
            # Best-effort cleanup: any failure to delete the file is ignored.
            try:
                os.remove(path)
            except Exception:
                pass

        _remove_model()
        try:
            saved = clf.save('hoge')
            self.assertEqual({'127.0.0.1_0': path}, saved)
            self.assertTrue(os.path.isfile(path))

            clf = Classifier(CONFIG)
            self.assertTrue(clf.load('hoge'))
            results = clf.classify([Datum({'x': 'y'}), Datum({'x': 'n'})])
            top_labels = []
            for scores in results:
                ranked = sorted(scores, key=lambda s: s.score, reverse=True)
                top_labels.append(ranked[0].label)
            self.assertEqual(['Y', 'N'], top_labels)
        finally:
            _remove_model()
 def _test_classify(x):
     """Classify a +1 and a -1 sample and check top labels and label table."""
     results = x.classify([Datum({'x': 1}), Datum({'x': -1})])
     top_labels = []
     for scores in results:
         ranked = sorted(scores, key=lambda s: s.score, reverse=True)
         top_labels.append(ranked[0].label)
     self.assertEqual(['Y', 'N'], top_labels)
     self.assertEqual(x.get_labels(), {'N': 1, 'Y': 1})
Exemplo n.º 9
0
def make_datum(data, headers):
    """Build a Datum from one row of data.

    Takes the list of headers and a single data row, and adds ``data[key]``
    as a numeric value for every ``key`` in ``headers``.
    """
    datum = Datum()
    for key in headers:
        value = data[key]
        datum.add_number(key, value)
    return datum
Exemplo n.º 10
0
    def test(self):
        """Train a regression and check estimates, reload and status shape."""

        def _queries():
            # Fresh Datum objects for every estimate() call.
            return [
                Datum({'x': 32.0}),
                Datum({'x': 1.5}),
            ]

        reg = Regression(CONFIG)
        pairs = ((0.0, 1.0), (1.0, 2.0), (2.0, 4.0), (3.0, 8.0), (4.0, 16.0))
        samples = [ScoredDatum(score, Datum({'x': value}))
                   for score, value in pairs]
        self.assertEqual(5, reg.train(samples))

        ret = reg.estimate(_queries())
        self.assertEqual(2, len(ret))
        self.assertTrue(8.0 <= ret[0] < 9.0)
        self.assertTrue(0.0 <= ret[1] < 1.0)
        self.assertEqual(CONFIG, json.loads(reg.get_config()))

        # A model restored from bytes must give the very same estimates.
        model = reg.save_bytes()
        reg = Regression(CONFIG)
        reg.load_bytes(model)
        self.assertEqual(ret, reg.estimate(_queries()))

        # The status is a one-entry dict keyed by 'embedded'.
        st = reg.get_status()
        self.assertIsInstance(st, dict)
        self.assertEqual(len(st), 1)
        self.assertEqual(list(st)[0], 'embedded')
        self.assertIsInstance(st['embedded'], dict)
Exemplo n.º 11
0
    def test(self):
        """Train a regression, check its estimates and a save/load round trip."""

        def _queries():
            # Fresh Datum objects for every estimate() call.
            return [
                Datum({'x': 32.0}),
                Datum({'x': 1.5}),
            ]

        reg = Regression(CONFIG)
        pairs = ((0.0, 1.0), (1.0, 2.0), (2.0, 4.0), (3.0, 8.0), (4.0, 16.0))
        samples = [ScoredDatum(score, Datum({'x': value}))
                   for score, value in pairs]
        self.assertEqual(5, reg.train(samples))

        ret = reg.estimate(_queries())
        self.assertEqual(2, len(ret))
        self.assertTrue(8.0 <= ret[0] < 9.0)
        self.assertTrue(0.0 <= ret[1] < 1.0)
        self.assertEqual(CONFIG, json.loads(reg.get_config()))

        # A model restored from bytes must give the very same estimates.
        model = reg.save_bytes()
        reg = Regression(CONFIG)
        reg.load_bytes(model)
        self.assertEqual(ret, reg.estimate(_queries()))
Exemplo n.º 12
0
def test_classifier_str():
    """Train on string features and check the top label of each query."""
    clf = Classifier(CLASSIFIER_CONFIG)
    clf.train([
        (u'Y', Datum({'x': u'y'})),
        (u'N', Datum({'x': u'n'})),
    ])
    results = clf.classify([Datum({'x': 'y'}), Datum({'x': 'n'})])
    top_labels = []
    for scores in results:
        ranked = sorted(scores, key=lambda s: s.score, reverse=True)
        top_labels.append(ranked[0].label)
    assert top_labels == ['Y', 'N']
 def test_types(self):
     """Train with LabeledDatum inputs and check the classify result types."""
     clf = Classifier(CONFIG)
     clf.train([
         LabeledDatum('Y', Datum({'x': 'y'})),
         LabeledDatum('N', Datum({'x': 'n'})),
     ])
     results = clf.classify([Datum({'x': 'y'}), Datum({'x': 'n'})])
     self.assertIsInstance(results[0][0], EstimateResult)
     top_labels = []
     for scores in results:
         ranked = sorted(scores, key=lambda s: s.score, reverse=True)
         top_labels.append(ranked[0].label)
     self.assertEqual(['Y', 'N'], top_labels)
 def test_str(self):
     """Train with (label, Datum) tuples of string features and classify."""
     clf = Classifier(CONFIG)
     training = [
         ('Y', Datum({'x': 'y'})),
         ('N', Datum({'x': 'n'})),
     ]
     self.assertEqual(2, clf.train(training))
     results = clf.classify([Datum({'x': 'y'}), Datum({'x': 'n'})])
     top_labels = []
     for scores in results:
         ranked = sorted(scores, key=lambda s: s.score, reverse=True)
         top_labels.append(ranked[0].label)
     self.assertEqual(['Y', 'N'], top_labels)
Exemplo n.º 15
0
def main():
  """Classify one randomly chosen datum and print the result and the labels.

  Connects to the classifier at 127.0.0.1 on the port given by the parsed
  options, builds a datum whose ``'key'`` is 1.0 or 2.0 at random, and prints
  both the classification result and the label table.
  """
  args = parse_options()

  client = Classifier('127.0.0.1', args.port, 'test', 0)

  d = Datum()

  # Query with one of the two values, picked at random.
  rand = random.randint(0, 1)
  d.add_number('key', 1.0 if rand else 2.0)

  # Single-argument print() behaves the same on Python 2 and Python 3.
  print(client.classify([d]))
  print(client.get_labels())
Exemplo n.º 16
0
def get_traindata(labels):
    """Read the first ``n_train`` JPEG files and pair each with its label.

    File ``<index>.jpg`` under ``dir`` is read as bytes and stored under the
    ``"image"`` key of a Datum; its label is ``labels[index][1]``.

    Returns:
        A list of ``[label, Datum]`` pairs.
    """
    traindata = []
    for index in range(n_train):
        img_path = os.path.join(dir, "{}.jpg".format(index))
        with open(img_path, "rb") as img_file:
            binary = img_file.read()
        label = labels[index][1]
        datum = Datum()
        datum.add_binary("image", binary)
        traindata.append([label, datum])
    print("num of train data :", len(traindata))
    return traindata
Exemplo n.º 17
0
  def convert(self, args):
    """Turn a flat ``[key, value, key, value, ...]`` sequence into a Datum.

    A value that ``float()`` accepts is added as a number; when a ValueError
    is raised instead, the raw value is added as a string.

    Returns:
      A tuple ``(len(args), datum)``.

    Raises:
      ValueError: if ``args`` has an odd number of items, i.e. the last key
        has no value.
    """
    count = len(args)
    if count % 2:
      raise ValueError('value for the last datum key ({0}) is missing'.format(args[count - 1]))

    datum = Datum()
    # Walk the sequence two items at a time: key first, then its value.
    for pos in range(0, count, 2):
      feat_key, feat_val = args[pos], args[pos + 1]
      try:
        datum.add_number(feat_key, float(feat_val))
      except ValueError:
        datum.add_string(feat_key, feat_val)
    return (count, datum)
Exemplo n.º 18
0
def get_traindata(labels):
    """Read the first ``n_train`` JPEG files and pair each with its label.

    File ``dir + "<index>.jpg"`` is read as bytes and stored under the
    ``"image"`` key of a Datum; its label is ``labels[index][1]``.  Each
    label/index pair is printed as it is loaded.

    Returns:
        A list of ``[label, Datum]`` pairs.
    """
    traindata = []
    for index in range(n_train):
        img = dir + str(index) + ".jpg"
        with open(img, "rb") as f:
            binary = f.read()
        label = labels[index][1]
        # %-formatting yields the same "label index" text the Python 2
        # print statement produced, and also runs on Python 3.
        print("%s %s" % (label, index))
        d = Datum()
        d.add_binary("image", binary)
        traindata.append([label, d])
    print("num of train data : %d" % len(traindata))
    return traindata
Exemplo n.º 19
0
def get_testdata(labels):
    """Read JPEG files ``n_train`` .. ``n_train + n_test - 1`` as test data.

    File ``dir + "<index>.jpg"`` is read as bytes and stored under the
    ``"image"`` key of a Datum.

    Returns:
        A list of one-element lists, each holding one Datum.
    """
    testdata = []
    # NOTE(review): testlabels is filled but never returned or used; it is
    # kept so that a too-short ``labels`` still fails here as before.
    testlabels = []
    for index in range(n_train, (n_train + n_test)):
        img = dir + str(index) + ".jpg"
        with open(img, "rb") as f:
            binary = f.read()
        d = Datum()
        d.add_binary("image", binary)
        testdata.append([d])
        testlabels.append(labels[index][1])
    # Same text as the Python 2 print statement, but valid on Python 3 too.
    print("num of test  data : %d" % len(testdata))
    return testdata
Exemplo n.º 20
0
def main():
    """Classify one randomly chosen datum and print the result and the labels.

    Connects to the classifier at 127.0.0.1 on the port given by the parsed
    options, builds a datum whose ``'key'`` is 1.0 or 2.0 at random, and
    prints both the classification result and the label table.
    """
    args = parse_options()

    client = Classifier('127.0.0.1', args.port, 'test', 0)

    d = Datum()

    # Query with one of the two values, picked at random.
    rand = random.randint(0, 1)
    d.add_number('key', 1.0 if rand else 2.0)

    # Single-argument print() behaves the same on Python 2 and Python 3.
    print(client.classify([d]))
    print(client.get_labels())
Exemplo n.º 21
0
def get_traindata(labels):
    """Read the first ``n_train`` JPEG files and pair each with its label.

    File ``<index>.jpg`` under ``dir`` is read as bytes and stored under the
    ``"image"`` key of a Datum; its label is ``labels[index][1]``.

    Returns:
        A list of ``[label, Datum]`` pairs.
    """
    traindata = []
    for index in range(n_train):
        img_path = os.path.join(dir, "{}.jpg".format(index))
        with open(img_path, "rb") as img_file:
            binary = img_file.read()
        label = labels[index][1]
        datum = Datum()
        datum.add_binary("image", binary)
        traindata.append([label, datum])
    print("num of train data :", len(traindata))
    return traindata
Exemplo n.º 22
0
def predict(client):
    """Classify three given names and output the best label for each.

    One line per name is passed to ``_output`` in the form
    ``"<predicted label> <name>"``.
    """
    # The last shogun of each family is used as the query.
    names = ['慶喜', '義昭', '守時']
    queries = [Datum({'name': name}) for name in names]
    for query in queries:
        res = client.classify([query])
        # The highest-scoring label is the predicted shogun name.
        shogun_name = max(res[0], key=lambda r: r.score).label
        first_name = query.string_values[0][1]
        line = '{0} {1}\n'.format(shogun_name, first_name)
        _output(line)
Exemplo n.º 23
0
def test():
    """Exercise the Clustering API: push, centers, members and dump/load."""
    clustering = Clustering(CLUSTERING_CONFIG)
    assert clustering.get_revision() == 0

    # Two well-separated groups, around 1.0 and around 5.0.
    values = (1.0, 0.9, 1.1, 5.0, 4.9, 5.1)
    assert clustering.push([Datum({'x': value}) for value in values])
    assert clustering.get_revision() == 1

    centers = clustering.get_k_center()
    assert isinstance(centers, list) and len(centers) == 2
    assert isinstance(centers[0], Datum)

    members = clustering.get_core_members()
    assert isinstance(members, list) and len(members) == 2
    assert isinstance(members[0], list)
    assert isinstance(members[0][0], WeightedDatum)

    center = clustering.get_nearest_center(Datum({'x': 1.05}))
    assert isinstance(center, Datum)
    assert center.num_values[0][1] >= 0.9 and center.num_values[0][1] <= 1.1

    nearest = clustering.get_nearest_members(Datum({'x': 1.05}))
    assert isinstance(nearest, list)
    assert isinstance(nearest[0], WeightedDatum)
    assert json.loads(clustering.get_config())

    # Only confirm that dump/load run without raising.
    model = clustering.dump()
    clustering.load(model)
Exemplo n.º 24
0
def get_testdata(labels, data):
    """Read one 28*28-byte record per label from the binary file ``data``.

    Args:
        labels: Sequence whose length decides how many records are read.
        data: Path of the binary file to read from.

    Returns:
        A list of one-element lists, each holding a Datum whose ``"image"``
        binary value is one record.
    """
    testdata = []
    with open(data, "rb") as f:
        # Only the number of labels matters here, not their values.
        for _ in labels:
            binary = f.read(28 * 28)
            d = Datum()
            d.add_binary("image", binary)
            testdata.append([d])
    # Same text as the Python 2 print statement, but valid on Python 3 too.
    print("num of test  data : %d" % len(testdata))
    return testdata
Exemplo n.º 25
0
def predict(client):
    """Classify three given names and write the best label for each.

    For every name one line ``"<predicted label> <name>"`` is written to
    standard output, with the name encoded as UTF-8.
    """
    # The last shogun of each family is used as the query.
    names = (u'慶喜', u'義昭', u'守時')
    queries = [Datum({'name': name}) for name in names]
    for query in queries:
        res = client.classify([query])
        # The highest-scoring label is the predicted shogun name.
        best = max(res[0], key=lambda r: r.score)
        sys.stdout.write(best.label)
        sys.stdout.write(' ')
        name = query.string_values[0][1]
        sys.stdout.write(name.encode('utf-8'))
        sys.stdout.write('\n')
Exemplo n.º 26
0
def get_testdata(labels):
    """Read JPEG files ``n_train`` .. ``n_train + n_test - 1`` as test data.

    File ``<index>.jpg`` under ``dir`` is read as bytes and stored under the
    ``"image"`` key of a Datum.

    Returns:
        A list of one-element lists, each holding one Datum.
    """
    testdata = []
    testlabels = []
    first, last = n_train, n_train + n_test
    for index in range(first, last):
        img_path = os.path.join(dir, "{}.jpg".format(index))
        with open(img_path, "rb") as img_file:
            binary = img_file.read()
        datum = Datum()
        datum.add_binary("image", binary)
        testdata.append([datum])
        # Collected alongside the data, though only testdata is returned.
        testlabels.append(labels[index][1])
    print("num of test  data :", len(testdata))
    return testdata
Exemplo n.º 27
0
def get_testdata(labels):
    """Read JPEG files ``n_train`` .. ``n_train + n_test - 1`` as test data.

    File ``<index>.jpg`` under ``dir`` is read as bytes and stored under the
    ``"image"`` key of a Datum.

    Returns:
        A list of one-element lists, each holding one Datum.
    """
    testdata = []
    testlabels = []
    first, last = n_train, n_train + n_test
    for index in range(first, last):
        img_path = os.path.join(dir, "{}.jpg".format(index))
        with open(img_path, "rb") as img_file:
            binary = img_file.read()
        datum = Datum()
        datum.add_binary("image", binary)
        testdata.append([datum])
        # Collected alongside the data, though only testdata is returned.
        testlabels.append(labels[index][1])
    print("num of test  data :", len(testdata))
    return testdata
Exemplo n.º 28
0
def get_traindata(labels, data):
    """Read one 28*28-byte record per label from the binary file ``data``.

    Args:
        labels: Sequence of labels; ``labels[i]`` is paired with record ``i``.
        data: Path of the binary file to read from.

    Returns:
        A list of ``[label, Datum]`` pairs, where each Datum carries one
        record as its ``"image"`` binary value.
    """
    traindata = []
    with open(data, "rb") as f:
        for i in range(len(labels)):
            binary = f.read(28 * 28)
            d = Datum()
            d.add_binary("image", binary)
            traindata.append([labels[i], d])
    # Same text as the Python 2 print statement, but valid on Python 3 too.
    print("num of train data : %d" % len(traindata))
    return traindata
Exemplo n.º 29
0
 def test_get_core_members_light(self):
     """Push 100 points, then check the size and type of the core members."""
     for idx in range(100):
         point = IndexedPoint(str(idx), Datum({"nkey1": idx, "nkey2": -idx}))
         self.cli.push([point])
     members = self.cli.get_core_members_light()
     self.assertEqual(len(members), 10)
     self.assertIsInstance(members[0][0], WeightedIndex)
Exemplo n.º 30
0
 def test_get_k_center(self):
     """Push 100 points, then check the number and type of the centers."""
     for idx in range(100):
         point = IndexedPoint(str(idx), Datum({"nkey1": idx, "nkey2": -idx}))
         self.cli.push([point])
     centers = self.cli.get_k_center()
     self.assertEqual(len(centers), 10)
     self.assertIsInstance(centers[0], Datum)
Exemplo n.º 31
0
 def test_decode_row(self):
     """A row that was just updated decodes back to the same values."""
     row_id = "decode_row"
     self.cli.clear_row(row_id)
     original = Datum({"skey1": "val1", "skey2": "val2", "nkey1": 1.0, "nkey2": 2.0})
     self.cli.update_row(row_id, original)
     decoded_row = self.cli.decode_row(row_id)
     # Compare through JSON so list/tuple differences do not matter.
     expected_strings = json.dumps(original.string_values)
     self.assertEqual(expected_strings, json.dumps(decoded_row.string_values))
     expected_numbers = json.dumps(original.num_values)
     self.assertEqual(expected_numbers, json.dumps(decoded_row.num_values))
Exemplo n.º 32
0
 def test_pack(self):
     """A Datum built from a dict packs to the same bytes as the raw triple."""
     expected = msgpack.packb(([['name', 'Taro']], [['age', 20.0]], []))
     datum = Datum({
         'name': 'Taro',
         'age': 20
     })
     actual = msgpack.packb(datum.to_msgpack())
     self.assertEqual(expected, actual)
Exemplo n.º 33
0
    def on_status(self, status):
        """Train the classifier on a geotagged status from a known location.

        A status is ignored when it has no ``text``, no usable
        ``coordinates``, or when none of ``self.locations`` contains its
        coordinates.  Otherwise its text, with hashtags removed, is sent as
        training data labelled with the location name and echoed to stdout.
        """
        if not (hasattr(status, 'text') and hasattr(status, 'coordinates')):
            return
        if not status.coordinates or 'coordinates' not in status.coordinates:
            return

        # Use the first location that contains the status coordinates.
        matched = None
        for candidate in self.locations:
            point = status.coordinates['coordinates']
            if candidate.is_inside(point[0], point[1]):
                matched = candidate
                break
        if not matched:
            # Unknown location
            return

        detagged_text = remove_hashtags_from_tweet(
            status.text, status.entities['hashtags'])

        # Create the datum and send it to Jubatus as training data.
        datum = Datum({'text': detagged_text})
        self.classifier.train([(matched.name, datum)])

        # Print the trained tweet.
        print_green(matched.name, ' ')
        print(detagged_text)
Exemplo n.º 34
0
def add_data(num=10):
    """Add the output of ``generate_data(num)`` to the anomaly server.

    Each item's first three elements are sent as the ``x``, ``y`` and ``z``
    values of a Datum; the returned id and score are printed.
    """
    samples = generate_data(num)
    client = jubatus.Anomaly(HOST, PORT_P, NAME)
    for sample in samples:
        datum = Datum({"x": sample[0], "y": sample[1], "z": sample[2]})
        added = client.add(datum)
        print('Added {0}, score = {1}'.format(added.id, added.score))
Exemplo n.º 35
0
    def predict(self, client):
        """Classify every stored value and return the label of the last one.

        The values come from ``preMongo().getDic()``.  Each is classified on
        its own, and every result overwrites the previous one, so only the
        label of the final entry is returned.
        """
        records = preMongo().getDic()
        data = [Datum({'Value': records[key]['Value']}) for key in records]
        predict_result = {}

        for datum in data:
            res = client.classify([datum])
            value_text = str(datum.num_values[0][1])
            # The highest-scoring label is the prediction for this value.
            label = max(res[0], key=lambda r: r.score).label
            predict_result['Result'] = label
            predict_result['Value'] = value_text

        return predict_result['Result']
Exemplo n.º 36
0
Arquivo: train.py Projeto: rimms/misc
def main():
  """Train the classifier on one million randomly chosen two-valued samples.

  Each sample has ``'key'`` set to 1.0 with label ``'Pos'`` or to 2.0 with
  label ``'Neg'``.  Progress is printed every 10000 samples.
  """
  args = parse_options()

  client = Classifier('127.0.0.1', args.port, 'test', 0)

  for i in range(0, 1000000):
    d = Datum()

    # The value and its label are chosen by the same coin flip.
    rand = random.randint(0, 1)
    d.add_number('key', 1.0 if rand else 2.0)
    ld = LabeledDatum('Pos' if rand else 'Neg', d)

    client.train([ld])

    if i % 10000 == 0:
      # Single-argument print() behaves the same on Python 2 and Python 3.
      print('train ' + str(i) + ' data')
Exemplo n.º 37
0
    def test_add_string(self):
        """add_string gives the same packed form as the dict constructor."""
        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed from unittest in Python 3.12.
        d = Datum()
        d.add_string('key', 'value')
        self.assertEqual(Datum({'key': 'value'}).to_msgpack(),
                         d.to_msgpack())

        # The same must hold when key and value are unicode literals.
        d = Datum()
        d.add_string(u'key', u'value')
        self.assertEqual(Datum({'key': 'value'}).to_msgpack(),
                         d.to_msgpack())
Exemplo n.º 38
0
 def test_str(self):
     """str(Datum) lists the string, numeric and binary values in order."""
     d = Datum()
     d.add_string('name', 'john')
     d.add_number('age', 20)
     d.add_binary('image', '0101')
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual('datum{string_values: [[\'name\', \'john\']], num_values: [[\'age\', 20.0]], binary_values: [[\'image\', \'0101\']]}',
                      str(d))
Exemplo n.º 39
0
 def test_str(self):
     """str(Datum) shows the binary value with or without a ``b`` prefix."""
     datum = Datum()
     datum.add_string('name', 'john')
     datum.add_number('age', 20)
     datum.add_binary('image', b('0101'))
     text = str(datum)
     # Either rendering of the binary value is accepted.
     plain = "datum{string_values: [['name', 'john']], num_values: [['age', 20.0]], binary_values: [['image', '0101']]}"
     prefixed = "datum{string_values: [['name', 'john']], num_values: [['age', 20.0]], binary_values: [['image', b'0101']]}"
     self.assertIn(text, (plain, prefixed))
Exemplo n.º 40
0
 def test_unpack(self):
     """from_msgpack exposes each value list as a list of (key, value) tuples."""
     d = Datum.from_msgpack(([['name', 'Taro']], [['age', 20.0]], [['img', '0101']]))
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(
         [('name', 'Taro')],
         d.string_values)
     self.assertEqual(
         [('age', 20.0)],
         d.num_values)
     self.assertEqual(
         [('img', '0101')],
         d.binary_values)
Exemplo n.º 41
0
def main():
    """Train a regression ten times on x=1, y=4, then print one estimate."""
    cl = Regression("127.0.0.1", 9199, "test")

    # NOTE(review): the same Datum is extended on every pass, so 'x' and 'y'
    # are added again each time - confirm this is intended.
    d = Datum()
    # range() replaces the Python 2-only xrange(), which is a NameError on
    # Python 3; this function already uses the print() function form.
    for i in range(10):
        d.add_number('x', 1)
        d.add_number('y', 4)
        cl.train([[10.0, d]])

    d = Datum()
    d.add_number('x', 1)
    d.add_number('y', 4)
    result = cl.estimate([d])
    print("{0:30.30f}".format(result[0]))
Exemplo n.º 42
0
    def _juba_proc(self, clock, datadict, method="add"):
        """Send one sample to the anomaly server, retrying on RPC errors.

        Args:
            clock: Timestamp of the sample; only used in alarm log messages.
            datadict: Mapping of item key to value.  ``hostid`` is scaled by
                ``ZBX_ITEMID_DIGITS``, ``weekday`` and ``hour`` are sent as
                they are, and every other key goes through ``self.norm``.
            method: ``"add"`` adds the sample; ``"calc"`` scores it and
                updates ``self.alarm_on``.

        Raises:
            msgpackrpc.error.TransportError, msgpackrpc.error.TimeoutError:
                re-raised once ``JUBA_RETRY_MAX`` attempts have failed.
        """
        datum = Datum()
        for key, value in datadict.items():
            name = str(key)
            if key == "hostid":
                datum.add_number(name, int(value) * 1.0 / ZBX_ITEMID_DIGITS)
            elif key == "weekday" or key == "hour":
                datum.add_number(name, value)
            else:
                datum.add_number(name, self.norm(key, value))

        # The counter used to be created as ``retry_cnt`` but decremented as
        # ``retry_count``, so the first RPC failure raised UnboundLocalError
        # instead of retrying.  One name is used throughout now.
        retries_left = JUBA_RETRY_MAX
        while True:
            try:
                if method == "add":
                    print(datum)
                    self.anom.add(datum)
                    # NOTE(review): this ends the process right after the
                    # first successful add - looks like a debugging
                    # leftover; confirm before removing.
                    exit()
                if method == "calc":
                    print(datum)
                    score = self.anom.calc_score(datum)
                    if score == float('Inf') or score > ML_LIMIT:
                        # Raise the alarm only on the normal -> anomalous edge.
                        if self.alarm_on == False:
                            self.alarm_on = True
                            cf.log("[%s] score=%f" % (cf.clock2strjst(clock), score))
                    else:
                        # Clear the alarm only on the anomalous -> normal edge.
                        if self.alarm_on == True:
                            self.alarm_on = False
                            cf.log("[%s] score recovered to normal:score=%f" % (cf.clock2strjst(clock), score))

                break
            except (msgpackrpc.error.TransportError, msgpackrpc.error.TimeoutError) as e:
                retries_left -= 1
                if retries_left <= 0:
                    raise
                # Reconnect before the next attempt.
                self.anom.get_client().close()
                self.set_anom()

                print(e)
                time.sleep(JUBA_RETRY_INTERVAL)
Exemplo n.º 43
0
def main():
    """Register ten rows in the recommender, then print the five most similar.

    Every row is built from x=1, y=4, and the query datum uses the same
    values.
    """
    cl = client.Recommender("localhost", 9199, "test")
    cl.clear()

    # NOTE(review): the same Datum is extended on every pass, so 'x' and 'y'
    # are added again each time - confirm this is intended.
    d = Datum()
    # range() replaces the Python 2-only xrange(), which is a NameError on
    # Python 3; this function already uses the print() function form.
    for i in range(10):
        d.add_number('x', 1)
        d.add_number('y', 4)

        cl.update_row(str(i), d)

    d = Datum()
    d.add_number('x', 1)
    d.add_number('y', 4)
    predicts = cl.similar_row_from_datum(d, 5)
    print(predicts)
    for predict in predicts:
        print("{0} {1:30.30f}".format(predict.id, predict.score))
Exemplo n.º 44
0
def main():
    """Train the classifier ten times on x=1, y=4, then classify that point.

    Prints the raw result and then the label and score of the first entry of
    each result.
    """
    cl = Classifier("localhost", 9199, "test")
    cl.clear()

    # NOTE(review): the same Datum is extended on every pass, so 'x' and 'y'
    # are added again each time - confirm this is intended.
    d = Datum()
    # range() replaces the Python 2-only xrange(), which is a NameError on
    # Python 3; this function already uses the print() function form.
    for i in range(10):
        d.add_number('x', 1)
        d.add_number('y', 4)
        cl.train([LabeledDatum("label1", d)])

    d = Datum()
    d.add_number('x', 1)
    d.add_number('y', 4)
    predict = cl.classify([d])
    print(predict)
    for score in predict:
        print("{0} {1:30.30f}".format(score[0].label, score[0].score))
Exemplo n.º 45
0
    print('Stop running the job.')
    sys.exit(0)

if __name__ == '__main__':
    # 0. set KeyboardInterrupt handler
    signal.signal(signal.SIGINT, do_exit)

    # 1. set jubatus server
    anom = client.Anomaly("127.0.0.1", 9199, NAME)

    # 2. prepare training data
    with open('../kddcup.data_10_percent.txt', mode='r') as file:
        for line in file:
            duration, protocol_type, service, flag, src_bytes, dst_bytes, land, wrong_fragment, urgent, hot, num_failed_logins, logged_in, num_compromised, root_shell, su_attempted, num_root, num_file_creations, num_shells, num_access_files, num_outbound_cmds, is_host_login, is_guest_login, count, srv_count, serror_rate, srv_serror_rate, rerror_rate, srv_rerror_rate, same_srv_rate, diff_srv_rate, srv_diff_host_rate, dst_host_count, dst_host_srv_count, dst_host_same_srv_rate, dst_host_diff_srv_rate, dst_host_same_src_port_rate, dst_host_srv_diff_host_rate, dst_host_serror_rate, dst_host_srv_serror_rate, dst_host_rerror_rate, dst_host_srv_rerror_rate, label = line[:-1].split(",")

            datum = Datum()
            for (k, v) in [
                    ["protocol_type", protocol_type],
                    ["service", service],
                    ["flag", flag],
                    ["land", land],
                    ["logged_in", logged_in],
                    ["is_host_login", is_host_login],
                    ["is_guest_login", is_guest_login],
                    ]:
                datum.add_string(k, v)

            for (k, v) in [
                    ["duration",float(duration)],
                    ["src_bytes", float(src_bytes)],
                    ["dst_bytes", float(dst_bytes)],
Exemplo n.º 46
0
from jubatus.common import Datum
import jubatus

# Create the weight client.
client = jubatus.Weight("127.0.0.1", 9199, "")

# One datum with two numeric and two string features.
d = Datum()
for key, number in (("user/age", 25), ("user/income", 1000)):
    d.add_number(key, number)
for key, text in (("user/name", "Loren"), ("message", "Hello")):
    d.add_string(key, text)

# Ask the server for the feature weights of that datum and show them.
res = client.calc_weight(d)
print(res)
Exemplo n.º 47
0
import jubatus
from jubatus.common import Datum

import random

# Create the recommender client.
cl = jubatus.Recommender('127.0.0.1', 9199, 'test', 0)

# Fixed seed so that the generated feature values are reproducible.
random.seed(1)
datum_length = 100

# Register rows '0', '1' and '2', each with datum_length random numeric
# features keyed "0" .. "99".
for i in range(3):
    d = Datum()
    for x in range(datum_length):
        d.add_number("{}".format(x), random.random())
    cl.update_row(str(i), d)

print('ids:{}'.format(','.join(cl.get_all_rows())))  # rows '0', '1', '2' were just added

# Add a fourth row, '3'.
d = Datum()
for x in range(datum_length):
    d.add_number("{}".format(x), random.random())
cl.update_row('3', d)
# NOTE(review): the original comment says one id is unlearned here;
# presumably the server config limits the number of rows - confirm.
print('ids:{}'.format(','.join(cl.get_all_rows())))  # unlearn 1 id

# Round-trip the model through save / clear / load.
cl.save('test')
cl.clear()
cl.load('test')

print('ids:{}'.format(','.join(cl.get_all_rows())))  # should be same as before `save`

# NOTE(review): the snippet ends here; whatever used this datum is not shown.
d = Datum()
Exemplo n.º 48
0
def gen_datum(filename):
    """Read ``filename`` as bytes and return them in a Datum under "image"."""
    with open(filename, "rb") as image_file:
        binary = image_file.read()
    datum = Datum()
    datum.add_binary("image", binary)
    return datum
Exemplo n.º 49
0
#-*- coding:utf-8 -*-

import jubatus
from jubatus.common import Datum

"""
画像特徴抽出プラグインを試す
"""

# Open the file in binary mode and read its raw bytes.
with open("./dataset/Lenna.jpg", "br") as f:
    data = f.read()

# Wrap the image bytes in a datum under the "image" key.
d = Datum()
d.add_binary("image", data)


# Ask the weight server for the extracted features and show them.
client = jubatus.Weight("127.0.0.1", 9199, "")
res = client.calc_weight(d)
print(res)

Exemplo n.º 50
0
Arquivo: test.py Projeto: rimms/misc
def make_datum():
    """Return a Datum holding one string, one numeric and one binary value."""
    datum = Datum()
    datum.add_string('string-key', 'str')
    datum.add_number('number-key', 1.0)
    datum.add_binary('binary-key', b'bin')
    return datum
Exemplo n.º 51
0
import sys, json
from jubatus.clustering import client
from jubatus.clustering import types
from jubatus.common import Datum

NAME = "clustering_compounds"
if __name__ == '__main__':
    clustering = client.Clustering("127.0.0.1", 9199, NAME)

    # Each input line splits into a SMILES string and an id; only the SMILES
    # string is pushed.  The file is opened with a context manager so that
    # the handle is closed once every line has been sent.
    with open("../../bench_data/demo4096.smi") as smi_file:
        for line in smi_file:
            smiles, compound_id = line.split(" ")
            datum = Datum()
            datum.add_string("SMILES", smiles)
            clustering.push([datum])
    center_list = clustering.get_k_center()
    members = clustering.get_core_members()
    # Print every member of the first four clusters.
    for i in range(0, 4):
        for j in range(len(members[i])):
            # Single-argument print() behaves the same on Python 2 and 3.
            print("%d, %d, %s " % (i, j, members[i][j]))
#    print "%s \n" % center_list[4]
#    for i in range(len(center_list)):
#        print "%s \n" % center_list[i]
Exemplo n.º 52
0
Arquivo: test.py Projeto: rimms/misc
#!/usr/bin/env python

import random
import time

from jubatus.classifier.client import Classifier
from jubatus.classifier.types import LabeledDatum
from jubatus.common import Datum

# Build 100000 labelled samples, each with 20 numeric features whose keys
# are unique to that sample ("<feature>-<sample>").
# range() replaces the Python 2-only xrange(), which is a NameError on
# Python 3.
data = []
for i in range(0, 100000):
    d = Datum()
    for j in range(0, 20):
        d.add_number(str(j) + "-" + str(i), random.random() + 1.0)

    ld = LabeledDatum("Pos" if random.randint(0, 1) else "Neg", d)
    data.append(ld)

client = Classifier("127.0.0.1", 9199, "test", 0)


# Time one train() call over the whole batch.
start_time = time.time()
client.train(data)
end_time = time.time()

# Single-argument print() behaves the same on Python 2 and Python 3.
print(str(len(data)) + " ... " + str((end_time - start_time) * 1000) + " msec")
Exemplo n.º 53
0
 def test_add_binary(self):
     """add_binary stores the pair in the third (binary) msgpack list."""
     d = Datum()
     d.add_binary('key', b('value'))
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(
         ([], [], [['key', b('value')]]),
         d.to_msgpack())
Exemplo n.º 54
0
 def test_add_int(self):
     """An int passed to add_number packs like the equivalent float."""
     d = Datum()
     d.add_number('key', 1)
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(Datum({'key': 1.0}).to_msgpack(),
                      d.to_msgpack())
Exemplo n.º 55
0
    con = MongoClient('172.16.4.84', 27017)
    db = con.sensordb
    col = db.anomaly

    # 1.Jubatus Serverへの接続設定
    anom = client.Anomaly("127.0.0.1",9199,NAME)

    # 2.学習用データの準備
    mongo_dic =  convertMongo()
    dic = mongo_dic.getDic()
    name = '' 
    value = 0
    for line in dic:
        name = dic[line]['name']
        value = dic[line]['value']
        datum = Datum()

        # for (k, v) in [
        #         ['name', name],
        #         ]:
        #     datum.add_string(k, v)
        
        for (k, v) in [
                ['value', value],
                ]:
            datum.add_number(k, v)
        
        # 3.データの学習(学習モデルの更新)
        ret = anom.add(datum)
        
        # 4.結果の出力
Exemplo n.º 56
0
import sys, json
from jubatus.clustering import client
from jubatus.clustering import types
from jubatus.common import Datum

NAME = "clustering_compounds"
if __name__ == '__main__':
    clustering = client.Clustering("127.0.0.1", 9199, NAME)

    # Look up the cluster center nearest to a single SMILES string.
    datum = Datum()
    datum.add_string("SMILES", "cccccccc")
    # Single-argument print() behaves the same on Python 2 and Python 3.
    print(clustering.get_nearest_center(datum))
#    print "%s \n" % center_list[4]
#    for i in range(len(center_list)):
#        print "%s \n" % center_list[i]
Exemplo n.º 57
0
#-*- coding: utf-8 -*-

import json, sys
import jubatus
from jubatus.common import Datum
# Maps each NewsItemId to its HeadLine.
headlines = {}
#keys = ["HeadLine", "DateLine", "Language", "DateId", "NewsItemId", "article", "Genre1", "Genre2"]
# Load the JSON feed list named on the command line and register every feed
# as a recommender row keyed by its NewsItemId.
with open(sys.argv[1], "r") as f:
    client = jubatus.Recommender("127.0.0.1", 9199, "hoge", 0)
    # NOTE(review): json.load() no longer accepts ``encoding`` on
    # Python 3.9+ - confirm which interpreter this script targets.
    feeds = json.load(f, encoding="utf-8")
    for feed in feeds:
        d = Datum()
        keys = list(feed.keys())
        headlines[feed["NewsItemId"]] = feed["HeadLine"]
        for key in keys:
            try: 
                if key == "article":
                    # Joined with spaces, so presumably a list of strings - verify.
                    d.add_string(key, " ".join(feed[key]))
                elif key == "NewsItemId":
                    # The id becomes the row key, not a feature.
                    article_id = feed[key].encode('utf-8')
                elif key == "HeadLine":
                    d.add_string(key, feed[key].encode('utf-8'))
                else:
                    d.add_string(key, feed[key].encode('utf-8'))
            except TypeError:
                print("ignore", key, " ".join(feed[key]))
            except AttributeError:
                # Raised when the value has no .encode method.
                print("ignore", key, feed[key])
        client.update_row(article_id, d)
    # Query the rows most similar to the last feed that was registered.
    res = client.similar_row_from_id(article_id, 10)
    client.save("jubatus_hackathon")