示例#1
0
  def test_convert_matrix(self):
    """Round-trip a model through every input/output format pair.

    A model is loaded from binary or JSON and dumped as binary, JSON and
    text.  Binary and JSON dumps are loaded back and compared with the
    source model section by section; the text dump is only written.
    """
    for in_fmt in ('binary', 'json'):
      for out_fmt in ('binary', 'json', 'text'):
        # input
        if in_fmt == 'binary':
          src = JubaModel.load_binary(_get_binary_file())
        else:
          src = JubaModel.load_json(_get_json_file())

        # output
        if out_fmt == 'text':
          # The text dump is only written here, never read back.
          src.dump_text(StringIO())
          continue

        if out_fmt == 'binary':
          buf = BytesIO()
          src.dump_binary(buf)
          buf.seek(0)
          restored = JubaModel.load_binary(buf)
        elif out_fmt == 'json':
          buf = StringIO()
          src.dump_json(buf)
          buf.seek(0)
          restored = JubaModel.load_json(buf)

        # check
        # NOTE(review): zip() stops at the shorter sequence, so a restored
        # section with fewer trailing entries would go unnoticed.
        for section in ('header', 'user', 'system'):
          source_items = getattr(src, section).get()
          restored_items = getattr(restored, section).get()
          for (k1, v1), (k2, v2) in zip(source_items, restored_items):
            self.assertEqual(k1, k2)
            self.assertEqual(v1, v2)
示例#2
0
 def _get_model(self, service, config):
   """Create an empty model for the given service.

   Runs ``service`` with ``config``, saves its model under the name
   ``'test'`` and loads the saved file as a ``JubaModel``.  The service is
   stopped and the saved file removed whether or not loading succeeds.

   Args:
     service: object whose ``run(config)`` returns a running service handle.
     config: configuration passed to ``service.run``.

   Returns:
     The result of ``JubaModel.load_binary`` on the saved model file.
   """
   server = service.run(config)
   saved_path = None
   try:
     server.save('test')
     # popitem() yields one (key, status) pair; only the status is needed.
     # Presumably the status has a single entry for this server - confirm.
     status = server.get_status().popitem()[1]
     saved_path = status['last_saved_path']
     with open(saved_path, 'rb') as model_file:
       return JubaModel.load_binary(model_file)
   finally:
     # Always shut the service down, then clean up the saved model file.
     server.stop()
     if saved_path and os.path.exists(saved_path):
       os.remove(saved_path)
示例#3
0
 def _get_model(self, service, config):
     """Create an empty model for the given service.

     Runs ``service`` with ``config``, saves its model under the name
     ``'test'`` and loads the saved file as a ``JubaModel``.  The service
     is stopped and the saved file removed whether or not loading succeeds.

     Args:
         service: object whose ``run(config)`` returns a running service
             handle.
         config: configuration passed to ``service.run``.

     Returns:
         The result of ``JubaModel.load_binary`` on the saved model file.
     """
     server = service.run(config)
     saved_path = None
     try:
         server.save('test')
         # popitem() yields one (key, status) pair; only the status is
         # needed.  Presumably the status has a single entry for this
         # server - confirm.
         status = server.get_status().popitem()[1]
         saved_path = status['last_saved_path']
         with open(saved_path, 'rb') as model_file:
             return JubaModel.load_binary(model_file)
     finally:
         # Always shut the service down, then clean up the saved file.
         server.stop()
         if saved_path and os.path.exists(saved_path):
             os.remove(saved_path)
示例#4
0
    def test_convert_matrix(self):
        """Round-trip a model through every input/output format pair.

        A model is loaded from binary or JSON and dumped as binary, JSON
        and text.  Binary and JSON dumps are loaded back and compared with
        the source model section by section; the text dump is only written.
        """
        for in_fmt in ('binary', 'json'):
            for out_fmt in ('binary', 'json', 'text'):
                # input
                if in_fmt == 'binary':
                    src = JubaModel.load_binary(_get_binary_file())
                else:
                    src = JubaModel.load_json(_get_json_file())

                # output
                if out_fmt == 'text':
                    # The text dump is only written here, never read back.
                    src.dump_text(StringIO())
                    continue

                if out_fmt == 'binary':
                    buf = BytesIO()
                    src.dump_binary(buf)
                    buf.seek(0)
                    restored = JubaModel.load_binary(buf)
                elif out_fmt == 'json':
                    buf = StringIO()
                    src.dump_json(buf)
                    buf.seek(0)
                    restored = JubaModel.load_json(buf)

                # check
                # NOTE(review): zip() stops at the shorter sequence, so a
                # restored section with fewer trailing entries would go
                # unnoticed.
                for section in ('header', 'user', 'system'):
                    source_items = getattr(src, section).get()
                    restored_items = getattr(restored, section).get()
                    pairs = zip(source_items, restored_items)
                    for (k1, v1), (k2, v2) in pairs:
                        self.assertEqual(k1, k2)
                        self.assertEqual(v1, v2)
示例#5
0
    def test_json(self):
        """Load the JSON test model and check header, system and user data."""
        # get a valid JSON model file
        json_file = _get_json_file(True)

        # load it
        model = JubaModel.load_json(json_file)

        # Jubatus version recorded in the header must be 1.2.3.
        header = model.header
        expected_version = (
            (1, 'jubatus_version_major'),
            (2, 'jubatus_version_minor'),
            (3, 'jubatus_version_maint'),
        )
        for expected, attr in expected_version:
            self.assertEqual(expected, getattr(header, attr))

        self.assertNotEqual(0, header.crc32)

        # System and user sections must match the TEST_JSON fixture.
        system = model.system
        self.assertEqual(TEST_JSON['system']['config'], system.config)
        self.assertEqual('classifier', system.type)

        user = model.user
        self.assertEqual(1, user.version)
        self.assertEqual(TEST_JSON['user']['user_data'], user.user_data)

        user_raw = model._user_raw
        self.assertTrue(user_raw is not None)
示例#6
0
  def test_json(self):
    """Load the JSON test model and check header, system and user data."""
    # get a valid JSON model file
    json_file = _get_json_file(True)

    # load it
    model = JubaModel.load_json(json_file)

    # Jubatus version recorded in the header must be 1.2.3.
    header = model.header
    expected_version = (
      (1, 'jubatus_version_major'),
      (2, 'jubatus_version_minor'),
      (3, 'jubatus_version_maint'),
    )
    for expected, attr in expected_version:
      self.assertEqual(expected, getattr(header, attr))

    self.assertNotEqual(0, header.crc32)

    # System and user sections must match the TEST_JSON fixture.
    system = model.system
    self.assertEqual(TEST_JSON['system']['config'], system.config)
    self.assertEqual('classifier', system.type)

    user = model.user
    self.assertEqual(1, user.version)
    self.assertEqual(TEST_JSON['user']['user_data'], user.user_data)

    user_raw = model._user_raw
    self.assertTrue(user_raw is not None)
示例#7
0
  def test_binary(self):
    """Load the binary test model with validation and check its contents."""
    # get a valid binary model file
    binary_file = _get_binary_file()

    # enable validation: must be loaded successfully
    model = JubaModel.load_binary(binary_file, True)

    # Jubatus version recorded in the header must be 1.2.3.
    header = model.header
    expected_version = (
      (1, 'jubatus_version_major'),
      (2, 'jubatus_version_minor'),
      (3, 'jubatus_version_maint'),
    )
    for expected, attr in expected_version:
      self.assertEqual(expected, getattr(header, attr))

    # must be a valid model after fix_header
    # NOTE(review): test_json compares crc32 against 0 rather than 1 -
    # confirm which sentinel value is intended here.
    self.assertNotEqual(1, header.crc32)

    # System and user sections must match the TEST_JSON fixture.
    system = model.system
    self.assertEqual(TEST_JSON['system']['config'], system.config)
    self.assertEqual('classifier', system.type)

    user = model.user
    self.assertEqual(1, user.version)
    self.assertEqual(TEST_JSON['user']['user_data'], user.user_data)

    user_raw = model._user_raw
    self.assertTrue(user_raw is not None)
示例#8
0
    def test_binary(self):
        """Load the binary test model with validation and check it."""
        # get a valid binary model file
        binary_file = _get_binary_file()

        # enable validation: must be loaded successfully
        model = JubaModel.load_binary(binary_file, True)

        # Jubatus version recorded in the header must be 1.2.3.
        header = model.header
        expected_version = (
            (1, 'jubatus_version_major'),
            (2, 'jubatus_version_minor'),
            (3, 'jubatus_version_maint'),
        )
        for expected, attr in expected_version:
            self.assertEqual(expected, getattr(header, attr))

        # must be a valid model after fix_header
        # NOTE(review): test_json compares crc32 against 0 rather than 1 -
        # confirm which sentinel value is intended here.
        self.assertNotEqual(1, header.crc32)

        # System and user sections must match the TEST_JSON fixture.
        system = model.system
        self.assertEqual(TEST_JSON['system']['config'], system.config)
        self.assertEqual('classifier', system.type)

        user = model.user
        self.assertEqual(1, user.version)
        self.assertEqual(TEST_JSON['user']['user_data'], user.user_data)

        user_raw = model._user_raw
        self.assertTrue(user_raw is not None)
示例#9
0
def _get_model(valid=True):
    """Build a ``JubaModel`` from the ``TEST_JSON`` fixture.

    The fixture is serialized to JSON text, loaded with
    ``JubaModel.load_json`` and passed through ``fix_header()``.

    Args:
        valid: when false, the header ``crc32`` is overwritten with 0 so
            that the returned model is deliberately broken.

    Returns:
        The loaded model.
    """
    serialized = json.dumps(TEST_JSON)
    model = JubaModel.load_json(StringIO(serialized))
    model.fix_header()
    if valid:
        return model
    model.header.crc32 = 0  # break the model file
    return model
示例#10
0
This example shows how to use the ``jubakit.model`` package,
which allows low-level model manipulation.

To try this example, first save a model file of jubaweight.
(hint: ``weight_shogun.py`` example automatically saves the model under /tmp)
Then run this example like:

  $ python weight_model_extract.py /tmp/127.0.0.1_0000_weight_shogun.jubatus

to see the term frequency of each feature vector.
"""

# Load the model file: use the first command-line argument when one is given,
# otherwise fall back to the default file name.
modelpath = (
    sys.argv[1] if len(sys.argv) > 1 else 'weight_shogun_model.jubatus'
)

with open(modelpath, 'rb') as f:
    model = JubaModel.load_binary(f)

# Extract the term frequency part of the model data.
# NOTE(review): the [0][1][1][0] path presumably follows the jubaweight model
# layout - confirm against the model format before reusing it elsewhere.
weights = model.data()[0][1][1][0]

# Sort features by the term frequency, highest first.
sorted_weights = sorted(
    weights.items(),
    key=operator.itemgetter(1),
    reverse=True
)

# Print the result as a two-column table.
print("Weight\t\tFeature")
for feature, frequency in sorted_weights:
    print("{0}\t\t{1}".format(frequency, feature))
示例#11
0
def _get_model(valid=True):
  """Build a ``JubaModel`` from the ``TEST_JSON`` fixture.

  The fixture is serialized to JSON text, loaded with
  ``JubaModel.load_json`` and passed through ``fix_header()``.

  Args:
    valid: when false, the header ``crc32`` is overwritten with 0 so that
      the returned model is deliberately broken.

  Returns:
    The loaded model.
  """
  serialized = json.dumps(TEST_JSON)
  model = JubaModel.load_json(StringIO(serialized))
  model.fix_header()
  if valid:
    return model
  model.header.crc32 = 0  # break the model file
  return model