def test_hamilton_distance(self):
    """Check hamiltonian_distance against known values.

    One training instance is compared with four test instances that
    differ from it in progressively more attribute values; the expected
    distances are 0, 1, 3 and 5 respectively.
    """
    schema = [
        attr.Attribute('A1', ['a', 'b'], 0),
        attr.Attribute('A2', ['continuous'], 1),
        attr.Attribute('A3', ['continuous'], 2),
        attr.Attribute('A4', ['g', 'h'], 3),
    ]
    reference = ins.TrainingInstance(['a', 5, 3.4, 'g'], 'y')

    # (expected distance, attribute values of the test instance)
    cases = (
        (0, ['a', 5, 3.4, 'g']),
        (1, ['b', 5, 3.4, 'g']),
        (3, ['b', 4, 3.4, 'h']),
        (5, ['b', 4, 1.4, 'h']),
    )
    for expected, values in cases:
        candidate = ins.TestInstance(values)
        self.assertEqual(
            expected,
            distancemetric.hamiltonian_distance(reference, candidate, schema))
def test_euclidean_distance(self):
    """Check euclidean_distance against known values.

    One training instance is compared with four test instances that
    differ from it in progressively more attribute values; the expected
    distances are 0, 1, sqrt(3) and sqrt(7) respectively.
    """
    schema = [
        attr.Attribute('A1', ['a', 'b'], 0),
        attr.Attribute('A2', ['continuous'], 1),
        attr.Attribute('A3', ['continuous'], 2),
        attr.Attribute('A4', ['g', 'h'], 3),
    ]
    reference = ins.TrainingInstance(['a', 5, 3.4, 'g'], 'y')

    # (expected distance, attribute values of the test instance)
    cases = (
        (0, ['a', 5, 3.4, 'g']),
        (1, ['b', 5, 3.4, 'g']),
        (math.sqrt(3), ['b', 4, 3.4, 'h']),
        (math.sqrt(7), ['b', 4, 1.4, 'h']),
    )
    for expected, values in cases:
        candidate = ins.TestInstance(values)
        self.assertEqual(
            expected,
            distancemetric.euclidean_distance(reference, candidate, schema))
def test_remove_attrbutes(self):
    """Removing two attributes leaves six values, starting with '34'."""
    training_instance = ins.TrainingInstance(
        ['3', '34', 'self-employed', 'married', '2', '3', '120000', '2'],
        'yes')
    # Attributes at indices 0 and 6 of the instance are the ones dropped.
    id_attr = attribute.Attribute('id', ['continuous'], 0)
    salary_attr = attribute.Attribute('annualsalary', ['continuous'], 6)

    training_instance.remove_attributes([id_attr, salary_attr])

    self.assertEqual(6, len(training_instance.attrs))
    self.assertEqual('34', training_instance.attrs[0])
def test_attribute_creation(self):
    """Attributes loaded for the 'person' dataset: 8 in total.

    The first and the last loaded attribute are compared with explicitly
    constructed continuous attributes.
    """
    dataset_path = datasetsDir(self) + 'numerical' + SEP + 'person'
    loaded = attributes(dataset_path)
    self.assertEqual(8, len(loaded), '8 attributes should be present')

    # (index in the loaded collection, expected attribute name)
    for position, name in ((0, 'id'), (7, 'creditrating')):
        self.assertEqual(
            a.Attribute(name, ['continuous'], position), loaded[position])
def test_is_countinuous_returns_true_if_continuous(self):
    """is_continuous() and type agree for continuous and discrete attributes."""
    continuous = a.Attribute('temperature', ['continuous'], 1)
    self.assertEqual(a.CONTINUOUS, continuous.type)
    self.assertTrue(continuous.is_continuous())

    discrete = a.Attribute('foo', ['a', 'b', 'c'], 0)
    self.assertEqual(a.DISCRETE, discrete.type)
    self.assertFalse(discrete.is_continuous())
def test_equality(self):
    """Attributes compare equal only when built from the same arguments."""
    base = a.Attribute('foo', ['a', 'b', 'c'], 0)
    twin = a.Attribute('foo', ['a', 'b', 'c'], 0)
    # Differs from base in name and index.
    renamed = a.Attribute('foobar', ['a', 'b', 'c'], 1)
    # Differs from base only by one extra value.
    extended = a.Attribute('foo', ['a', 'b', 'c', 'd'], 0)

    self.assertEqual(base, twin, 'they should be equal')
    for different in (renamed, extended):
        self.assertNotEqual(base, different, 'they are not equal')
def test_empty_freq_dists(self):
    """empty_freq_dists() on Attributes has one entry per attribute.

    Each entry's length matches the number of values of its attribute
    (3 for the first, 2 for the second).
    """
    first = a.Attribute("first", ['a', 'b', 'c'], 0)
    second = a.Attribute("second", ['d', 'e'], 1)
    collection = a.Attributes([first, second])

    dists = collection.empty_freq_dists()

    self.assertEqual(2, len(dists))
    for expected_size, key in ((3, first), (2, second)):
        self.assertEqual(expected_size, len(dists[key]))
def test_values_of_atrributes(self):
    """values() returns the instance's values for the given attributes."""
    training_instance = ins.TrainingInstance(
        ['3', '34', 'self-employed', 'married', '2', '3', '120000', '2'],
        'yes')
    # Attributes at indices 4 and 6 select '2' and '120000'.
    requested = [
        attribute.Attribute('dependents', ['continuous'], 4),
        attribute.Attribute('annualsalary', ['continuous'], 6),
    ]
    self.assertEqual(['2', '120000'], training_instance.values(requested))
def test_discretise_using_discretised_attributes(self):
    """discretise() replaces continuous values with discretised ones.

    Before discretisation value() yields 2 and 120000 for the two
    continuous attributes; afterwards the discretised attributes yield
    'b' and 'e'.
    """
    dependents_attr = attribute.Attribute('dependents', ['continuous'], 4)
    salary_attr = attribute.Attribute('annualsalary', ['continuous'], 6)

    dependents_ranges = r.Range(0, 2, True).split(2)
    disc_dependents_attr = da.DiscretisedAttribute(
        'dependents', dependents_ranges, 4)
    salary_ranges = r.Range(0, 120000, True).split(5)
    disc_salary_attr = da.DiscretisedAttribute(
        'annualsalary', salary_ranges, 6)
    discretised = [disc_dependents_attr, disc_salary_attr]

    subject = ins.TrainingInstance(
        ['3', '34', 'self-employed', 'married', '2', '3', '120000', '2'],
        'yes')

    # Values as seen through the continuous attributes.
    self.assertEqual(2, subject.value(dependents_attr))
    self.assertEqual(120000, subject.value(salary_attr))

    subject.discretise(discretised)

    # Values as seen through the discretised attributes.
    self.assertEqual('b', subject.value(disc_dependents_attr))
    self.assertEqual('e', subject.value(disc_salary_attr))
def test_attempt_to_discretise_non_continuous_attribute_raises_error(self):
    """value_ranges() on a non-continuous attribute raises InvalidDataError.

    The test fails with 'should throw error' when no InvalidDataError is
    raised.
    """
    path = datasetsDir(self) + 'numerical' + SEP + 'weather'
    _training = training(path)
    try:
        _training.value_ranges(
            [a.Attribute('outlook', ['sunny', 'overcast', 'rainy'], 0)])
    except inv.InvalidDataError:
        pass
    else:
        # Kept outside the try body so the failure raised by self.fail()
        # can never be caught by the except clause above.
        self.fail('should throw error')
def test_get_attribute_value_from_instance_using_attribute(self):
    """value(attribute) returns 'two' for training, test and gold instances."""
    training_instance = ins.TrainingInstance(['bar', 'two'], 'a')
    second = attribute.Attribute('second', ['two', 'duo'], 1)
    self.assertEqual('two', training_instance.value(second))

    test_instance = ins.TestInstance(['bar', 'two'])
    self.assertEqual('two', test_instance.value(second))

    gold_instance = ins.GoldInstance(['bar', 'two'], 'a')
    self.assertEqual('two', gold_instance.value(second))
def metadata(self, file_path):
    """Build attribute metadata from the lines read for ``file_path``.

    Lines are obtained through ``self.__get_lines(file_path, self.NAMES)``.
    The processed first line, split on ',', gives the class values. Every
    line whose processed text is non-empty and which ``isAttribute()``
    reports as an attribute becomes an ``a.Attribute``; attributes are
    numbered consecutively from 0 in the order they are found.

    Returns:
        A tuple ``(a.Attributes, klass_values)``.
    """
    lines = self.__get_lines(file_path, self.NAMES)
    klass_values = item.NameItem(lines[0]).processed().split(',')

    collected = []
    for raw in lines:
        entry = item.NameItem(raw)
        text = entry.processed()
        # Skip blank lines and lines that do not describe an attribute.
        if len(text) == 0 or not entry.isAttribute():
            continue
        # The next index equals the number of attributes gathered so far.
        collected.append(
            a.Attribute(self.get_name(text), self.get_values(text),
                        len(collected)))
    return (a.Attributes(collected), klass_values)
def test_attributes_are_equal(self):
    """Attributes collections are equal only when their attributes match.

    Two collections built from identical attributes must compare equal;
    a collection whose second attribute differs must not.
    """
    attrs = a.Attributes([
        a.Attribute('band', ['dual', 'tri', 'quad'], 0),
        a.Attribute('size', ['big', 'small', 'medium'], 1),
    ])
    same = a.Attributes([
        a.Attribute('band', ['dual', 'tri', 'quad'], 0),
        a.Attribute('size', ['big', 'small', 'medium'], 1),
    ])
    self.assertEqual(attrs, same, 'they should be the same')

    other = a.Attributes([
        a.Attribute('band', ['dual', 'tri', 'quad'], 0),
        a.Attribute('pda', ['y', 'n'], 1),
    ])
    # Compare the locally built collection (not the self.attrs fixture) so
    # the negative case exercises the same object as the positive case.
    self.assertNotEqual(attrs, other, 'shouldnt be the same')
def test_attribute_creation(self):
    """A new Attribute exposes the name and values it was built with."""
    created = a.Attribute('foo', ['a', 'b', 'c'], 0)

    self.assertEqual('foo', created.name)
    self.assertEqual(['a', 'b', 'c'], created.values)
def test_returns_true_if_value_is_present(self):
    """has_value() is true for a listed value and false for an unlisted one."""
    subject = a.Attribute('foo', ['a', 'b', 'c'], 0)

    listed = subject.has_value('c')
    self.assertTrue(listed)

    unlisted = subject.has_value('d')
    self.assertFalse(unlisted)
def test_empty_freq_dists(self):
    """empty_freq_dists() on an Attribute has one empty entry per value.

    The result has three entries and, for every attribute value, the
    entry's N() is 0.
    """
    subject = a.Attribute('foo', ['a', 'b', 'c'], 0)
    dists = subject.empty_freq_dists()

    self.assertEqual(3, len(dists))
    for value in subject.values:
        count = dists[value].N()
        self.assertEqual(0, count)
def test_attributes_contain_an_attribute(self):
    """The self.attrs fixture contains an equal 'band' attribute."""
    band = a.Attribute('band', ['dual', 'tri', 'quad'], 0)
    # Use the membership operator rather than calling __contains__
    # directly, so the test goes through the same path as client code.
    self.assertTrue(band in self.attrs)
def test_to_string(self):
    """str() of an Attributes collection lists each attribute in order."""
    first = a.Attribute("first", ['a', 'b', 'c'], 0)
    second = a.Attribute("second", ['d', 'e'], 1)
    collection = a.Attributes([first, second])

    rendered = str(collection)

    self.assertEqual(
        '[first:[a,b,c] index:0, second:[d,e] index:1]', rendered)
def test_values_as_str(self):
    """values_as_str() joins the attribute's values with commas."""
    subject = a.Attribute('foo', ['a', 'b', 'c'], 0)
    joined = subject.values_as_str()
    self.assertEqual('a,b,c', joined)
def test_to_string(self):
    """str() of an Attribute shows its name, values and index."""
    subject = a.Attribute('foo', ['a', 'b', 'c'], 0)
    rendered = str(subject)
    self.assertEqual('foo:[a,b,c] index:0', rendered)