示例#1
0
	def construct(self, data = None, objectClass = None):
		"""
			Create an AttributeSet or an ExampleSet from raw lines.

			When objectClass is None, every line of data is split on tabs and
			wrapped in an Attribute. Otherwise every line is passed to Example()
			together with objectClass.

			data:			iterable of text lines used to construct the objects.
			objectClass:	value forwarded to Example(); supplying anything other
							than None signals construction of an ExampleSet.

			Returns an AttributeSet when objectClass is None, else an ExampleSet.
			Raises ValueError when data is None.
		"""

		if data is None:
			raise ValueError("No data specified.")

		if objectClass is None:
			attributes = AttributeSet()
			for line in data:
				attributes.add(Attribute(line.split('\t')))
			return attributes

		# objectClass is either None or not None, so no further case exists;
		# the former trailing "else: raise ValueError(...)" could never run.
		examples = ExampleSet()
		for line in data:
			examples.add(Example(line, objectClass))
		return examples
	def build(self, data = None, attributeSet = None):
		"""
			Build an AttributeSet or an ExampleSet from raw input.

			data:			iterable of input items. Text lines prefixed with '@'
							describe attributes; text lines prefixed with '#'
							describe examples.
			attributeSet:	when None an AttributeSet is built from the '@' lines;
							otherwise it is passed to Example() for every '#'
							line and an ExampleSet is returned.

			In the ExampleSet case, items of data that are themselves ExampleSet
			instances are added to the result as-is. Everything else is ignored.
		"""

		if attributeSet is None:
			attributes = AttributeSet()

			for line in data:
				# Slice rather than index so an empty line is skipped instead
				# of raising IndexError.
				if line[:1] == '@':
					attributes.add(Attribute(line[1:].split('\t')))

			return attributes

		examples = ExampleSet()

		for line in data:
			if isinstance(line, str):
				# startswith() is safe on empty strings, unlike line[0].
				if line.startswith('#'):
					examples.add(Example(line[1:], attributeSet))
			elif isinstance(line, ExampleSet):
				# isinstance avoids constructing a throwaway ExampleSet per item
				# just to compare types.
				examples.add(line)

		return examples
	def __init__(self, filePath=None):
		"""
			Initialise the instance fields and optionally hand a path to initialize().

			filePath:	when not None it is passed to self.initialize(); when None
						the instance is left with its default field values.
		"""
		self.name = None
		self.attributes = None
		self.examples = ExampleSet()

		self.iteration_index = 0

		# Without a path there is nothing more to do.
		if filePath is None:
			return

		self.initialize(filePath)
示例#4
0
	def getTrainValidateTestSet(self, p = .6, v = .5):
		"""
			Randomly split the examples into training, validation and test sets.

			p:	fraction of all examples drawn at random for the training set.
			v:	fraction of the remaining (1 - p) share assigned to validation;
				whatever is left after that becomes the test set.

			Returns a (train, valid, tests) tuple of ExampleSet objects. Within
			each set the examples keep the order they have in getExamples().
			Also prints the size of every set and of the full collection.
		"""
		examples = self.getExamples()
		total = len(examples)
		n = int(total * p)
		m = int(total * ((1. - p) * v))

		# Sample positions instead of the examples themselves. Membership tests
		# on examples are linear per lookup and compare by value, so examples
		# that compare equal could all land in train or be dropped entirely.
		chosen = set(sample(range(total), n))

		train = ExampleSet()
		valid = ExampleSet()
		tests = ExampleSet()

		for index, example in enumerate(examples):
			if index in chosen:
				train.add(example)
			elif m != 0:
				# The first m examples not chosen for training go to validation.
				valid.add(example)
				m -= 1
			else:
				tests.add(example)

		# A single parenthesised argument prints identically under Python 2
		# (print statement) and Python 3 (print function).
		print("train: {0} valid: {1} tests: {2} all: {3}".format(len(train), len(valid), len(tests), total))
		return train, valid, tests
    def build(self, data=None, attributeSet=None):
        """ Return an AttributeSet or ExampleSet object.

            @param  data: input data; raw (textual) attributes or examples
            @param  attributeSet: AttributeSet object required to create ExampleSet
                    objects; when None an AttributeSet is built instead
            @return AttributeSet built from the '@'-prefixed lines when attributeSet
                    is None, otherwise an ExampleSet built from the '#'-prefixed lines
        """
        # Build an AttributeSet object from raw (text) attributes.
        if attributeSet is None:
            attributes = AttributeSet()

            for line in data:
                # Slice rather than index so an empty line is skipped instead of
                # raising IndexError.
                if line[:1] == '@':
                    attributes.add(Attribute(line[1:].split('\t')))

            return attributes

        # Build an ExampleSet object from raw (text) examples and an AttributeSet.
        examples = ExampleSet()

        for line in data:
            # Only text lines marked as examples (prefixed by '#') are used.
            # Non-string items are ignored: the branch that added them was
            # disabled on 7/13/2016.
            if isinstance(line, str) and line.startswith('#'):
                examples.add(Example(line[1:], attributeSet))

        return examples
	def build(self, data = None, attributeSet = None):
		""" Return an AttributeSet or ExampleSet object.

			@param	data: input data; raw (textual) attributes or examples
			@param	attributeSet: AttributeSet object required to create ExampleSet
					objects; when None an AttributeSet is built instead
			@return	AttributeSet built from the '@'-prefixed lines when attributeSet
					is None, otherwise an ExampleSet built from the '#'-prefixed lines
		"""
		# Build an AttributeSet object from raw (text) attributes.
		if attributeSet is None:
			attributes = AttributeSet()

			for line in data:
				# Slice rather than index so an empty line is skipped instead of
				# raising IndexError.
				if line[:1] == '@':
					attributes.add(Attribute(line[1:].split('\t')))

			return attributes

		# Build an ExampleSet object from raw (text) examples and an AttributeSet.
		examples = ExampleSet()

		for line in data:
			# Only text lines marked as examples (prefixed by '#') are used.
			# Non-string items are ignored: the branch that added them was
			# disabled on 7/13/2016.
			if isinstance(line, str) and line.startswith('#'):
				examples.add(Example(line[1:], attributeSet))

		return examples
示例#7
0
	def getTrainTestSet(self, p = .6):
		"""
			Randomly split the examples into a training set and a test set.

			p:	fraction of all examples drawn at random for the training set;
				every example not drawn goes to the test set.

			Returns a (train, tests) tuple of ExampleSet objects. Within each
			set the examples keep the order they have in getExamples().
		"""
		examples = self.getExamples()
		total = len(examples)
		n = int(total * p)

		# Sample positions instead of the examples themselves. Membership tests
		# on examples are linear per lookup and compare by value, so examples
		# that compare equal to a sampled one would all end up in train.
		chosen = set(sample(range(total), n))

		train = ExampleSet()
		tests = ExampleSet()

		for index, example in enumerate(examples):
			if index in chosen:
				train.add(example)
			else:
				tests.add(example)

		return train, tests
	def getExamplesByClass(self, i = None):
		""" Return a new ExampleSet wrapping the examples with label i. """
		# Label filtering is delegated to the stored example collection.
		matching = self.examples.getExamples(i)
		return ExampleSet(matching)