Exemplo n.º 1
0
	def begin_validation(self, classifier):
		"""Start a validation run that uses `classifier`.

		Creates the 'Validation' stepper, stores `classifier` on the instance,
		calls its `begin_classification()`, and then runs the
		`begin_validation_1()` hook. An informational message is logged
		before and after these steps.
		"""
		log = transcripts.printf_information
		log('Begining validation...')
		self.stepper = transcripts.Stepper('Validation')
		self.classifier = classifier
		self.classifier.begin_classification()
		self.begin_validation_1()
		log('Began validation.')
Exemplo n.º 2
0
	def begin_training(self):
		"""Start a training run.

		Creates the 'Training' stepper, runs the `begin_training_1()` hook,
		and then reports the current prototype count and vector length.
		"""
		log = transcripts.printf_information
		log('Begining training...')
		self.training_stepper = transcripts.Stepper('Training')
		self.begin_training_1()
		log('Began training.')
		# Each value is fetched immediately before it is reported.
		prototype_count = self.get_prototype_count()
		transcripts.printf_input('Prototypes    == %d;', prototype_count)
		vector_length = self.get_vector_length()
		transcripts.printf_input('Vector length == %d;', vector_length)
Exemplo n.º 3
0
	def end_classification(self):
		"""Finish a classification run.

		Runs the `end_classification_1()` hook, destroys the classification
		stepper, and then clears the stepper and cache references.
		"""
		log = transcripts.printf_information
		log('Ending classification...')
		self.end_classification_1()
		stepper = self.classification_stepper
		stepper.destroy()
		# Targets are assigned left to right: stepper first, then cache.
		self.classification_stepper = self.classification_cache = None
		log('Ended classification.')
Exemplo n.º 4
0
	def store_to_file(self, path):
		"""Write this object's data to the file at `path`.

		Logs a warning when `path` already exists and then overwrites it.
		The file is opened in binary write mode with a 512 KiB buffer and
		handed to `store_to_stream()`.

		The file is closed even when `store_to_stream()` raises, so a failed
		store does not leak the open handle.
		"""
		transcripts.printf_information('Storing data to file [%s]...', path)
		if os.path.exists(path):
			transcripts.printf_warning('File does exist; overwriting.')
		# `open` takes the same arguments as the Python 2-only `file` builtin
		# and is also available on Python 3.
		stream = open(path, 'wb', 512 * 1024)
		try:
			self.store_to_stream(stream)
		finally:
			stream.close()
Exemplo n.º 5
0
	def end_validation(self):
		"""Finish a validation run.

		Runs the `end_validation_1()` hook, calls `end_classification()` on
		the stored classifier and drops the reference to it, then destroys
		the stepper and drops that reference as well.
		"""
		log = transcripts.printf_information
		log('Ending validation...')
		self.end_validation_1()
		classifier = self.classifier
		classifier.end_classification()
		self.classifier = None
		stepper = self.stepper
		stepper.destroy()
		self.stepper = None
		log('Ended validation.')
Exemplo n.º 6
0
	def begin_classification(self):
		"""Start a classification run.

		Resets the classification cache to an empty dictionary, creates the
		'Classification' stepper, runs the `begin_classification_1()` hook,
		and then reports the current prototype count and vector length.
		"""
		log = transcripts.printf_information
		log('Begining classification...')
		self.classification_cache = {}
		self.classification_stepper = transcripts.Stepper('Classification')
		self.begin_classification_1()
		log('Began classification.')
		# Each value is fetched immediately before it is reported.
		prototype_count = self.get_prototype_count()
		transcripts.printf_input('Prototypes    == %d;', prototype_count)
		vector_length = self.get_vector_length()
		transcripts.printf_input('Vector length == %d;', vector_length)
Exemplo n.º 7
0
	def load_from_file(self, path):
		"""Load this object's data from the file at `path`.

		When `path` does not exist a warning is logged and the method
		returns without loading anything. Otherwise the file is opened in
		binary read mode with a 512 KiB buffer and handed to
		`load_from_stream()`.

		The file is closed even when `load_from_stream()` raises, so a failed
		load does not leak the open handle.
		"""
		transcripts.printf_information('Loading data from file [%s]...', path)
		if not os.path.exists(path):
			transcripts.printf_warning('File does not exist; aborting.')
			return
		# `open` takes the same arguments as the Python 2-only `file` builtin
		# and is also available on Python 3.
		stream = open(path, 'rb', 512 * 1024)
		try:
			self.load_from_stream(stream)
		finally:
			stream.close()
Exemplo n.º 8
0
def handle_file(path, dumper):
    """Parse the file at `path` and feed it to `handle_stream`.

    The first line must be exactly the lewis DOCTYPE declaration followed by
    a newline. The stream, positioned after that line, is then passed to
    `handle_stream(stream, dumper)`.

    The file is closed on every exit path, including a failed header check
    or an error raised by `handle_stream`.

    Raises:
        Exception: if the first line is not the expected DOCTYPE line.
    """
    transcripts.printf_information("Parsing file [%s]...", path)
    # `open` takes the same arguments as the Python 2-only `file` builtin.
    stream = open(path, "rt")
    try:
        first_line = stream.readline()
        if first_line != '<!DOCTYPE lewis SYSTEM "lewis.dtd">\n':
            # Same exception type as before; the message names the file.
            raise Exception(
                "File [%s] does not start with the expected lewis DOCTYPE line."
                % (path,))
        handle_stream(stream, dumper)
    finally:
        stream.close()
Exemplo n.º 9
0
	def end_training(self):
		"""Finish a training run.

		Runs the `end_training_1()` hook, destroys the training stepper and
		drops the reference to it, sets `executed_training` to True, and then
		reports the resulting prototype count and vector length.
		"""
		log = transcripts.printf_information
		log('Ending training...')
		self.end_training_1()
		stepper = self.training_stepper
		stepper.destroy()
		self.training_stepper = None
		self.executed_training = True
		log('Ended training.')
		# Each value is fetched immediately before it is reported.
		prototype_count = self.get_prototype_count()
		transcripts.printf_output('Prototypes    == %d;', prototype_count)
		vector_length = self.get_vector_length()
		transcripts.printf_output('Vector length == %d;', vector_length)
Exemplo n.º 10
0
	def store(self):
		"""Pickle the document data to the stream at `self.data_path`.

		Opens the stream through `streams.open(self.data_path, 'o')` and
		dumps, in this order, the document identifiers, titles, contents and
		vectors. `load()` reads them back in the same order.

		When `streams.open` returns None a warning is logged and the method
		returns without writing. Previously execution fell through and
		`pickle.dump` was called with a None stream. The stream is closed even
		when a dump raises.
		"""
		transcripts.printf_information('Storing data to stream [%s]...', self.data_path)
		stream = streams.open(self.data_path, 'o')
		if stream is None:
			transcripts.printf_warning('Stream does not exist; aborting.')
			return
		try:
			pickle.dump(self.document_identifiers, stream)
			pickle.dump(self.document_titles, stream)
			pickle.dump(self.document_contents, stream)
			pickle.dump(self.document_vectors, stream)
		finally:
			stream.close()
Exemplo n.º 11
0
def load_content(path):
	"""Extract the content of a PDF or PostScript file via an external tool.

	Paths ending in '.pdf' (case-insensitive) are run through
	`/usr/bin/pdftotext path -`, and paths ending in '.ps' through
	`/usr/bin/pstotext path`, both by way of `execute_load_content`.

	Returns:
		Whatever `execute_load_content` returns for a known extension, or
		None (after logging an error) when the extension is unknown.
	"""
	transcripts.printf_information('Parsing file [%s]...', path)
	# The original opened the file here and never read or closed the handle.
	# The open is kept so an unreadable path still raises at this point, but
	# the handle is released immediately instead of being leaked.
	open(path, 'rt').close()
	lowered_path = path.lower()
	if lowered_path.endswith('.pdf'):
		content = execute_load_content(['/usr/bin/pdftotext', path, '-'])
	elif lowered_path.endswith('.ps'):
		content = execute_load_content(['/usr/bin/pstotext', path])
	else:
		transcripts.printf_error('File extension is unknown; ignoring.')
		content = None
	return content
Exemplo n.º 12
0
	def load(self):
		"""Unpickle the document data from the stream at `self.data_path`.

		Opens the stream through `streams.open(self.data_path, 'i')` and
		loads, in this order, the document identifiers, titles, contents and
		vectors. `store()` writes them in the same order.

		When `streams.open` returns None a warning is logged and the method
		returns without touching the instance. The stream is closed even when
		a load raises. Attributes loaded before the failure keep their new
		values.
		"""
		transcripts.printf_information('Loading data from stream [%s]...', self.data_path)
		stream = streams.open(self.data_path, 'i')
		if stream is None:
			transcripts.printf_warning('Stream does not exist; aborting.')
			return
		try:
			# NOTE(review): unpickling can execute arbitrary code; this is
			# only safe if the data at `data_path` is trusted. Confirm.
			self.document_identifiers = pickle.load(stream)
			self.document_titles = pickle.load(stream)
			self.document_contents = pickle.load(stream)
			self.document_vectors = pickle.load(stream)
		finally:
			stream.close()
Exemplo n.º 13
0
	def load_from_stream(self, stream):
		"""Read the prototypes from `stream` and install them on this object.

		The prototypes are read with `vectors.input_(stream)`. Every
		prototype must have the same length as the first one; that length
		becomes `vector_length`, or None when there are no prototypes.
		`pre_load(prototypes, vector_length)` is called before the values are
		stored on the instance and `post_load()` afterwards.

		Raises:
			Exception: if a prototype's length differs from the first one's.
		"""
		prototypes = vectors.input_(stream)
		# An explicit length test is kept because the container type returned
		# by `vectors.input_` is not visible here.
		if len(prototypes) > 0:
			vector_length = len(prototypes[0])
			for index, prototype in enumerate(prototypes):
				if vector_length != len(prototype):
					# Same exception type as before, now with a diagnostic.
					raise Exception(
						'Prototype %d has length %d; expected %d.'
						% (index, len(prototype), vector_length))
		else:
			vector_length = None
		self.pre_load(prototypes, vector_length)
		self.prototypes = prototypes
		self.vector_length = vector_length
		self.post_load()
		transcripts.printf_information('Loaded from stream.')
		# NOTE(review): with no prototypes `vector_length` is None; whether
		# the '%d' report below copes depends on `get_vector_length` and
		# `printf_output`. Confirm.
		transcripts.printf_output('Prototypes    == %d;', self.get_prototype_count())
		transcripts.printf_output('Vector length == %d;', self.get_vector_length())