def train(cls, examples, parameters, outputFile=None): #, timeout=None):
    """
    Train the SVM-multiclass classifier on a set of examples.

    @type examples: string (filename) or list (or iterator) of examples
    @param examples: a list or file containing examples in SVM-format
    @type parameters: a dictionary or string
    @param parameters: parameters for the classifier
    @type outputFile: string
    @param outputFile: the name of the model file to be written
    @rtype: int
    @return: the return code of the external training program
    """
    timer = Timer()
    parameters = cls.getParams(parameters)
    # If examples are in a list, they will be written to a file for SVM-multiclass
    if isinstance(examples, list):
        print >> sys.stderr, "Training SVM-MultiClass on", len(examples), "examples"
        # The receiver of this method is named "cls"; the name "self" is not
        # defined here, so the helpers are reached through cls.
        trainPath = cls.tempDir + "/train.dat"
        examples = cls.filterTrainingSet(examples)
        Example.writeExamples(examples, trainPath)
    else:
        print >> sys.stderr, "Training SVM-MultiClass on file", examples
        trainPath = cls.stripComments(examples)
    # NOTE(review): the location of the training binary is hard-coded.
    args = ["/home/jari/Programs/liblinear-1.5-poly2/train"]
    cls.__addParametersToSubprocessCall(args, parameters)
    if outputFile is None:
        args += [trainPath, "model"]
        logFile = open("svmmulticlass.log", "at")
    else:
        args += [trainPath, outputFile]
        logFile = open(outputFile + ".log", "wt")
    # Close the log even if the subprocess cannot be started.
    try:
        rv = subprocess.call(args, stdout=logFile)
    finally:
        logFile.close()
    print >> sys.stderr, timer.toString()
    return rv
def waitForJobs(self, jobs, pollIntervalSeconds=60, timeout=None, verbose=True):
    """
    Poll the status of the given jobs until none of them is queued or
    running, or until the timeout has been exceeded.

    @type jobs: list
    @param jobs: the jobs whose status is queried with self.getJobStatus
    @type pollIntervalSeconds: number
    @param pollIntervalSeconds: time to wait between two status checks
    @type timeout: number or None
    @param timeout: maximum total waiting time in seconds, None for no limit
    @type verbose: Boolean
    @param verbose: print progress messages to sys.stderr
    @rtype: dictionary
    @return: number of jobs per state (FINISHED, QUEUED, FAILED, RUNNING) at the last status check
    """
    print >> sys.stderr, "Waiting for results"
    waitTimer = Timer()
    while True:
        jobStatus = {"FINISHED":0, "QUEUED":0, "FAILED":0, "RUNNING":0}
        for job in jobs:
            jobStatus[self.getJobStatus(job)] += 1
        jobStatusString = str(jobStatus["QUEUED"]) + " queued, " + str(jobStatus["RUNNING"]) + " running, " + str(jobStatus["FINISHED"]) + " finished, " + str(jobStatus["FAILED"]) + " failed"
        if jobStatus["QUEUED"] + jobStatus["RUNNING"] == 0:
            if verbose:
                print >> sys.stderr, "\nAll runs done (" + jobStatusString + ")"
            break
        # decide what to do
        # The timeout is measured with waitTimer, the only timer that runs
        # for the whole call (the original referred to undefined timers).
        if timeout is None or waitTimer.getElapsedTime() < timeout:
            sleepTimer = Timer()
            accountName = self.account
            if self.account is None:
                accountName = "local"
            if verbose:
                sleepString = " [ ] "
                print >> sys.stderr, "\rWaiting for " + str(len(jobs)) + " on " + accountName + "(" + jobStatusString + "),", waitTimer.elapsedTimeToString() + sleepString,
            # Sleep in 5 second steps so that the progress bar can be redrawn.
            while sleepTimer.getElapsedTime() < pollIntervalSeconds:
                if verbose:
                    steps = int(10 * sleepTimer.getElapsedTime() / pollIntervalSeconds) + 1
                    sleepString = " [" + steps * "." + (10-steps) * " " + "] "
                    print >> sys.stderr, "\rWaiting for " + str(len(jobs)) + " on " + accountName + "(" + jobStatusString + "),", waitTimer.elapsedTimeToString() + sleepString,
                time.sleep(5)
        else:
            if verbose:
                print >> sys.stderr, "\nTimed out, ", waitTimer.elapsedTimeToString()
            break
    return jobStatus
def test(cls, examples, modelPath, output=None, parameters=None, forceInternal=False): # , timeout=None):
    """
    Classify examples with a pre-trained model.

    @type examples: string (filename) or list (or iterator) of examples
    @param examples: a list or file containing examples in SVM-format
    @type modelPath: string
    @param modelPath: filename of the pre-trained model file
    @type parameters: a dictionary or string
    @param parameters: parameters for the classifier
    @type output: string
    @param output: the name of the predictions file to be written
    @type forceInternal: Boolean
    @param forceInternal: Use python classifier even if SVM Multiclass binary is defined in Settings.py
    @rtype: list
    @return: one list per line of the predictions file: the first field as an int, followed by the remaining fields as strings
    """
    if forceInternal or Settings.SVMMultiClassDir is None:
        return cls.testInternal(examples, modelPath, output)
    timer = Timer()
    if isinstance(examples, list):
        print >> sys.stderr, "Classifying", len(examples), "with SVM-MultiClass model", modelPath
        # The receiver of this method is named "cls"; the name "self" is not
        # defined here. The predictions returned by the filter are replaced
        # below by the ones read from the predictions file.
        examples, predictions = cls.filterClassificationSet(examples, False)
        testPath = cls.tempDir + "/test.dat"
        Example.writeExamples(examples, testPath)
    else:
        print >> sys.stderr, "Classifying file", examples, "with SVM-MultiClass model", modelPath
        testPath = cls.stripComments(examples)
        examples = Example.readExamples(examples, False)
    # NOTE(review): the location of the prediction binary is hard-coded.
    args = ["/home/jari/Programs/liblinear-1.5-poly2/predict"]
    if modelPath is None:
        modelPath = "model"
    if parameters is not None:
        # Work on a copy so that the caller's parameters are not modified.
        parameters = copy.copy(parameters)
        if "c" in parameters:
            del parameters["c"]
        if "predefined" in parameters:
            modelPath = os.path.join(parameters["predefined"][0], "classifier/model")
            del parameters["predefined"]
        cls.__addParametersToSubprocessCall(args, parameters)
    if output is None:
        output = "predictions"
        logFile = open("svmmulticlass.log", "at")
    else:
        logFile = open(output + ".log", "wt")
    args += [testPath, modelPath, output]
    # Close the log even if the subprocess cannot be started.
    try:
        subprocess.call(args, stdout=logFile, stderr=logFile)
    finally:
        logFile.close()
    predictions = []
    predictionsFile = open(output, "rt")
    try:
        for line in predictionsFile:
            fields = line.split()
            predictions.append([int(fields[0])] + fields[1:])
    finally:
        predictionsFile.close()
    print >> sys.stderr, timer.toString()
    return predictions
def waitForJobCount(self, targetCount=0, pollIntervalSeconds=60, verbose=True):
    """
    Block until self.getNumJobs() reports at most targetCount jobs.

    @type targetCount: int
    @param targetCount: the job count to wait for; -1 returns immediately
    @type pollIntervalSeconds: number
    @param pollIntervalSeconds: time to wait between two job count checks
    @type verbose: Boolean
    @param verbose: print a progress line to sys.stderr while waiting
    """
    if targetCount == -1:
        return
    remaining = self.getNumJobs()
    if remaining <= targetCount:
        return
    totalTimer = Timer()
    while remaining > targetCount:
        pollTimer = Timer()
        if self.account == None:
            where = "local"
        else:
            where = self.account
        if verbose:
            # The head of the progress line stays the same until the next poll.
            header = "\rWaiting for " + str(remaining) + " on " + where + " (limit=" + str(targetCount) + ")"
            print >> sys.stderr, header, totalTimer.elapsedTimeToString() + " [ ] ",
        # Sleep in 5 second steps, redrawing a ten step progress bar.
        while pollTimer.getElapsedTime() < pollIntervalSeconds:
            if verbose:
                filled = int(10 * pollTimer.getElapsedTime() / pollIntervalSeconds) + 1
                bar = " [" + filled * "." + (10 - filled) * " " + "] "
                print >> sys.stderr, header, totalTimer.elapsedTimeToString() + bar,
            time.sleep(5)
        remaining = self.getNumJobs()
    print >> sys.stderr, "\nAll jobs done"
def __init__(self, style=None, classSet=None, featureSet=None):
    """
    Set up the id sets, the style definition and the profiling timers.

    @param style: stored as self.styles
    @param classSet: the class id set; a new IdSet(1) is created if not given.
    The id of the class "neg" must be 1.
    @param featureSet: the feature id set; a new IdSet() is created if not given
    """
    if classSet == None:
        classSet = IdSet(1)
    assert classSet.getId("neg") == 1
    if featureSet == None:
        featureSet = IdSet()
    ExampleBuilder.__init__(self, classSet, featureSet)
    self.styles = style
    # One timer per profiled stage, each created with Timer(False).
    for timerName in ("timerBuildExamples",
                      "timerCrawl",
                      "timerCrawlPrecalc",
                      "timerMatrix",
                      "timerMatrixPrecalc"):
        setattr(self, timerName, Timer(False))
async def run(self):
    """Run forever: stamp the update time, register tasks, then sleep.

    Each iteration stores Timer.get_ms_time() in self.last_update, calls
    self._register_tasks() and sleeps for self.heartbeat seconds.
    """
    while True:
        now_ms = Timer.get_ms_time()
        self.last_update = now_ms
        self._register_tasks()
        pause = self.heartbeat
        await asyncio.sleep(pause)
async def process(self) -> tuple:
    """Build the reply to a time query.

    Returns:
        A pair of WorldOpCode.SMSG_QUERY_TIME_RESPONSE and a one-element
        list holding the payload: two little-endian unsigned 32-bit
        integers, the value of Timer.get_ms_time() followed by 0.
    """
    payload = pack('<2I', Timer.get_ms_time(), 0)
    packets = [payload]
    return WorldOpCode.SMSG_QUERY_TIME_RESPONSE, packets
async def process(self):
    """Answer a name query or a time query, depending on self.opcode.

    Returns:
        A pair (response opcode, packed response bytes), or (None, None)
        when the opcode is neither CMSG_NAME_QUERY nor CMSG_QUERY_TIME.
    """
    if self.opcode == WorldOpCode.CMSG_NAME_QUERY:
        # we send this to show player info for another players; to allow chat
        requested_guid = int.from_bytes(self.packet[6:14], 'little')
        await QueuesRegistry.name_query_queue.put((self.player, requested_guid))

        # The name is sent as a NUL-terminated UTF-8 string.
        encoded_name = self.player.name.encode('utf-8') + b'\x00'
        layout = '<Q' + str(len(encoded_name)) + 'sB3IB'
        payload = pack(
            layout,
            self.player.guid,
            encoded_name,
            0,
            self.player.race,
            self.player.gender,
            self.player.char_class,
            0,
        )
        return WorldOpCode.SMSG_NAME_QUERY_RESPONSE, payload

    if self.opcode == WorldOpCode.CMSG_QUERY_TIME:
        payload = pack('<2I', Timer.get_ms_time(), 0)
        return WorldOpCode.SMSG_QUERY_TIME_RESPONSE, payload

    return None, None
async def process(self):
    """Parse a movement packet and store the new position on the player.

    A movement message (packed guid, movement flags, timestamp, position)
    is assembled but not returned; this handler sends nothing back.

    Returns:
        Always (None, None).
    """
    self._parse_packet()

    player = self.temp_ref.player

    guid_prefix = player.packed_guid
    movement = pack(
        '<IBI4ff',
        self.move_flags,   # unit movement flags
        self.move_flags2,  # extra move flags
        Timer.get_ms_time(),
        self.position.x,
        self.position.y,
        self.position.z,
        self.position.orientation,
        0,
    )
    # NOTE: built but currently unused, the handler returns nothing.
    response = guid_prefix + movement

    player.position = self.position

    # NOTE: publishing the position (x, y, z, orientation) to web_data_queue
    # and the player to players_queue is currently disabled.

    # Yield control to the event loop once.
    await asyncio.sleep(0)

    # should return nothing
    return None, None
async def run(self):
    """Run the update loop forever.

    Each iteration stores Timer.get_ms_time() in self.last_update, awaits
    self.update() with a one second limit and then sleeps for
    self.heartbeat seconds. A timed out update is logged and the loop
    goes on.
    """
    while True:
        self.last_update = Timer.get_ms_time()

        try:
            await asyncio.wait_for(self.update(), timeout=1.0)
        # asyncio.wait_for raises asyncio.TimeoutError, which is an alias of
        # the builtin TimeoutError only since Python 3.11; catching the
        # asyncio name works on every version.
        except asyncio.TimeoutError:
            Logger.warning('[World Manager]: Timeout...')
        finally:
            await asyncio.sleep(self.heartbeat)
def waitForJobCount(self, targetCount=0, pollIntervalSeconds=60, verbose=True):
    """
    Block until self.getNumJobs() reports at most targetCount jobs.

    @type targetCount: int
    @param targetCount: the job count to wait for; -1 returns immediately
    @type pollIntervalSeconds: number
    @param pollIntervalSeconds: time to wait between two job count checks
    @type verbose: Boolean
    @param verbose: print a progress line to sys.stderr while waiting
    """
    if targetCount == -1:
        return
    numJobs = self.getNumJobs()
    if numJobs <= targetCount:
        return
    waitTimer = Timer()
    while numJobs > targetCount:
        sleepTimer = Timer()
        # accountName and sleepString were used without being defined; they
        # are set here before the progress line is printed.
        accountName = self.account
        if self.account is None:
            accountName = "local"
        if verbose:
            sleepString = " [ ] "
            print >> sys.stderr, "\rWaiting for " + str(numJobs) + " on " + accountName + " (limit=" + str(targetCount) + ")", waitTimer.elapsedTimeToString() + sleepString,
        # Wait between polls instead of querying the job count in a busy
        # loop; sleep in 5 second steps so the progress bar can be redrawn.
        while sleepTimer.getElapsedTime() < pollIntervalSeconds:
            if verbose:
                steps = int(10 * sleepTimer.getElapsedTime() / pollIntervalSeconds) + 1
                sleepString = " [" + steps * "." + (10-steps) * " " + "] "
                print >> sys.stderr, "\rWaiting for " + str(numJobs) + " on " + accountName + " (limit=" + str(targetCount) + ")", waitTimer.elapsedTimeToString() + sleepString,
            time.sleep(5)
        numJobs = self.getNumJobs()
    print >> sys.stderr, "\nAll jobs done"
def waitForJobs(self, scriptNames, timeout=None):
    """
    Poll the status of the given scripts until none of them is queued or
    running, or until the timeout has been exceeded.

    @type scriptNames: list
    @param scriptNames: the scripts whose status is queried with self.getLouhiStatus
    @type timeout: number or None
    @param timeout: maximum total waiting time in seconds, None for no limit
    @rtype: Boolean
    @return: True if all jobs left the queued and running states, False on timeout
    """
    # The original asserted against "outputFileNames" and counted
    # "combinations", neither of which is defined in this method; the
    # job count is taken from scriptNames instead.
    print >> sys.stderr, "Waiting for results"
    louhiTimer = Timer()
    while True:
        # count the jobs in each state
        processStatus = {"FINISHED":0, "QUEUED":0, "FAILED":0, "RUNNING":0}
        for scriptName in scriptNames:
            processStatus[self.getLouhiStatus(scriptName)] += 1
        p = processStatus
        processStatusString = str(p["QUEUED"]) + " queued, " + str(p["RUNNING"]) + " running, " + str(p["FINISHED"]) + " finished, " + str(p["FAILED"]) + " failed"
        if processStatus["QUEUED"] + processStatus["RUNNING"] == 0:
            print >> sys.stderr
            print >> sys.stderr, "All jobs done (" + processStatusString + ")"
            break
        # decide what to do
        if timeout is None or louhiTimer.getElapsedTime() < timeout:
            # NOTE(review): cscConnection is not defined in this method;
            # presumably a module level object - confirm.
            sleepString = " [ ] "
            print >> sys.stderr, "\rWaiting for " + str(len(scriptNames)) + " on " + cscConnection.machineName + "(" + processStatusString + "),", louhiTimer.elapsedTimeToString() + sleepString,
            # Wait 60 seconds in 5 second steps, redrawing the progress bar.
            sleepTimer = Timer()
            while sleepTimer.getElapsedTime() < 60:
                steps = int(10 * sleepTimer.getElapsedTime() / 60) + 1
                sleepString = " [" + steps * "." + (10-steps) * " " + "] "
                print >> sys.stderr, "\rWaiting for " + str(len(scriptNames)) + " on " + cscConnection.machineName + "(" + processStatusString + "),", louhiTimer.elapsedTimeToString() + sleepString,
                time.sleep(5)
        else:
            print >> sys.stderr
            print >> sys.stderr, "Timed out, ", louhiTimer.elapsedTimeToString()
            return False
    return True
def _get_movement_info(self) -> bytes:
    """Serialize the movement section selected by self.update_flags.

    The result starts with the update flags byte. Depending on the flags
    it is followed by the movement flags and a timestamp, the position,
    the last fall time and the eight default movement speeds, and the
    low and high guid fields.

    Note that the ONTRANSPORT bit is cleared from self.movement_flags
    for players and units when UPDATEFLAG_LIVING is set.

    Returns:
        The packed little-endian bytes.
    """
    data = bytes()
    data += pack('<B', self.update_flags)

    if self.update_flags & UpdateObjectFlags.UPDATEFLAG_LIVING.value:
        if self.object_type == ObjectType.PLAYER.value:
            # TODO: check for transport
            self.movement_flags &= ~MovementFlags.ONTRANSPORT.value
        elif self.object_type == ObjectType.UNIT.value:
            self.movement_flags &= ~MovementFlags.ONTRANSPORT.value

        data += pack('<IBI', self.movement_flags, self.movement_flags2, Timer.get_ms_time())

    if self.update_flags & UpdateObjectFlags.UPDATEFLAG_HAS_POSITION.value:
        # TODO: check if transport
        data += self.update_object.position.to_bytes()

    if self.update_flags & UpdateObjectFlags.UPDATEFLAG_LIVING.value:
        # TODO: check transport, swimming and flying
        data += pack('<I', 0)  # last fall time

        movement = Config.World.Object.Unit.Player.Defaults.Movement
        data += pack(
            '<8f',
            movement.speed_walk,
            movement.speed_run,
            movement.speed_run_back,
            movement.speed_swim,
            movement.speed_swim_back,
            movement.speed_flight,
            movement.speed_flight_back,
            movement.speed_turn,
        )

    if self.update_flags & UpdateObjectFlags.UPDATEFLAG_LOWGUID.value:
        # Every branch must append packed bytes; the player and fallback
        # branches used to append a (format, value) tuple, which raises
        # TypeError when added to bytes.
        if self.object_type == ObjectType.ITEM.value:
            data += pack('<I', self.update_object.low_guid)
        elif self.object_type == ObjectType.UNIT.value:
            data += pack('<I', 0x0000000B)
        elif self.object_type == ObjectType.PLAYER.value:
            if self.update_flags & UpdateObjectFlags.UPDATEFLAG_SELF.value:
                data += pack('<I', 0x00000015)
            else:
                data += pack('<I', 0x00000008)
        else:
            data += pack('<I', 0x00000000)

    if self.update_flags & UpdateObjectFlags.UPDATEFLAG_HIGHGUID.value:
        # TODO: get high guid for another object types
        if self.object_type == ObjectType.ITEM.value:
            data += pack('<I', self.update_object.high_guid)
        else:
            data += pack('<I', 0x00000000)  # high guid for unit or player

    return data
def waitForJobs(self, jobs, pollIntervalSeconds=60, timeout=None, verbose=True):
    """
    Poll the status of the given jobs until none of them is queued or
    running, or until the timeout has been exceeded.

    @type jobs: list
    @param jobs: the jobs whose status is queried with self.getJobStatus
    @type pollIntervalSeconds: number
    @param pollIntervalSeconds: time to wait between two status checks
    @type timeout: number or None
    @param timeout: maximum total waiting time in seconds, None for no limit
    @type verbose: Boolean
    @param verbose: print progress messages to sys.stderr
    @rtype: dictionary
    @return: number of jobs per state (FINISHED, QUEUED, FAILED, RUNNING) at the last status check
    """
    print >> sys.stderr, "Waiting for results"
    waitTimer = Timer()
    while True:
        jobStatus = {"FINISHED": 0, "QUEUED": 0, "FAILED": 0, "RUNNING": 0}
        for job in jobs:
            jobStatus[self.getJobStatus(job)] += 1
        jobStatusString = str(jobStatus["QUEUED"]) + " queued, " + str(
            jobStatus["RUNNING"]) + " running, " + str(
                jobStatus["FINISHED"]) + " finished, " + str(
                    jobStatus["FAILED"]) + " failed"
        if jobStatus["QUEUED"] + jobStatus["RUNNING"] == 0:
            if verbose:
                print >> sys.stderr, "\nAll runs done (" + jobStatusString + ")"
            break
        # decide what to do
        # The timeout is measured with waitTimer, the only timer that runs
        # for the whole call (the original referred to undefined timers).
        if timeout is None or waitTimer.getElapsedTime() < timeout:
            sleepTimer = Timer()
            accountName = self.account
            if self.account is None:
                accountName = "local"
            if verbose:
                sleepString = " [ ] "
                print >> sys.stderr, "\rWaiting for " + str(
                    len(jobs)
                ) + " on " + accountName + "(" + jobStatusString + "),", waitTimer.elapsedTimeToString(
                ) + sleepString,
            # Sleep in 5 second steps so that the progress bar can be redrawn.
            while sleepTimer.getElapsedTime() < pollIntervalSeconds:
                if verbose:
                    steps = int(10 * sleepTimer.getElapsedTime() /
                                pollIntervalSeconds) + 1
                    sleepString = " [" + steps * "." + (
                        10 - steps) * " " + "] "
                    print >> sys.stderr, "\rWaiting for " + str(
                        len(jobs)
                    ) + " on " + accountName + "(" + jobStatusString + "),", waitTimer.elapsedTimeToString(
                    ) + sleepString,
                time.sleep(5)
        else:
            if verbose:
                print >> sys.stderr, "\nTimed out, ", waitTimer.elapsedTimeToString(
                )
            break
    return jobStatus
def optimizeLocal(Classifier, Evaluator, trainExamples, testExamples, classIds, combinations, workDir=None, timeout=None):
    """
    Train, test and evaluate a classifier with each parameter combination
    and select the combination whose evaluation compares best.

    @param Classifier: provides train(examples, parameters, output) and test(examples, model, output)
    @param Evaluator: provides evaluate(examples, predictions, classIds, output)
    @param trainExamples: the examples passed to Classifier.train
    @param testExamples: the examples passed to Classifier.test and Evaluator.evaluate
    @param classIds: passed on to Evaluator.evaluate
    @type combinations: list
    @param combinations: the parameter combinations to try
    @type workDir: string
    @param workDir: directory for the model, classification and evaluation files (default: relative file names)
    @param timeout: not used by this function
    @rtype: list or None
    @return: [evaluator, model file, classifications file, evaluation file, combination]
    for the best combination, or None if there are no combinations
    """
    bestResult = None
    for combinationCount, combination in enumerate(combinations, 1):
        Stream.setIndent(" ")
        print >> sys.stderr, "Parameters "+str(combinationCount)+"/"+str(len(combinations))+":", str(combination)
        Stream.setIndent(" ")
        combinationId = getCombinationString(combination)
        # Train
        trainOutput = "model-" + combinationId
        if workDir is not None:
            trainOutput = os.path.join(workDir, trainOutput)
        print >> sys.stderr, "Training..."
        timer = Timer()
        Classifier.train(trainExamples, combination, trainOutput)
        print >> sys.stderr, "Training Complete, time:", timer.toString()
        # Test
        testOutput = "classifications-" + combinationId
        if workDir is not None:
            testOutput = os.path.join(workDir, testOutput)
        print >> sys.stderr, "Testing..."
        timer = Timer()
        Classifier.test(testExamples, trainOutput, testOutput)
        print >> sys.stderr, "Testing Complete, time:", timer.toString()
        # Evaluate
        evaluationOutput = "evaluation-" + combinationId + ".csv"
        if workDir is not None:
            evaluationOutput = os.path.join(workDir, evaluationOutput)
        Stream.setIndent(" ")
        evaluator = Evaluator.evaluate(testExamples, testOutput, classIds, evaluationOutput)
        # Keep the first result, and any later one that compares better.
        if bestResult is None or evaluator.compare(bestResult[0]) > 0:
            bestResult = [evaluator, trainOutput, testOutput, evaluationOutput, combination]
    Stream.setIndent()
    # Without any combination there is no result to index.
    if bestResult is None:
        print >> sys.stderr, "No parameter combinations to evaluate"
        return None
    print >> sys.stderr, "Selected parameters", bestResult[-1]
    return bestResult
def buildExamples(exampleBuilder, sentences, outfilename):
    """
    Build the examples of each sentence and append them to a file.

    @param exampleBuilder: builds and pre-processes the examples of one
    sentence at a time (buildExamples, preProcessExamples)
    @type sentences: list
    @param sentences: sequences whose first element is passed to the example builder
    @type outfilename: string
    @param outfilename: the name of the example file to be written
    """
    timer = Timer()
    if "graph_kernel" in exampleBuilder.styles:
        counter = ProgressCounter(len(sentences), "Build examples", 0)
    else:
        counter = ProgressCounter(len(sentences), "Build examples")

    calculatePredictedRange(exampleBuilder, sentences)

    exampleCount = 0
    outfile = open(outfilename, "wt")
    # Close the file even if building the examples fails halfway.
    try:
        for sentence in sentences:
            counter.update(1, "Building examples ("+sentence[0].getSentenceId()+"): ")
            examples = exampleBuilder.buildExamples(sentence[0])
            # The examples are counted before pre-processing.
            exampleCount += len(examples)
            examples = exampleBuilder.preProcessExamples(examples)
            Example.appendExamples(examples, outfile)
    finally:
        outfile.close()

    print >> sys.stderr, "Examples built:", str(exampleCount)
    print >> sys.stderr, "Features:", len(exampleBuilder.featureSet.getNames())
    print >> sys.stderr, "Elapsed", timer.toString()
def waitForJobCount(self, targetCount=0, pollIntervalSeconds=60, verbose=True):
    """
    Block until self.getNumJobs() reports at most targetCount jobs.

    @type targetCount: int
    @param targetCount: the job count to wait for; -1 returns immediately
    @type pollIntervalSeconds: number
    @param pollIntervalSeconds: time to wait between two job count checks
    @type verbose: Boolean
    @param verbose: print a progress line to sys.stderr while waiting
    """
    if targetCount == -1:
        return
    remaining = self.getNumJobs()
    if remaining <= targetCount:
        return
    totalTimer = Timer()
    while remaining > targetCount:
        pollTimer = Timer()
        if self.account == None:
            where = "local"
        else:
            where = self.account
        if verbose:
            # The head of the progress line stays the same until the next poll.
            header = "\rWaiting for " + str(remaining) + " on " + where + " (limit=" + str(targetCount) + ")"
            print >> sys.stderr, header, totalTimer.elapsedTimeToString() + " [ ] ",
        # Sleep in 5 second steps, redrawing a ten step progress bar.
        while pollTimer.getElapsedTime() < pollIntervalSeconds:
            if verbose:
                filled = int(10 * pollTimer.getElapsedTime() / pollIntervalSeconds) + 1
                bar = " [" + filled * "." + (10-filled) * " " + "] "
                print >> sys.stderr, header, totalTimer.elapsedTimeToString() + bar,
            time.sleep(5)
        remaining = self.getNumJobs()
    print >> sys.stderr, "\nAll jobs done"
def buildExamples(exampleBuilder, sentences, outfilename):
    """
    Build the examples of each sentence and append them to a file.

    @param exampleBuilder: builds and pre-processes the examples of one
    sentence at a time (buildExamples, preProcessExamples)
    @type sentences: list
    @param sentences: sequences whose first element is passed to the example builder
    @type outfilename: string
    @param outfilename: the name of the example file to be written
    """
    timer = Timer()
    if "graph_kernel" in exampleBuilder.styles:
        counter = ProgressCounter(len(sentences), "Build examples", 0)
    else:
        counter = ProgressCounter(len(sentences), "Build examples")

    calculatePredictedRange(exampleBuilder, sentences)

    exampleCount = 0
    outfile = open(outfilename, "wt")
    # Close the file even if building the examples fails halfway.
    try:
        for sentence in sentences:
            counter.update(
                1, "Building examples (" + sentence[0].getSentenceId() + "): ")
            examples = exampleBuilder.buildExamples(sentence[0])
            # The examples are counted before pre-processing.
            exampleCount += len(examples)
            examples = exampleBuilder.preProcessExamples(examples)
            Example.appendExamples(examples, outfile)
    finally:
        outfile.close()

    print >> sys.stderr, "Examples built:", str(exampleCount)
    print >> sys.stderr, "Features:", len(exampleBuilder.featureSet.getNames())
    print >> sys.stderr, "Elapsed", timer.toString()
def train(cls, examples, parameters, outputFile=None):  #, timeout=None):
    """
    Train the SVM-multiclass classifier on a set of examples.

    @type examples: string (filename) or list (or iterator) of examples
    @param examples: a list or file containing examples in SVM-format
    @type parameters: a dictionary or string
    @param parameters: parameters for the classifier
    @type outputFile: string
    @param outputFile: the name of the model file to be written
    @rtype: int
    @return: the return code of the external training program
    """
    timer = Timer()
    parameters = cls.getParams(parameters)
    # If examples are in a list, they will be written to a file for SVM-multiclass
    if isinstance(examples, list):
        print >> sys.stderr, "Training SVM-MultiClass on", len(
            examples), "examples"
        # The receiver of this method is named "cls"; the name "self" is not
        # defined here, so the helpers are reached through cls.
        trainPath = cls.tempDir + "/train.dat"
        examples = cls.filterTrainingSet(examples)
        Example.writeExamples(examples, trainPath)
    else:
        print >> sys.stderr, "Training SVM-MultiClass on file", examples
        trainPath = cls.stripComments(examples)
    # NOTE(review): the location of the training binary is hard-coded.
    args = ["/home/jari/Programs/liblinear-1.5-poly2/train"]
    cls.__addParametersToSubprocessCall(args, parameters)
    if outputFile is None:
        args += [trainPath, "model"]
        logFile = open("svmmulticlass.log", "at")
    else:
        args += [trainPath, outputFile]
        logFile = open(outputFile + ".log", "wt")
    # Close the log even if the subprocess cannot be started.
    try:
        rv = subprocess.call(args, stdout=logFile)
    finally:
        logFile.close()
    print >> sys.stderr, timer.toString()
    return rv
def optimizeLocal(Classifier,
                  Evaluator,
                  trainExamples,
                  testExamples,
                  classIds,
                  combinations,
                  workDir=None,
                  timeout=None):
    """
    Train, test and evaluate a classifier with each parameter combination
    and select the combination whose evaluation compares best.

    @param Classifier: provides train(examples, parameters, output) and test(examples, model, output)
    @param Evaluator: provides evaluate(examples, predictions, classIds, output)
    @param trainExamples: the examples passed to Classifier.train
    @param testExamples: the examples passed to Classifier.test and Evaluator.evaluate
    @param classIds: passed on to Evaluator.evaluate
    @type combinations: list
    @param combinations: the parameter combinations to try
    @type workDir: string
    @param workDir: directory for the model, classification and evaluation files (default: relative file names)
    @param timeout: not used by this function
    @rtype: list or None
    @return: [evaluator, model file, classifications file, evaluation file, combination]
    for the best combination, or None if there are no combinations
    """
    bestResult = None
    for combinationCount, combination in enumerate(combinations, 1):
        Stream.setIndent(" ")
        print >> sys.stderr, "Parameters " + str(combinationCount) + "/" + str(
            len(combinations)) + ":", str(combination)
        Stream.setIndent(" ")
        combinationId = getCombinationString(combination)
        # Train
        trainOutput = "model-" + combinationId
        if workDir is not None:
            trainOutput = os.path.join(workDir, trainOutput)
        print >> sys.stderr, "Training..."
        timer = Timer()
        Classifier.train(trainExamples, combination, trainOutput)
        print >> sys.stderr, "Training Complete, time:", timer.toString()
        # Test
        testOutput = "classifications-" + combinationId
        if workDir is not None:
            testOutput = os.path.join(workDir, testOutput)
        print >> sys.stderr, "Testing..."
        timer = Timer()
        Classifier.test(testExamples, trainOutput, testOutput)
        print >> sys.stderr, "Testing Complete, time:", timer.toString()
        # Evaluate
        evaluationOutput = "evaluation-" + combinationId + ".csv"
        if workDir is not None:
            evaluationOutput = os.path.join(workDir, evaluationOutput)
        Stream.setIndent(" ")
        evaluator = Evaluator.evaluate(testExamples, testOutput, classIds,
                                       evaluationOutput)
        # Keep the first result, and any later one that compares better.
        if bestResult is None or evaluator.compare(bestResult[0]) > 0:
            bestResult = [
                evaluator, trainOutput, testOutput, evaluationOutput,
                combination
            ]
    Stream.setIndent()
    # Without any combination there is no result to index.
    if bestResult is None:
        print >> sys.stderr, "No parameter combinations to evaluate"
        return None
    print >> sys.stderr, "Selected parameters", bestResult[-1]
    return bestResult
def laplacian(self):
    """Computes hypergraph laplacian

    Delta = I - Theta, where Theta is the matrix returned by
    ``self.theta_matrix()``
    (Theta = Dv^-1/2 H W De^-1 H^T Dv^-1/2).

    The elapsed time is stored in ``self.laplacian_timer``.

    Returns
    -------
    Delta: sparse matrix
       hypergraph laplacian
    """
    with Timer() as t_l:
        Theta = self.theta_matrix()
        # Identity of the same shape as Theta.
        identity = spsp.eye(*sp.shape(Theta))
        Delta = identity - Theta
    elapsed = t_l.secs
    self.laplacian_timer = elapsed
    return Delta
def incidence_matrix(self):
    """Computes incidence matrix of size |V|*|E|

    h(v,e)=1 if v in e
    h(v,e)=0 if v not in e

    |V| is the number of unique entries of ``self.edge_list`` and |E| the
    size of its first axis. The entries of ``self.edge_list`` are used
    directly as row indices (presumably vertex ids 0..|V|-1 - confirm).
    The elapsed time is stored in ``self.incidence_matrix_timer``.

    Returns
    -------
    H: sparse incidence matrix
       sparse incidence matrix of size |V|*|E|
    """
    with Timer() as t_in:
        n_vertices = sp.shape(sp.unique(self.edge_list.flatten()))[0]
        n_edges = sp.shape(self.edge_list)[0]
        H = spsp.lil_matrix((n_vertices, n_edges))
        # Visit every entry of the edge list; the first component of the
        # multi index is the index of the edge the entry belongs to.
        cursor = sp.nditer(self.edge_list, flags=['multi_index', 'refs_ok'])
        while not cursor.finished:
            vertex = cursor[0]
            edge = cursor.multi_index[0]
            H[vertex, edge] = 1.0
            cursor.iternext()
    self.incidence_matrix_timer = t_in.secs
    return H
def waitForJobs(self, scriptNames, timeout=None):
    """
    Poll the status of the given scripts until none of them is queued or
    running, or until the timeout has been exceeded.

    @type scriptNames: list
    @param scriptNames: the scripts whose status is queried with self.getLouhiStatus
    @type timeout: number or None
    @param timeout: maximum total waiting time in seconds, None for no limit
    @rtype: Boolean
    @return: True if all jobs left the queued and running states, False on timeout
    """
    # The original asserted against "outputFileNames" and counted
    # "combinations", neither of which is defined in this method; the
    # job count is taken from scriptNames instead.
    print >> sys.stderr, "Waiting for results"
    louhiTimer = Timer()
    while True:
        # count the jobs in each state
        processStatus = {"FINISHED":0, "QUEUED":0, "FAILED":0, "RUNNING":0}
        for scriptName in scriptNames:
            processStatus[self.getLouhiStatus(scriptName)] += 1
        p = processStatus
        processStatusString = str(p["QUEUED"]) + " queued, " + str(p["RUNNING"]) + " running, " + str(p["FINISHED"]) + " finished, " + str(p["FAILED"]) + " failed"
        if processStatus["QUEUED"] + processStatus["RUNNING"] == 0:
            print >> sys.stderr
            print >> sys.stderr, "All jobs done (" + processStatusString + ")"
            break
        # decide what to do
        if timeout is None or louhiTimer.getElapsedTime() < timeout:
            # NOTE(review): cscConnection is not defined in this method;
            # presumably a module level object - confirm.
            sleepString = " [ ] "
            print >> sys.stderr, "\rWaiting for " + str(len(scriptNames)) + " on " + cscConnection.machineName + "(" + processStatusString + "),", louhiTimer.elapsedTimeToString() + sleepString,
            # Wait 60 seconds in 5 second steps, redrawing the progress bar.
            sleepTimer = Timer()
            while sleepTimer.getElapsedTime() < 60:
                steps = int(10 * sleepTimer.getElapsedTime() / 60) + 1
                sleepString = " [" + steps * "." + (10-steps) * " " + "] "
                print >> sys.stderr, "\rWaiting for " + str(len(scriptNames)) + " on " + cscConnection.machineName + "(" + processStatusString + "),", louhiTimer.elapsedTimeToString() + sleepString,
                time.sleep(5)
        else:
            print >> sys.stderr
            print >> sys.stderr, "Timed out, ", louhiTimer.elapsedTimeToString()
            return False
    return True
def test(cls,
         examples,
         modelPath,
         output=None,
         parameters=None,
         forceInternal=False):  # , timeout=None):
    """
    Classify examples with a pre-trained model.

    @type examples: string (filename) or list (or iterator) of examples
    @param examples: a list or file containing examples in SVM-format
    @type modelPath: string
    @param modelPath: filename of the pre-trained model file
    @type parameters: a dictionary or string
    @param parameters: parameters for the classifier
    @type output: string
    @param output: the name of the predictions file to be written
    @type forceInternal: Boolean
    @param forceInternal: Use python classifier even if SVM Multiclass binary is defined in Settings.py
    @rtype: list
    @return: one list per line of the predictions file: the first field as an int, followed by the remaining fields as strings
    """
    if forceInternal or Settings.SVMMultiClassDir is None:
        return cls.testInternal(examples, modelPath, output)
    timer = Timer()
    if isinstance(examples, list):
        print >> sys.stderr, "Classifying", len(
            examples), "with SVM-MultiClass model", modelPath
        # The receiver of this method is named "cls"; the name "self" is not
        # defined here. The predictions returned by the filter are replaced
        # below by the ones read from the predictions file.
        examples, predictions = cls.filterClassificationSet(examples, False)
        testPath = cls.tempDir + "/test.dat"
        Example.writeExamples(examples, testPath)
    else:
        print >> sys.stderr, "Classifying file", examples, "with SVM-MultiClass model", modelPath
        testPath = cls.stripComments(examples)
        examples = Example.readExamples(examples, False)
    # NOTE(review): the location of the prediction binary is hard-coded.
    args = ["/home/jari/Programs/liblinear-1.5-poly2/predict"]
    if modelPath is None:
        modelPath = "model"
    if parameters is not None:
        # Work on a copy so that the caller's parameters are not modified.
        parameters = copy.copy(parameters)
        if "c" in parameters:
            del parameters["c"]
        if "predefined" in parameters:
            modelPath = os.path.join(parameters["predefined"][0],
                                     "classifier/model")
            del parameters["predefined"]
        cls.__addParametersToSubprocessCall(args, parameters)
    if output is None:
        output = "predictions"
        logFile = open("svmmulticlass.log", "at")
    else:
        logFile = open(output + ".log", "wt")
    args += [testPath, modelPath, output]
    # Close the log even if the subprocess cannot be started.
    try:
        subprocess.call(args, stdout=logFile, stderr=logFile)
    finally:
        logFile.close()
    predictions = []
    predictionsFile = open(output, "rt")
    try:
        for line in predictionsFile:
            fields = line.split()
            predictions.append([int(fields[0])] + fields[1:])
    finally:
        predictionsFile.close()
    print >> sys.stderr, timer.toString()
    return predictions
def laplacian_eigs(self,
                   k=6,
                   type='SM',
                   filename=None,
                   minTol=1e-23,
                   **kwargs):
    """Computes eigenvectors of laplacian

    Parameters
    ----------
    k: int, optional
       number of eigenpairs; reduced to (matrix size - 2) if it is not
       smaller than the matrix size
    type: str, optional
       type of eigenpairs, as specified in scipy.sparse.linalg.eigs
       documentation, or 'LNZ' for lowest non zero
    filename: str, optional
       if given, save min eigenvalue, min eigenvector, all used
       eigenvalues, all used eigenvectors in json format
    minTol: float, optional
       with type 'LNZ', only eigenvalues greater than minTol are used
    kwargs: named arguments to pass to scipy.sparse.linalg.eigs function

    Returns
    -------
    eigenvals: ndarray
       array of the used eigenvalues, in ascending order
    eigenvecs: ndarray
       array of the matching eigenvectors, one per column
    """
    min_dict = {}
    lap = self.laplacian().tocsc()
    if k >= lap.shape[0]:
        k = lap.shape[0] - 2

    if type != 'LNZ':
        with Timer() as t_eig:
            vals, vecs = spla.eigs(lap, k=k, which=type, **kwargs)
            order = sp.argsort(vals)
            vals = sp.array([vals[i] for i in order])
            vecs = sp.array([vecs[:, i] for i in order]).T
        self.eigs_timer = t_eig.secs
        if filename:
            min_dict['min_eigenval_used'] = sp.real(min(vals)).tolist()
            min_dict['min_eigenvec_used'] = sp.real(
                vecs[:, sp.argmin(vals)]).tolist()
            min_dict['eigenvals_used'] = sp.real(vals).tolist()
            min_dict['eigenvecs_used'] = sp.real(vecs).tolist()
            json_writer(min_dict, filename)
        return vals, vecs

    # 'LNZ': take the smallest magnitude eigenpairs and keep those whose
    # eigenvalue is above minTol.
    with Timer() as t_eig:
        vals, vecs = spla.eigs(lap, k=k, which='SM', **kwargs)
        #DBG
        print(vals)
        #sort vals and vecs
        order = sp.argsort(vals)
        vals = sp.array([vals[i] for i in order])
        vecs = sp.array([vecs[:, i] for i in order]).T
        #DBG
        print(vals)
        print(order)
    self.eigs_timer = t_eig.secs

    kept = [i for i in range(len(vals)) if vals[i] > minTol]
    used_vals = sp.array([vals[i] for i in kept])
    used_vecs = sp.array([vecs[:, i] for i in kept]).T
    #DBG
    print('******eigendata:')
    print(used_vals)
    print(min(used_vals))
    if filename:
        min_dict['min_eigenval_used'] = sp.real(min(used_vals)).tolist()
        min_dict['min_eigenvec_used'] = sp.real(
            used_vecs[:, sp.argmin(used_vals)]).tolist()
        print('-----------eigenvec_len:')
        print(
            sp.shape(
                sp.real(used_vecs[:, sp.argmin(used_vals)]).tolist()))
        min_dict['eigenvals_used'] = sp.real(used_vals).tolist()
        min_dict['eigenvecs_used'] = sp.real(used_vecs).tolist()
        json_writer(min_dict, filename)
    self.__isPSD(lap, k)
    self.__test_eigenpairs(vals, vecs, lap)
    return used_vals, used_vecs
async def run(self):
    """Register the tasks once, then keep refreshing self.last_update.

    After self._register_tasks() the loop runs forever: it stores
    Timer.get_ms_time() in self.last_update and sleeps for
    Config.Realm.Settings.min_timeout.
    """
    self._register_tasks()

    while True:
        now_ms = Timer.get_ms_time()
        self.last_update = now_ms
        pause = Config.Realm.Settings.min_timeout
        await sleep(pause)
# Command line options of the cross-validation script.
optparser.add_option("-i", "--input", default=defaultAnalysisFilename, dest="input", help="Corpus in analysis format", metavar="FILE")
optparser.add_option("-o", "--output", default=None, dest="output", help="Output directory, useful for debugging")
optparser.add_option("-c", "--classifier", default="SVMLightClassifier", dest="classifier", help="Classifier Class")
optparser.add_option("-t", "--tokenization", default="split_gs", dest="tokenization", help="tokenization")
optparser.add_option("-p", "--parse", default="split_gs", dest="parse", help="parse")
optparser.add_option("-x", "--exampleBuilderParameters", default=None, dest="exampleBuilderParameters", help="Parameters for the example builder")
optparser.add_option("-y", "--parameters", default=None, dest="parameters", help="Parameters for the classifier")
optparser.add_option("-b", "--exampleBuilder", default="SimpleDependencyExampleBuilder", dest="exampleBuilder", help="Example Builder Class")
optparser.add_option("-e", "--evaluator", default="BinaryEvaluator", dest="evaluator", help="Prediction evaluator class")
optparser.add_option("-v", "--visualization", default=None, dest="visualization", help="Visualization output directory. NOTE: If the directory exists, it will be deleted!")
optparser.add_option("-f", "--folds", default="10", dest="folds", help="X-fold cross validation")
optparser.add_option("-d", "--paramOptData", default=None, dest="paramOptData", help="The fraction of the corpus to be always used for parameter optimization")
optparser.add_option("-m", "--resultsToXML", default=None, dest="resultsToXML", help="Output interaction xml-file")
(options, args) = optparser.parse_args()

timer = Timer()
print >> sys.stderr, timer.toString()

# --folds is either a single number ("10"), used for both fold counts, or
# two comma separated numbers ("5,10"). str.find returns -1 (not 0) when
# there is no comma, so the test is done with the "in" operator.
if "," in options.folds:
    options.folds = options.folds.split(",")
    assert(len(options.folds)==2)
    options.folds[0] = int(options.folds[0])
    options.folds[1] = int(options.folds[1])
    if options.paramOptData is not None:
        print >> sys.stderr, "Parameter optimization set defined, parameter " + str(options.folds[1]) + "-fold cross validation will not be performed."
else:
    options.folds = (int(options.folds),int(options.folds))

if options.output is not None:
    if os.path.exists(options.output):
        print >> sys.stderr, "Output directory exists, removing", options.output
class GeneralEntityTypeRecognizer(ExampleBuilder):
    """
    Example builder that creates one classification example for each
    token that is not part of a name. The class of an example is the
    (merged) type of the entities headed by the token, or 1 ("neg") when
    the token heads no entity.

    Dependency-chain features are generated twice: by a recursive crawl
    of the dependency graph (buildChains) and from precalculated path
    matrices (buildChainsAlternative). buildExamples times both and
    reports any difference between the two feature sets.
    """

    def __init__(self, style=None, classSet=None, featureSet=None):
        """
        @param style: style definitions; only membership of "normalize" is tested in this class
        @param classSet: class id set; a new IdSet(1) is created if None
        @param featureSet: feature id set; a new IdSet() is created if None
        """
        if classSet == None:
            classSet = IdSet(1)
        # The negative class must have id 1; buildExamples uses the literal 1 for it
        assert( classSet.getId("neg") == 1 )
        if featureSet == None:
            featureSet = IdSet()

        ExampleBuilder.__init__(self, classSet, featureSet)
        self.styles = style
        self.timerBuildExamples = Timer(False)
        self.timerCrawl = Timer(False)
        self.timerCrawlPrecalc = Timer(False)
        self.timerMatrix = Timer(False)
        self.timerMatrixPrecalc = Timer(False)

    @classmethod
    def run(cls, input, output, parse, tokenization, style, idFileTag=None):
        """
        Build examples for all sentences of the input and print the
        accumulated timers to stderr.
        """
        classSet, featureSet = cls.getIdSets(idFileTag)
        e = GeneralEntityTypeRecognizer(style, classSet, featureSet)
        sentences = cls.getSentences(input, parse, tokenization)
        e.buildExamplesForSentences(sentences, output, idFileTag)
        print >> sys.stderr, "Time for buildExamples:", e.timerBuildExamples.elapsedTimeToString()
        print >> sys.stderr, "Time for Crawl:", e.timerCrawl.elapsedTimeToString()
        print >> sys.stderr, "Time for Crawl(Precalc):", e.timerCrawlPrecalc.elapsedTimeToString()
        print >> sys.stderr, "Time for Matrix:", e.timerMatrix.elapsedTimeToString()
        print >> sys.stderr, "Time for Matrix(Precalc):", e.timerMatrixPrecalc.elapsedTimeToString()

    def preProcessExamples(self, allExamples):
        """
        Normalize the feature vectors if the "normalize" style is set.
        Returns the same example collection that was passed in.
        """
        # styles defaults to None, for which the "in" test would raise TypeError
        if self.styles is not None and "normalize" in self.styles:
            print >> sys.stderr, " Normalizing feature vectors"
            ExampleUtils.normalizeFeatureVectors(allExamples)
        return allExamples

    def getMergedEntityType(self, entities):
        """
        If a single token belongs to multiple entities of different types,
        a new, composite type is defined. This type is the alphabetically
        ordered types of these entities joined with '---'.
        """
        # Local names must not shadow the "types" module or the "type" builtin
        entityTypes = set()
        for entity in entities:
            entityTypes.add(entity.get("type"))
        return "---".join(sorted(entityTypes))

    def getTokenFeatures(self, token, sentenceGraph):
        """
        Returns a list of features based on the attributes of a token.
        These can be used to define more complex features.
        """
        # These features are cached when this method is first called
        # for a token. The cache is reset for each sentence in buildExamples.
        if token in self.tokenFeatures:
            return self.tokenFeatures[token]
        features = []
        features.append("_txt_"+sentenceGraph.getTokenText(token))
        features.append("_POS_"+token.get("POS"))
        if sentenceGraph.tokenIsName[token]:
            features.append("_isName")
            for entity in sentenceGraph.tokenIsEntityHead[token]:
                if entity.get("isName") == "True":
                    features.append("_annType_"+entity.get("type"))
        self.tokenFeatures[token] = features
        return features

    def buildLinearOrderFeatures(self,sentenceGraph,index,tag,features):
        """
        Linear features are built by marking token features with a tag
        that defines their relative position in the linear order.
        """
        tag = "linear_"+tag
        for tokenFeature in self.getTokenFeatures(sentenceGraph.tokens[index], sentenceGraph):
            features[self.featureSet.getId(tag+tokenFeature)] = 1

    def buildExamples(self, sentenceGraph):
        """
        Build one example for each token of the sentence

        Returns a list of (id, category, features, extra) tuples. The
        features dictionary of an example is extended with the chain
        features after the tuple has been appended to the list.
        """
        self.timerBuildExamples.start()
        examples = []
        exampleIndex = 0

        self.tokenFeatures = {}

        namedEntityCount = 0
        for entity in sentenceGraph.entities:
            if entity.get("isName") == "True": # known data which can be used for features
                namedEntityCount += 1
        namedEntityCountFeature = "nameCount_" + str(namedEntityCount)

        # Sentence-level bag of words, with a separate count for name tokens
        bagOfWords = {}
        for token in sentenceGraph.tokens:
            text = "bow_" + token.get("text")
            if text not in bagOfWords:
                bagOfWords[text] = 0
            bagOfWords[text] += 1
            if sentenceGraph.tokenIsName[token]:
                text = "ne_" + text
                if text not in bagOfWords:
                    bagOfWords[text] = 0
                bagOfWords[text] += 1
        bowFeatures = {}
        for k,v in bagOfWords.iteritems():
            bowFeatures[self.featureSet.getId(k)] = v

        # Precalculate sorted edge lists for the recursive crawl
        self.timerCrawl.start()
        self.timerCrawlPrecalc.start()
        self.inEdgesByToken = {}
        self.outEdgesByToken = {}
        self.edgeSetByToken = {}
        for token in sentenceGraph.tokens:
            inEdges = sentenceGraph.dependencyGraph.in_edges(token)
            inEdges.sort(compareDependencyEdgesById)
            self.inEdgesByToken[token] = inEdges
            outEdges = sentenceGraph.dependencyGraph.out_edges(token)
            outEdges.sort(compareDependencyEdgesById)
            self.outEdgesByToken[token] = outEdges
            self.edgeSetByToken[token] = set(inEdges + outEdges)
        self.timerCrawl.stop()
        self.timerCrawlPrecalc.stop()

        # Precalculate the path matrices for the alternative chain builder
        self.timerMatrix.start()
        self.timerMatrixPrecalc.start()
        self._initMatrices(sentenceGraph)
        self.timerMatrix.stop()
        self.timerMatrixPrecalc.stop()

        for i in range(len(sentenceGraph.tokens)):
            token = sentenceGraph.tokens[i]
            # Recognize only non-named entities (i.e. interaction words)
            if sentenceGraph.tokenIsName[token]:
                continue

            # CLASS
            if len(sentenceGraph.tokenIsEntityHead[token]) > 0:
                category = self.classSet.getId(self.getMergedEntityType(sentenceGraph.tokenIsEntityHead[token]))
            else:
                category = 1

            # FEATURES
            features = {}
            features[self.featureSet.getId(namedEntityCountFeature)] = 1
            # pre-calculate bow _features_
            features.update(bowFeatures)

            # Main features
            text = token.get("text")
            features[self.featureSet.getId("txt_"+text)] = 1
            features[self.featureSet.getId("POS_"+token.get("POS"))] = 1
            stem = PorterStemmer.stem(text)
            features[self.featureSet.getId("stem_"+stem)] = 1
            features[self.featureSet.getId("nonstem_"+text[len(stem):])] = 1

            # Linear order features
            # NOTE(review): "> 0" excludes the first token of the sentence from
            # the window; ">= 0" looks intended. Left unchanged because fixing
            # it changes the generated feature set -- confirm before changing.
            for index in [-3,-2,-1,1,2,3]:
                if i + index > 0 and i + index < len(sentenceGraph.tokens):
                    self.buildLinearOrderFeatures(sentenceGraph, i + index, str(index), features)

            # Content
            if i > 0 and text[0].isalpha() and text[0].isupper():
                features[self.featureSet.getId("upper_case_start")] = 1
            for j in range(len(text)):
                if j > 0 and text[j].isalpha() and text[j].isupper():
                    features[self.featureSet.getId("upper_case_middle")] = 1
                # numbers and special characters
                if text[j].isdigit():
                    features[self.featureSet.getId("has_digits")] = 1
                    if j > 0 and text[j-1] == "-":
                        features[self.featureSet.getId("has_hyphenated_digit")] = 1
                elif text[j] == "-":
                    features[self.featureSet.getId("has_hyphen")] = 1
                elif text[j] == "/":
                    features[self.featureSet.getId("has_fslash")] = 1
                elif text[j] == "\\":
                    features[self.featureSet.getId("has_bslash")] = 1
                # duplets
                if j > 0:
                    features[self.featureSet.getId("dt_"+text[j-1:j+1].lower())] = 1
                # triplets
                if j > 1:
                    features[self.featureSet.getId("tt_"+text[j-2:j+1].lower())] = 1

            # Attached edges (Hanging in and out edges)
            t1InEdges = self.inEdgesByToken[token]
            for edge in t1InEdges:
                edgeType = edge[2].get("type")
                features[self.featureSet.getId("t1HIn_"+edgeType)] = 1
                features[self.featureSet.getId("t1HIn_"+edge[0].get("POS"))] = 1
                features[self.featureSet.getId("t1HIn_"+edgeType+"_"+edge[0].get("POS"))] = 1
                tokenText = sentenceGraph.getTokenText(edge[0])
                features[self.featureSet.getId("t1HIn_"+tokenText)] = 1
                features[self.featureSet.getId("t1HIn_"+edgeType+"_"+tokenText)] = 1
            t1OutEdges = self.outEdgesByToken[token]
            for edge in t1OutEdges:
                edgeType = edge[2].get("type")
                features[self.featureSet.getId("t1HOut_"+edgeType)] = 1
                features[self.featureSet.getId("t1HOut_"+edge[1].get("POS"))] = 1
                features[self.featureSet.getId("t1HOut_"+edgeType+"_"+edge[1].get("POS"))] = 1
                tokenText = sentenceGraph.getTokenText(edge[1])
                features[self.featureSet.getId("t1HOut_"+tokenText)] = 1
                features[self.featureSet.getId("t1HOut_"+edgeType+"_"+tokenText)] = 1

            extra = {"xtype":"token","t":token.get("id")}
            examples.append( (sentenceGraph.getSentenceId()+".x"+str(exampleIndex),category,features,extra) )
            exampleIndex += 1

            # chains
            # The crawl extends the example's own dictionary, the matrix
            # version extends a copy, and the key sets are then compared.
            copyFeatures = copy.copy(features)
            self.timerCrawl.start()
            self.buildChains(token, sentenceGraph, features)
            self.timerCrawl.stop()
            self.timerMatrix.start()
            self.buildChainsAlternative(token, copyFeatures, sentenceGraph)
            self.timerMatrix.stop()
            diff1 = set(features.keys()) - set(copyFeatures.keys())
            diff2 = set(copyFeatures.keys()) - set(features.keys())
            if len(diff1) != 0 or len(diff2) != 0:
                # Diagnostics go to stderr like the rest of the output
                print >> sys.stderr, "Error for token", token.get("id"), token.get("text")
                intersection = set(features.keys()) & set(copyFeatures.keys())
                print >> sys.stderr, "d1:",
                for key in sorted(diff1):
                    print >> sys.stderr, self.featureSet.getName(key) + ",",
                print >> sys.stderr
                print >> sys.stderr, "d2:",
                for key in sorted(diff2):
                    print >> sys.stderr, self.featureSet.getName(key) + ",",
                print >> sys.stderr
                print >> sys.stderr, "int:",
                intNames = []
                for key in sorted(intersection):
                    intNames.append(self.featureSet.getName(key))
                for name in sorted(intNames):
                    print >> sys.stderr, name + ",",
                print >> sys.stderr
        self.timerBuildExamples.stop()
        return examples

    def _initMatrices(self, sentenceGraph):
        """
        Precalculate the dictionaries of paths of one, two and three edges
        (dod1, dod2, dod3) between the nodes of the dependency graph.
        """
        nodes = sentenceGraph.dependencyGraph.nodes()
        self.dod1 = self._dodFromGraph(sentenceGraph, nodes)
        self.dod2 = self.multDictOfDicts(self.dod1, self.dod1, nodes)
        self.dod3 = self.multDictOfDicts(self.dod2, self.dod1, nodes)

    def _dodFromGraph(self, sentenceGraph, nodes):
        """
        Build a dictionary of dictionaries dod[i][j] listing the single-edge
        paths from node i to node j. Each edge is stored in both directions,
        tagged "frw_" in its own direction and "rev_" in the opposite one.
        """
        graph = sentenceGraph.dependencyGraph
        dod = {}
        for i in nodes:
            dod[i] = {}
        for i in nodes:
            for j in nodes:
                edge = graph.get_edge(i, j)
                if len(edge) > 0:
                    if j not in dod[i]:
                        dod[i][j] = []
                    if i not in dod[j]:
                        dod[j][i] = []
                    for e in edge:
                        t1 = sentenceGraph.tokensById[e.get("t1")]
                        t2 = sentenceGraph.tokensById[e.get("t2")]
                        # list of visited tokens, last edge of chain, chain string
                        dod[i][j].append( ([t1, t2], e, "frw_"+e.get("type")) ) # frw
                        dod[j][i].append( ([t2, t1], e, "rev_"+e.get("type")) ) # rev
        return dod

    def overlap(self, list1, list2):
        """
        Returns True if the two lists share at least one equal element.
        """
        for i in list1:
            for j in list2:
                if i == j: # duplicate dependency
                    return True
        return False

    def extendPaths(self, edges1, edges2):
        """
        Join each path of edges1 with each path of edges2, skipping joins
        that would visit a token twice. The first token of the second path
        is the last token of the first one, so it is left out of both the
        overlap test and the joined token list.
        """
        newEdges = []
        for e1 in edges1:
            for e2 in edges2:
                if not self.overlap(e1[0], e2[0][1:]):
                    newEdges.append( (e1[0] + e2[0][1:], e2[1], e1[2] + "-" + e2[2]) )
        return newEdges

    def multDictOfDicts(self, dod1, dod2, nodes):
        """
        Combine two path dictionaries: result[i][j] holds every path from
        dod1[i][k] extended with every path from dod2[k][j], over all k.
        """
        result = {}
        for i in nodes:
            result[i] = {}
        for i in nodes:
            for j in nodes:
                for k in nodes:
                    if k in dod1[i]:
                        edges1 = dod1[i][k]
                    else:
                        edges1 = []
                    if j in dod2[k]:
                        edges2 = dod2[k][j]
                    else:
                        edges2 = []
                    newPaths = self.extendPaths(edges1, edges2)
                    if len(newPaths) > 0:
                        if j in result[i]:
                            result[i][j].extend(newPaths)
                        else:
                            result[i][j] = newPaths
        return result

    def buildChainsAlternative(self, token, features, sentenceGraph):
        """
        Add the chain features for a token from the precalculated matrices.
        Paths of 1, 2 and 3 edges get the labels 3, 2 and 1, matching the
        depthLeft countdown of buildChains.
        """
        self._buildChainsMatrix(self.dod1, token, features, 3, sentenceGraph)
        self._buildChainsMatrix(self.dod2, token, features, 2, sentenceGraph)
        self._buildChainsMatrix(self.dod3, token, features, 1, sentenceGraph)

    def _buildChainsMatrix(self, matrix, token, features, depth, sentenceGraph):
        """
        Add features for all paths in the matrix that start from the token.
        """
        strDepthLeft = "dist_" + str(depth)
        for node in matrix[token].keys():
            if node == token: # don't allow self-loops
                continue
            for tokenFeature in self.getTokenFeatures(node, sentenceGraph):
                features[self.featureSet.getId(strDepthLeft + tokenFeature)] = 1
            for chain in matrix[token][node]:
                features[self.featureSet.getId("chain_"+strDepthLeft+"-"+chain[2])] = 1
                features[self.featureSet.getId("dep_"+strDepthLeft+chain[1].get("type"))] = 1

    def buildChains(self,token,sentenceGraph,features,depthLeft=3,chain="",visited=None):
        """
        Recursively crawl the dependency graph from the token, adding
        features for the edges and tokens found within depthLeft steps.

        @param chain: string of the edge types followed so far
        @param visited: set of edges that must not be followed again
        """
        if depthLeft == 0:
            return
        strDepthLeft = "dist_" + str(depthLeft)

        if visited == None:
            visited = set()

        inEdges = self.inEdgesByToken[token]
        outEdges = self.outEdgesByToken[token]
        # Edges of this token are excluded only on the deeper levels
        edgeSet = visited.union(self.edgeSetByToken[token])
        for edge in inEdges:
            if not edge in visited:
                edgeType = edge[2].get("type")
                features[self.featureSet.getId("dep_"+strDepthLeft+edgeType)] = 1

                nextToken = edge[0]
                for tokenFeature in self.getTokenFeatures(nextToken, sentenceGraph):
                    features[self.featureSet.getId(strDepthLeft + tokenFeature)] = 1

                features[self.featureSet.getId("chain_"+strDepthLeft+chain+"-rev_"+edgeType)] = 1
                self.buildChains(nextToken,sentenceGraph,features,depthLeft-1,chain+"-rev_"+edgeType,edgeSet)

        for edge in outEdges:
            if not edge in visited:
                edgeType = edge[2].get("type")
                features[self.featureSet.getId("dep_"+strDepthLeft+edgeType)] = 1

                nextToken = edge[1]
                for tokenFeature in self.getTokenFeatures(nextToken, sentenceGraph):
                    features[self.featureSet.getId(strDepthLeft + tokenFeature)] = 1

                features[self.featureSet.getId("chain_"+strDepthLeft+chain+"-frw_"+edgeType)] = 1
                self.buildChains(nextToken,sentenceGraph,features,depthLeft-1,chain+"-frw_"+edgeType,edgeSet)
def optimize(self, trainSets, classifySets, parameters=defaultOptimizationParameters, evaluationClass=None, evaluationArgs=None, combinationsThatTimedOut=None):
    """
    Search for the best parameter combination by training and evaluating
    the classifier with every combination of the given parameter values.

    @type trainSets: list of example lists, or list whose first item is a filename
    @param trainSets: training data; for list input, every set that differs from the current classify set is used for training
    @type classifySets: list
    @param classifySets: one evaluation set per fold
    @type parameters: dictionary
    @param parameters: maps each parameter name to the list of values to try. If it has the key "predefined", no optimization is done.
    @param evaluationClass: called as evaluationClass(predictions, **evaluationArgs); must also provide average() and pool()
    @type evaluationArgs: dictionary
    @param evaluationArgs: extra keyword arguments for evaluationClass
    @type combinationsThatTimedOut: list of dictionaries
    @param combinationsThatTimedOut: combinations to skip; combinations whose training fails are appended to this list
    @return: {"predefined": ...} for a predefined model, otherwise the tuple (None, pooled result, combination) of the best combination, or None if no combination produced results
    """
    if evaluationArgs is None: # avoid a shared mutable default argument
        evaluationArgs = {}
    if "predefined" in parameters:
        print >> sys.stderr, "Predefined model, skipping parameter estimation"
        return {"predefined": parameters["predefined"]}

    print >> sys.stderr, "Optimizing parameters"
    parameterNames = sorted(parameters.keys())
    parameterValues = []
    for parameterName in parameterNames:
        parameterValues.append([])
        for value in parameters[parameterName]:
            parameterValues[-1].append((parameterName, value))
    combinationLists = combine.combine(*parameterValues)
    combinations = []
    for combinationList in combinationLists:
        combinations.append({})
        for value in combinationList:
            combinations[-1][value[0]] = value[1]
    if combinationsThatTimedOut == None:
        combinationsThatTimedOut = []

    bestResult = None
    combinationCount = 1
    if hasattr(self, "tempDir"):
        # Each fold redirects tempDir and debugFile; the originals are restored at the end
        mainTempDir = self.tempDir
        mainDebugFile = self.debugFile
    for combination in combinations:
        print >> sys.stderr, " Parameters " + str(combinationCount) + "/" + str(len(combinations)) + ":", str(combination),
        skip = False
        for discarded in combinationsThatTimedOut:
            if self._dictIsIdentical(combination, discarded):
                print >> sys.stderr
                print >> sys.stderr, " Discarded before, skipping"
                skip = True
                break
        if skip:
            # The counter must advance for skipped combinations too, otherwise
            # the progress numbers and the "parametersN" directories of the
            # following combinations are off by one.
            combinationCount += 1
            continue
        fold = 1
        foldResults = []
        for classifyExamples in classifySets:
            if type(trainSets[0]) == types.StringType:
                trainExamples = trainSets[0]
            else:
                trainExamples = []
                for trainSet in trainSets:
                    if trainSet != classifyExamples:
                        trainExamples.extend(trainSet)
            # The examples are currently passed on without copying
            trainExamplesCopy = trainExamples
            classifyExamplesCopy = classifyExamples
            if hasattr(self, "tempDir"):
                self.tempDir = mainTempDir + "/parameters" + str(combinationCount) + "/optimization" + str(fold)
                if not os.path.exists(self.tempDir):
                    os.makedirs(self.tempDir)
                self.debugFile = open(self.tempDir + "/debug.txt", "wt")
            try:
                timer = Timer()
                trainRV = self.train(trainExamplesCopy, combination)
                print >> sys.stderr, " Time spent:", timer.elapsedTimeToString()
                if trainRV == 0:
                    predictions = self.classify(classifyExamplesCopy)
                    evaluation = evaluationClass(predictions, **evaluationArgs)
                    if len(classifySets) == 1:
                        print >> sys.stderr, evaluation.toStringConcise(" ")
                    else:
                        print >> sys.stderr, evaluation.toStringConcise(indent=" ", title="Fold " + str(fold))
                    foldResults.append(evaluation)
                    if hasattr(self, "tempDir"):
                        evaluation.saveCSV(self.tempDir + "/results.csv")
                else:
                    combinationsThatTimedOut.append(combination)
                    print >> sys.stderr, " Timed out"
            finally:
                # Close the debug file of every fold. Closing only once per
                # combination leaked the files of all but the last fold, and
                # closed the main debug file when there were no folds.
                if hasattr(self, "tempDir"):
                    self.debugFile.close()
            fold += 1
        if len(foldResults) > 0:
            averageResult = evaluationClass.average(foldResults)
            poolResult = evaluationClass.pool(foldResults)
            if hasattr(self, "tempDir"):
                TableUtils.writeCSV(combination, mainTempDir + "/parameters" + str(combinationCount) + ".csv")
                averageResult.saveCSV(mainTempDir + "/parameters" + str(combinationCount) + "/resultsAverage.csv")
                poolResult.saveCSV(mainTempDir + "/parameters" + str(combinationCount) + "/resultsPooled.csv")
            if len(classifySets) > 1:
                print >> sys.stderr, averageResult.toStringConcise(" Avg: ")
                print >> sys.stderr, poolResult.toStringConcise(" Pool: ")
            if bestResult == None or poolResult.compare(bestResult[1]) > 0:
                bestResult = (None, poolResult, combination)
                # Make sure memory is released, especially important since some of the previous steps
                # copy examples
                bestResult[1].classifications = None
                bestResult[1].predictions = None
        combinationCount += 1
    if hasattr(self, "tempDir"):
        self.tempDir = mainTempDir
        self.debugFile = mainDebugFile
    return bestResult
"--visualization", default=None, dest="visualization", help= "Visualization output directory. NOTE: If the directory exists, it will be deleted!" ) optparser.add_option( "-m", "--resultsToXML", default=None, dest="resultsToXML", help="Results in analysis xml. NOTE: for edges, pairs, not interactions" ) (options, args) = optparser.parse_args() mainTimer = Timer() print >> sys.stderr, __file__ + " start, " + mainTimer.toString() if options.output != None: if os.path.exists(options.output): print >> sys.stderr, "Output directory exists, removing", options.output shutil.rmtree(options.output) os.mkdir(options.output) if not os.path.exists(options.output + "/classifier"): os.mkdir(options.output + "/classifier") classifierParamDict = splitParameters(options.parameters) print >> sys.stderr, "Importing modules" exec "from ExampleBuilders." + options.exampleBuilder + " import " + options.exampleBuilder + " as ExampleBuilder" exec "from Classifiers." + options.classifier + " import " + options.classifier + " as Classifier"
def __init__(self, view_origin, view_dim, screen):
    """Create the view plane, the per-action timers, the limits and the key bindings.

    view_origin and view_dim are passed unchanged to Plane; screen is
    stored as given.
    """
    self.view_plane = Plane(view_origin, view_dim)
    self.screen = screen

    # One Timer per zoom/move action.
    # NOTE(review): presumably the toggle_* handlers start and stop these -- confirm.
    self.zoom_in, self.zoom_out = Timer(), Timer()
    self.move_left, self.move_right = Timer(), Timer()
    self.move_up, self.move_down = Timer(), Timer()

    # Zoom limits and sensitivities.
    self.zoom_max, self.zoom_min = 10, 1
    self.move_sensitivity, self.zoom_sensitivity = 50, 1

    # Key name -> bound handler method.
    self.key_events = dict(
        r=self.toggle_zoom_in,
        e=self.toggle_zoom_out,
        right=self.toggle_move_right,
        left=self.toggle_move_left,
        up=self.toggle_move_up,
        down=self.toggle_move_down,
    )
def optimizeCSC(Classifier, Evaluator, trainExamples, testExamples, classIds, combinations, workDir=None, timeout=None, cscConnection=None, downloadAllModels=False, steps="BOTH", threshold=False):
    """
    Submit one remote training/classification run per parameter combination,
    wait for the runs to finish and select the best result.

    @param Classifier: classifier class providing initTrainAndTestOnLouhi, getLouhiStatus, getLouhiPredictions and downloadModel
    @param Evaluator: evaluator class providing evaluate()
    @type classIds: IdSet or string (filename)
    @param classIds: class ids; a filename is loaded into an IdSet
    @type combinations: list of dictionaries
    @param combinations: the parameter combinations to try
    @param workDir: local directory for models and evaluation files
    @param timeout: maximum waiting time for the runs, or None for no limit
    @param cscConnection: connection object passed to the Classifier calls
    @param downloadAllModels: download and gzip the model of every combination
    @type steps: string
    @param steps: "SUBMIT" (only submit the runs), "RESULTS" (only collect results, run ids are rebuilt from the combinations) or "BOTH"
    @param threshold: determine classification thresholds with the evaluator
    @return: None if steps is "SUBMIT". Otherwise the best result with the gzipped model filename at index 1: [evaluator, model, predictions, evaluationOutput, combination], or for MultiLabelClassifier [{className: [fscore, run id, classId, threshold]}, model].
    """
    bestResult = None
    combinationCount = 1
    combinationIds = []
    assert steps in ["BOTH", "SUBMIT", "RESULTS"], steps

    if type(classIds) == types.StringType:
        classIds = IdSet(filename=classIds)
    if Classifier.__name__ == "MultiLabelClassifier":
        negClass1 = True
        if "classifier" in combinations[0] and combinations[0]["classifier"] == "svmperf":
            negClass1 = False
        # Diagnostics go to stderr like the rest of the output
        print >> sys.stderr, "negclass1", negClass1
        Classifier.makeClassFiles(trainExamples, testExamples, classIds, negClass1=negClass1)

    if steps in ["BOTH", "SUBMIT"]:
        print >> sys.stderr, "Initializing runs"
        for combination in combinations:
            Stream.setIndent(" ")
            print >> sys.stderr, "Parameters " + str(combinationCount) + "/" + str(len(combinations)) + ":", str(combination)
            # Train
            combinationIds.append(Classifier.initTrainAndTestOnLouhi(trainExamples, testExamples, combination, cscConnection, workDir, classIds))
            combinationCount += 1
    else:
        # Nothing is submitted: rebuild the run ids from the parameter values
        for combination in combinations:
            idStr = ""
            for key in sorted(combination.keys()):
                idStr += "-" + str(key) + "_" + str(combination[key])
            combinationIds.append(idStr)
    Stream.setIndent()

    if steps in ["BOTH", "RESULTS"]:
        Stream.setIndent(" ")
        print >> sys.stderr, "Waiting for results"
        louhiTimer = Timer()
        while (True):
            # count finished
            processStatus = {"FINISHED": 0, "QUEUED": 0, "FAILED": 0, "RUNNING": 0}
            for combinationId in combinationIds:
                Classifier.getLouhiStatus(combinationId, cscConnection, processStatus, classIds)
            processStatusString = str(processStatus["QUEUED"]) + " queued, " + str(processStatus["RUNNING"]) + " running, " + str(processStatus["FINISHED"]) + " finished, " + str(processStatus["FAILED"]) + " failed"
            if processStatus["QUEUED"] + processStatus["RUNNING"] == 0:
                print >> sys.stderr
                print >> sys.stderr, "All runs done (" + processStatusString + ")"
                break
            # decide what to do
            if timeout == None or louhiTimer.getElapsedTime() < timeout:
                sleepString = " [ ] "
                print >> sys.stderr, "\rWaiting for " + str(len(combinations)) + " on " + cscConnection.machineName + "(" + processStatusString + "),", louhiTimer.elapsedTimeToString() + sleepString,
                # Poll again after 60 seconds, updating the progress bar every 5 seconds
                sleepTimer = Timer()
                while sleepTimer.getElapsedTime() < 60:
                    # Own name for the bar length: reusing "steps" here
                    # overwrote the steps argument of the function
                    dots = int(10 * sleepTimer.getElapsedTime() / 60) + 1
                    sleepString = " [" + dots * "." + (10 - dots) * " " + "] "
                    print >> sys.stderr, "\rWaiting for " + str(len(combinations)) + " on " + cscConnection.machineName + "(" + processStatusString + "),", louhiTimer.elapsedTimeToString() + sleepString,
                    time.sleep(5)
            else:
                print >> sys.stderr
                print >> sys.stderr, "Timed out, ", louhiTimer.elapsedTimeToString()
                break

        print >> sys.stderr, "Evaluating results"
        bestCombinationId = None
        for i in range(len(combinationIds)):
            combinationId = combinationIds[i]
            Stream.setIndent(" ")
            # Evaluate
            predictions = Classifier.getLouhiPredictions(combinationId, cscConnection, workDir, classIds)
            if predictions == None:
                print >> sys.stderr, "No results for combination" + combinationId
            else:
                if downloadAllModels:
                    modelFileName = Classifier.downloadModel(combinationId, cscConnection, workDir)
                    if workDir != None:
                        modelFileName = os.path.join(workDir, modelFileName)
                    # Argument list instead of a shell string, so that the
                    # file name is never interpreted by a shell
                    subprocess.call(["gzip", "-fv", modelFileName])
                print >> sys.stderr, "Evaluating results for combination" + combinationId
                evaluationOutput = "evaluation" + combinationId + ".csv"
                if workDir != None:
                    evaluationOutput = os.path.join(workDir, evaluationOutput)
                evaluator = Evaluator.evaluate(testExamples, predictions, classIds, evaluationOutput)
                if threshold:
                    print >> sys.stderr, "Thresholding"
                    evaluator.determineThreshold(testExamples, predictions)
                if Classifier.__name__ != "MultiLabelClassifier":
                    if bestResult == None or evaluator.compare(bestResult[0]) > 0:
                        bestResult = [evaluator, None, predictions, evaluationOutput, combinations[i]]
                        bestCombinationId = combinationId
                else:
                    assert Evaluator.__name__ == "MultiLabelEvaluator", Evaluator.__name__
                    if bestResult == None:
                        bestResult = [{}, None]
                        for className in classIds.Ids:
                            if className != "neg" and "---" not in className:
                                bestResult[0][className] = [-1, None, classIds.getId(className), None]
                    # The best run is selected separately for each class
                    for className in classIds.Ids:
                        if className != "neg" and "---" not in className:
                            fscore = evaluator.dataByClass[classIds.getId(className)].fscore
                            if fscore > bestResult[0][className][0]:
                                bestResult[0][className] = [fscore, combinationId, bestResult[0][className][2]]
                                if threshold:
                                    classId = classIds.getId(className, False)
                                    if classId in evaluator.thresholds:
                                        bestResult[0][className].append(evaluator.thresholds[classId])
                                    else:
                                        bestResult[0][className].append(0.0)
                                else:
                                    bestResult[0][className].append(None)
                    # For the multilabel case the whole result is passed to downloadModel
                    bestCombinationId = bestResult
                    os.remove(predictions) # remove predictions to save space
        Stream.setIndent()
        print >> sys.stderr, "Selected parameters", bestResult[-1]

        # Download best model and predictions
        modelFileName = Classifier.downloadModel(bestCombinationId, cscConnection, workDir)
        if workDir != None:
            modelFileName = os.path.join(workDir, modelFileName)
        subprocess.call(["gzip", "-fv", modelFileName])
        modelFileName = modelFileName + ".gz"
        bestResult[1] = modelFileName
        return bestResult
def perform_rsa(self, draw="NONE", print_times="NONE", save_summary=False, save_data=None):
    """Run the RSA iterations until no voxels are left and return a run summary.

    Parameters
    ----------
    draw: str, optional
        "ITERATION" calls draw_func(self) after every iteration, "END" calls
        it once after the last one, "NONE" never.
    print_times: str, optional
        "ALL" prints the data of every iteration and passes a true print flag
        to every stop_timer call; "ALL" and "TOTAL" both print the final data
        and the total time; "NONE" prints nothing here.
    save_summary: bool, optional
        if true, the summary dictionary is passed to record_run
    save_data: optional
        if not None, passed to save_output together with self.figs and
        self.fig_num

    Returns
    -------
    summary_dict: dictionary
        with the keys "configuration", "iterations" (one entry of timers and
        data per iteration) and "summary"

    Raises
    ------
    ValueError
        if draw or print_times is not one of the accepted strings
    """
    if draw not in ("NONE", "ITERATION", "END"):
        raise ValueError("draw must be either: NONE, ITERATION or END")
    if print_times not in ("NONE", "ALL", "TOTAL"):
        raise ValueError("print_times must be either: NONE, ALL or TOTAL")
    print_times_all = print_times == "ALL"

    iter_timers = []
    # The configuration is recorded before initialise_rsa() is called
    summary_dict = {
        "configuration": {
            "fig_radiuses": self.fig_radiuses.astype(float).tolist(),
            "fig_positions": self.fig_xys.astype(float).tolist(),
            "cell_num_world_size": [int(self.cell_num_x), int(self.cell_num_y)],
            "cell_size": float(self.cell_size),
            "added_fig_num": int(self.added_fig_num),
            "voxel_removal_treshold": float(self.voxel_removal_treshold),
            "voxel_num_treshold": int(self.voxel_num_treshold),
            "fig_area": float(self.fig_area),
            "fig_radius": float(self.fig_radius),
            "version": float(self.version)
        }
    }
    iterations_data = []
    voxel_fraction = 1.0

    self.initialise_rsa()
    while self.voxel_num > 0:
        timer_iter = Timer()
        timer_iter.start_timer("iteration")

        timer_iter.start_timer("generation")
        self.generate_figs()
        g_t = timer_iter.stop_timer("generation", print_times_all)

        timer_iter.start_timer("reject_vs_existing")
        self.reject_figs_vs_existing()
        re_t = timer_iter.stop_timer("reject_vs_existing", print_times_all)

        timer_iter.start_timer("reject_vs_new")
        self.reject_figs_vs_new()
        rn_t = timer_iter.stop_timer("reject_vs_new", print_times_all)

        timer_iter.start_timer("split_voxels")
        # Split only when the share of failed additions exceeds the threshold
        # AND (nothing was added at all OR few voxels are left)
        voxel_added_cond = (
            1.0 - (self.successfully_added_figs_num / self.added_fig_num)
        ) > self.voxel_removal_treshold
        voxel_num_cond = (
            self.successfully_added_figs_num == 0
            or self.voxel_num < self.voxel_num_treshold
        )
        if voxel_added_cond and voxel_num_cond:
            self.split_voxels()
            voxel_fraction = 0.5 * voxel_fraction
        s_t = timer_iter.stop_timer("split_voxels", print_times_all)

        timer_iter.start_timer("reject_voxels")
        self.reject_voxels()
        rv_t = timer_iter.stop_timer("reject_voxels", print_times_all)

        i_t = timer_iter.stop_timer("iteration", print_times_all)
        iter_timers.append(timer_iter.get_timers())

        iteration_dict = {
            "timers": {
                "generation": g_t,
                "reject_vs_existing": re_t,
                "reject_vs_new": rn_t,
                "split_voxels": s_t,
                "reject_voxels": rv_t,
                "iteration": i_t
            },
            "data": {
                "voxel_num": int(self.voxel_num),
                "voxel_fraction": voxel_fraction,
                "fig_num": int(self.fig_num),
                "density": self.calculate_density()
            }
        }
        iterations_data.append(iteration_dict)

        if draw == "ITERATION":
            draw_func(self)
        if print_times_all:
            print("DATA: figures:", self.fig_num)
            print("DATA: voxels:", self.voxel_num)
            print("DATA: voxel_fraction:", voxel_fraction)
            print("DATA: density:", self.calculate_density())
            print("===================")
        self.iteration += 1

    # Element 2 of each "iteration" timer entry is summed as the total time
    total_time = sum(t["iteration"][2] for t in iter_timers)
    if print_times in ("ALL", "TOTAL"):
        print("DATA: figures:", self.fig_num)
        print("DATA: voxels:", self.voxel_num)
        print("DATA: voxel_fraction:", voxel_fraction)
        print("DATA: density:", self.calculate_density())
        name = "total"
        print(f'TIMER: {name:20s} {total_time:.20f}')
    if draw == "END":
        draw_func(self)

    final_dict = {
        "voxel_fraction": voxel_fraction,
        "fig_num": int(self.fig_num),
        "density": self.calculate_density(),
        "total_time": total_time
    }
    summary_dict["iterations"] = iterations_data
    summary_dict["summary"] = final_dict
    if save_summary:
        record_run(summary_dict)
    # Identity test: "!= None" is evaluated element-wise by array-like values
    if save_data is not None:
        save_output(self.figs, self.fig_num, save_data)
    self.finalise()
    return summary_dict
def spectral_clustering(self, clusters_n, k=6, type='SM', embed_type='custom', **kwargs):
    """Cluster the nodes with scikit-learn k-means on a spectral embedding.

    Parameters
    ----------
    clusters_n: int
        number of clusters
    k: int, optional
        number of eigenvectors requested from laplacian_eigs
        (used only by the 'custom' embedding)
    type: str, optional
        type of eigenvectors, as specified in laplacian_eigs
        (used only by the 'custom' embedding)
    embed_type: str, optional
        'custom' - eigenvectors come from self.laplacian_eigs
        'sklearn_laplacian' - embedding of the hypergraph laplacian with
            the modified spectral_embedding
        'sklearn_adjacency' - embedding of the hypergraph adjacency matrix
            with the original sklearn spectral_embedding
        Any other value is handled like 'custom', which is the default.
    kwargs:
        named arguments for the embedding function. For the two sklearn
        embeddings 'minTol' and 'maxiter' are discarded and 'filename',
        if given, is the file the eigenvectors are written to.

    Returns
    -------
    centroid: ndarray
        as returned by sklearn.cluster.k_means
    label: ndarray
        as returned by sklearn.cluster.k_means
    label_dict: dictionary
        result of _community_vector_match for the labels
    node_tags: list of str
        self.node_tags
    inertia: float
        as returned by sklearn.cluster.k_means
    """
    if embed_type in ('sklearn_laplacian', 'sklearn_adjacency'):
        # These options are not passed on to the sklearn-style embedding
        dump_file = kwargs.pop('filename', None)
        kwargs.pop('minTol', None)
        kwargs.pop('maxiter', None)

        if embed_type == 'sklearn_laplacian':
            eigenvecs = spectral_embedding(self.laplacian(), clusters_n, **kwargs)
        else:
            eigenvecs = skmanifold.spectral_embedding(self.adjacency_matrix(), clusters_n, **kwargs)

        if dump_file:
            json_writer({'eigenvecs_used': sp.real(eigenvecs).tolist()}, dump_file)
    else:
        eigenvecs = self.laplacian_eigs(k, type, **kwargs)[1]

    with Timer() as t_cl:
        cen, lab, inert = sklearn.cluster.k_means(eigenvecs, clusters_n)
    self.clustering_timer = t_cl.secs

    return cen, lab, self._community_vector_match(lab), self.node_tags, inert
optparser.add_option( "-d", "--paramOptData", default=None, dest="paramOptData", help= "The fraction of the corpus to be always used for parameter optimization" ) optparser.add_option("-m", "--resultsToXML", default=None, dest="resultsToXML", help="Output interaction xml-file") (options, args) = optparser.parse_args() timer = Timer() print >> sys.stderr, timer.toString() if options.folds.find(",") != 0: options.folds = options.folds.split(",") assert (len(options.folds) == 2) options.folds[0] = int(options.folds[0]) options.folds[1] = int(options.folds[1]) if options.paramOptData != None: print >> sys.stderr, "Parameter optimization set defined, parameter " + str( options.folds[1] ) + "-fold cross validation will not be performed." else: options.folds = (int(options.folds), int(options.folds)) if options.output != None:
def test(cls, examples, modelPath, output=None, parameters=None, forceInternal=False, classIds=None): # , timeout=None): """ Classify examples with a pre-trained model. @type examples: string (filename) or list (or iterator) of examples @param examples: a list or file containing examples in SVM-format @type modelPath: string @param modelPath: filename of the pre-trained model file @type parameters: a dictionary or string @param parameters: parameters for the classifier @type output: string @param output: the name of the predictions file to be written @type forceInternal: Boolean @param forceInternal: Use python classifier even if SVM Multiclass binary is defined in Settings.py """ if type(parameters) == types.StringType: parameters = splitParameters(parameters) timer = Timer() if type(examples) == types.ListType: print >> sys.stderr, "Classifying", len( examples), "with SVM-MultiClass model", modelPath examples, predictions = self.filterClassificationSet( examples, False) testPath = self.tempDir + "/test.dat" Example.writeExamples(examples, testPath) else: print >> sys.stderr, "Classifying file", examples, "with SVM-MultiClass model", modelPath testPath = examples examples = Example.readExamples(examples, False) if parameters != None: parameters = copy.copy(parameters) if parameters.has_key("c"): del parameters["c"] if parameters.has_key("predefined"): parameters = copy.copy(parameters) modelPath = os.path.join(parameters["predefined"][0], "classifier/model") del parameters["predefined"] # Read model if modelPath == None: modelPath = "model-multilabel" classModels = {} if modelPath.endswith(".gz"): f = gzip.open(modelPath, "rt") else: f = open(modelPath, "rt") thresholds = {} for line in f: key, value, threshold = line.split() classModels[key] = value if threshold != "None": thresholds[key] = float(threshold) else: thresholds[key] = 0.0 f.close() mergedPredictions = [] if type(classIds) == types.StringType: classIds = IdSet(filename=classIds) #print classModels 
print "Thresholds", thresholds classifierBin = Settings.SVMMultiClassDir + "/svm_multiclass_classify" print parameters if "classifier" in parameters and "svmperf" in parameters["classifier"]: classifierBin = Settings.SVMPerfDir + "/svm_perf_classify" parameters = copy.copy(parameters) del parameters["classifier"] for className in classIds.getNames(): if className != "neg" and not "---" in className: classId = classIds.getId(className) if thresholds[str(className)] != 0.0: print >> sys.stderr, "Classifying", className, "with threshold", thresholds[ str(className)] else: print >> sys.stderr, "Classifying", className args = [classifierBin] #self.__addParametersToSubprocessCall(args, parameters) classOutput = "predictions" + ".cls-" + className logFile = open("svmmulticlass" + ".cls-" + className + ".log", "at") args += [testPath, classModels[str(className)], classOutput] print args subprocess.call(args, stdout=logFile, stderr=logFile) cls.addPredictions(classOutput, mergedPredictions, classId, len(classIds.Ids), threshold=thresholds[str(className)]) print >> sys.stderr, timer.toString() predFileName = output f = open(predFileName, "wt") for mergedPred in mergedPredictions: if len(mergedPred[0]) > 1 and "1" in mergedPred[0]: mergedPred[0].remove("1") mergedPred[1] = str(mergedPred[1]) mergedPred[0] = ",".join(sorted(list(mergedPred[0]))) f.write(" ".join(mergedPred) + "\n") f.close() return mergedPredictions
def test(cls, examples, modelPath, output=None, parameters=None, forceInternal=False, classIds=None): # , timeout=None): """ Classify examples with a pre-trained model. @type examples: string (filename) or list (or iterator) of examples @param examples: a list or file containing examples in SVM-format @type modelPath: string @param modelPath: filename of the pre-trained model file @type parameters: a dictionary or string @param parameters: parameters for the classifier @type output: string @param output: the name of the predictions file to be written @type forceInternal: Boolean @param forceInternal: Use python classifier even if SVM Multiclass binary is defined in Settings.py """ if type(parameters) == types.StringType: parameters = splitParameters(parameters) timer = Timer() if type(examples) == types.ListType: print >> sys.stderr, "Classifying", len(examples), "with SVM-MultiClass model", modelPath examples, predictions = self.filterClassificationSet(examples, False) testPath = self.tempDir+"/test.dat" Example.writeExamples(examples, testPath) else: print >> sys.stderr, "Classifying file", examples, "with SVM-MultiClass model", modelPath testPath = examples examples = Example.readExamples(examples,False) if parameters != None: parameters = copy.copy(parameters) if parameters.has_key("c"): del parameters["c"] if parameters.has_key("predefined"): parameters = copy.copy(parameters) modelPath = os.path.join(parameters["predefined"][0],"classifier/model") del parameters["predefined"] # Read model if modelPath == None: modelPath = "model-multilabel" classModels = {} if modelPath.endswith(".gz"): f = gzip.open(modelPath, "rt") else: f = open(modelPath, "rt") thresholds = {} for line in f: key, value, threshold = line.split() classModels[key] = value if threshold != "None": thresholds[key] = float(threshold) else: thresholds[key] = 0.0 f.close() mergedPredictions = [] if type(classIds) == types.StringType: classIds = IdSet(filename=classIds) #print classModels print 
"Thresholds", thresholds classifierBin = Settings.SVMMultiClassDir+"/svm_multiclass_classify" print parameters if "classifier" in parameters and "svmperf" in parameters["classifier"]: classifierBin = Settings.SVMPerfDir+"/svm_perf_classify" parameters = copy.copy(parameters) del parameters["classifier"] for className in classIds.getNames(): if className != "neg" and not "---" in className: classId = classIds.getId(className) if thresholds[str(className)] != 0.0: print >> sys.stderr, "Classifying", className, "with threshold", thresholds[str(className)] else: print >> sys.stderr, "Classifying", className args = [classifierBin] #self.__addParametersToSubprocessCall(args, parameters) classOutput = "predictions" + ".cls-" + className logFile = open("svmmulticlass" + ".cls-" + className + ".log","at") args += [testPath, classModels[str(className)], classOutput] print args subprocess.call(args, stdout = logFile, stderr = logFile) cls.addPredictions(classOutput, mergedPredictions, classId, len(classIds.Ids), threshold=thresholds[str(className)]) print >> sys.stderr, timer.toString() predFileName = output f = open(predFileName, "wt") for mergedPred in mergedPredictions: if len(mergedPred[0]) > 1 and "1" in mergedPred[0]: mergedPred[0].remove("1") mergedPred[1] = str(mergedPred[1]) mergedPred[0] = ",".join(sorted(list(mergedPred[0]))) f.write(" ".join(mergedPred) + "\n") f.close() return mergedPredictions
class Camera:
    """Keyboard-controlled view onto a plane.

    Each movement or zoom direction owns a Timer that is started while its
    key is held and stopped on release. update() reads every timer's lap()
    value and applies the difference of opposing directions to the view
    plane's origin (movement) or dimensions (zoom).
    """

    def __init__(self, view_origin, view_dim, screen):
        """Create the view plane, one timer per direction and the key map."""
        self.view_plane = Plane(view_origin, view_dim)
        self.screen = screen

        # One timer per controllable direction.
        self.zoom_in = Timer()
        self.zoom_out = Timer()
        self.move_left = Timer()
        self.move_right = Timer()
        self.move_up = Timer()
        self.move_down = Timer()

        # Zoom limits are stored but not enforced anywhere in this class.
        self.zoom_max = 10
        self.zoom_min = 1
        # Multipliers applied to the lap values in move() and zoom().
        self.move_sensitivity = 50
        self.zoom_sensitivity = 1

        # Key name -> handler taking a single "is the key down" flag.
        self.key_events = {
            'r': self.toggle_zoom_in,
            'e': self.toggle_zoom_out,
            'right': self.toggle_move_right,
            'left': self.toggle_move_left,
            'up': self.toggle_move_up,
            'down': self.toggle_move_down,
        }

    def update(self):
        """Apply pending movement first, then pending zoom."""
        self.move()
        self.zoom()

    def move(self):
        """Shift the view origin by the net lap value on each axis."""
        vertical = self.move_up.lap() - self.move_down.lap()
        horizontal = self.move_right.lap() - self.move_left.lap()
        scale = self.move_sensitivity
        self.view_plane.origin.add(Vector(horizontal * scale, vertical * scale))

    def zoom(self):
        """Grow or shrink the view dimensions and print them at (0, 0)."""
        amount = self.zoom_in.lap() - self.zoom_out.lap()
        amount *= self.zoom_sensitivity
        dim = self.view_plane.dim
        dim.add(Vector(amount, amount))
        self.screen.write_str(Vector(0, 0), self.view_plane.dim.__str__())

    def key_down(self, key):
        """Forward a key press to its handler; unmapped keys are ignored."""
        # self.screen.write_str(Vector(1, 1), 'down: ' + key)
        self._dispatch(key, True)

    def key_up(self, key):
        """Forward a key release to its handler; unmapped keys are ignored."""
        # self.screen.write_str(Vector(1, 1), 'up: ' + key)
        self._dispatch(key, False)

    def _dispatch(self, key, pressed):
        """Call the handler registered for key, if there is one."""
        handler = self.key_events.get(key)
        if handler is not None:
            handler(pressed)

    def _switch(self, timer, key_down):
        """Start an idle timer on key press; stop the timer on key release."""
        if not key_down:
            timer.stop()
        elif not timer.running:
            timer.start()

    def toggle_zoom_in(self, key_down):
        """Handle the zoom-in key; on release also print the timer's poll()."""
        if not key_down:
            # Written before the timer is stopped, as the value is polled live.
            self.screen.write_str(Vector(0, 7), self.zoom_in.poll().__str__())
        self._switch(self.zoom_in, key_down)

    def toggle_zoom_out(self, key_down):
        """Handle the zoom-out key."""
        self._switch(self.zoom_out, key_down)

    def toggle_move_left(self, key_down):
        """Handle the move-left key."""
        self._switch(self.move_left, key_down)

    def toggle_move_right(self, key_down):
        """Handle the move-right key."""
        self._switch(self.move_right, key_down)

    def toggle_move_up(self, key_down):
        """Handle the move-up key."""
        self._switch(self.move_up, key_down)

    def toggle_move_down(self, key_down):
        """Handle the move-down key."""
        self._switch(self.move_down, key_down)
# Command-line options for the classification run.
optparser.add_option("-i", "--input", default=defaultAnalysisFilename, dest="input", help="Corpus in analysis format", metavar="FILE")
optparser.add_option("-s", "--test", default=None, dest="input_test", help="Corpus in analysis format", metavar="FILE")
optparser.add_option("-g", "--testGold", default=None, dest="input_test_gold", help="Corpus in analysis format", metavar="FILE")
optparser.add_option("-o", "--output", default=None, dest="output", help="Output directory, useful for debugging")
optparser.add_option("-c", "--classifier", default="SVMLightClassifier", dest="classifier", help="Classifier Class")
optparser.add_option("-t", "--tokenization", default="split_gs", dest="tokenization", help="tokenization")
optparser.add_option("-p", "--parse", default="split_gs", dest="parse", help="parse")
optparser.add_option("-x", "--exampleBuilderParameters", default=None, dest="exampleBuilderParameters", help="Parameters for the example builder")
optparser.add_option("-y", "--parameters", default=None, dest="parameters", help="Parameters for the classifier")
optparser.add_option("-b", "--exampleBuilder", default="SimpleDependencyExampleBuilder", dest="exampleBuilder", help="Example Builder Class")
optparser.add_option("-e", "--evaluator", default="BinaryEvaluator", dest="evaluator", help="Prediction evaluator class")
optparser.add_option("-v", "--visualization", default=None, dest="visualization", help="Visualization output directory. NOTE: If the directory exists, it will be deleted!")
optparser.add_option("-m", "--resultsToXML", default=None, dest="resultsToXML", help="Results in analysis xml. NOTE: for edges, pairs, not interactions")
(options, args) = optparser.parse_args()

mainTimer = Timer()
print >> sys.stderr, __file__ + " start, " + mainTimer.toString()

# Start from an empty output directory: an existing one is deleted first.
if options.output != None:
    if os.path.exists(options.output):
        print >> sys.stderr, "Output directory exists, removing", options.output
        shutil.rmtree(options.output)
    os.mkdir(options.output)
    if not os.path.exists(options.output+"/classifier"):
        os.mkdir(options.output+"/classifier")

classifierParamDict = splitParameters(options.parameters)

print >> sys.stderr, "Importing modules"
# Load the classes named on the command line. Each module is expected to
# define a class with the same name as the module. __import__ with a
# non-empty fromlist returns the submodule itself; unlike the previous
# string-built "exec" statements it cannot run arbitrary code smuggled in
# through an option value.
ExampleBuilder = getattr(
    __import__("ExampleBuilders." + options.exampleBuilder, globals(), locals(), [options.exampleBuilder]),
    options.exampleBuilder)
Classifier = getattr(
    __import__("Classifiers." + options.classifier, globals(), locals(), [options.classifier]),
    options.classifier)
def optimizeCSC(Classifier, Evaluator, trainExamples, testExamples, classIds, combinations, workDir=None, timeout=None, cscConnection=None, downloadAllModels=False, steps="BOTH", threshold=False): bestResult = None combinationCount = 1 combinationIds = [] assert steps in ["BOTH", "SUBMIT", "RESULTS"], steps if type(classIds) == types.StringType: classIds = IdSet(filename=classIds) if Classifier.__name__ == "MultiLabelClassifier": negClass1 = True if "classifier" in combinations[0] and combinations[0]["classifier"] == "svmperf": negClass1 = False print "negclass1", negClass1 Classifier.makeClassFiles(trainExamples, testExamples, classIds, negClass1=negClass1) if steps in ["BOTH", "SUBMIT"]: print >> sys.stderr, "Initializing runs" for combination in combinations: Stream.setIndent(" ") print >> sys.stderr, "Parameters "+str(combinationCount)+"/"+str(len(combinations))+":", str(combination) # Train combinationIds.append(Classifier.initTrainAndTestOnLouhi(trainExamples, testExamples, combination, cscConnection, workDir, classIds) ) combinationCount += 1 else: for combination in combinations: idStr = "" for key in sorted(combination.keys()): idStr += "-" + str(key) + "_" + str(combination[key]) combinationIds.append(idStr) Stream.setIndent() if steps in ["BOTH", "RESULTS"]: Stream.setIndent(" ") print >> sys.stderr, "Waiting for results" finished = 0 louhiTimer = Timer() #combinationStatus = {} while(True): # count finished finished = 0 processStatus = {"FINISHED":0, "QUEUED":0, "FAILED":0, "RUNNING":0} for id in combinationIds: #status = Classifier.getLouhiStatus(id, cscConnection) #combinationStatus[id] = status #processStatus[status] += 1 Classifier.getLouhiStatus(id, cscConnection, processStatus, classIds) p = processStatus processStatusString = str(p["QUEUED"]) + " queued, " + str(p["RUNNING"]) + " running, " + str(p["FINISHED"]) + " finished, " + str(p["FAILED"]) + " failed" if processStatus["QUEUED"] + processStatus["RUNNING"] == 0: print >> sys.stderr print >> 
sys.stderr, "All runs done (" + processStatusString + ")" break # decide what to do if timeout == None or louhiTimer.getElapsedTime() < timeout: sleepString = " [ ] " print >> sys.stderr, "\rWaiting for " + str(len(combinations)) + " on " + cscConnection.machineName + "(" + processStatusString + "),", louhiTimer.elapsedTimeToString() + sleepString, #time.sleep(60) sleepTimer = Timer() while sleepTimer.getElapsedTime() < 60: steps = int(10 * sleepTimer.getElapsedTime() / 60) + 1 sleepString = " [" + steps * "." + (10-steps) * " " + "] " print >> sys.stderr, "\rWaiting for " + str(len(combinations)) + " on " + cscConnection.machineName + "(" + processStatusString + "),", louhiTimer.elapsedTimeToString() + sleepString, time.sleep(5) else: print >> sys.stderr print >> sys.stderr, "Timed out, ", louhiTimer.elapsedTimeToString() break print >> sys.stderr, "Evaluating results" #if type(testExamples) != types.ListType: # print >> sys.stderr, "Loading examples from file", testExamples # testExamples = ExampleUtils.readExamples(testExamples,False) bestCombinationId = None for i in range(len(combinationIds)): id = combinationIds[i] Stream.setIndent(" ") # Evaluate predictions = Classifier.getLouhiPredictions(id, cscConnection, workDir, classIds) if predictions == None: print >> sys.stderr, "No results for combination" + id else: if downloadAllModels: modelFileName = Classifier.downloadModel(id, cscConnection, workDir) if workDir != None: modelFileName = os.path.join(workDir, modelFileName) subprocess.call("gzip -fv " + modelFileName, shell=True) print >> sys.stderr, "Evaluating results for combination" + id evaluationOutput = "evaluation" + id + ".csv" if workDir != None: evaluationOutput = os.path.join(workDir, evaluationOutput) evaluator = Evaluator.evaluate(testExamples, predictions, classIds, evaluationOutput) if threshold: print >> sys.stderr, "Thresholding" evaluator.determineThreshold(testExamples, predictions) if Classifier.__name__ != "MultiLabelClassifier": if 
bestResult == None or evaluator.compare(bestResult[0]) > 0: #: averageResult.fScore > bestResult[1].fScore: bestResult = [evaluator, None, predictions, evaluationOutput, combinations[i]] bestCombinationId = id else: assert Evaluator.__name__ == "MultiLabelEvaluator", Evaluator.__name__ if bestResult == None: bestResult = [{}, None] for className in classIds.Ids: if className != "neg" and "---" not in className: bestResult[0][className] = [-1, None, classIds.getId(className), None] for className in classIds.Ids: if className != "neg" and "---" not in className: fscore = evaluator.dataByClass[classIds.getId(className)].fscore if fscore > bestResult[0][className][0]: bestResult[0][className] = [fscore, id, bestResult[0][className][2]] if threshold: classId = classIds.getId(className, False) if classId in evaluator.thresholds: bestResult[0][className].append(evaluator.thresholds[classId]) else: bestResult[0][className].append(0.0) else: bestResult[0][className].append(None) bestCombinationId = bestResult os.remove(predictions) # remove predictions to save space Stream.setIndent() print >> sys.stderr, "Selected parameters", bestResult[-1] #if Classifier.__name__ == "MultiLabelClassifier": # evaluator = Evaluator.evaluate(testExamples, predictions, classIds, evaluationOutput) # Download best model and predictions modelFileName = Classifier.downloadModel(bestCombinationId, cscConnection, workDir) if workDir != None: modelFileName = os.path.join(workDir, modelFileName) subprocess.call("gzip -fv " + modelFileName, shell=True) modelFileName = modelFileName + ".gz" #if Classifier.__name__ != "MultiLabelClassifier": #bestResult = [None, None] bestResult[1] = modelFileName return bestResult