Example #1
 def train(cls, examples, parameters, outputFile=None): #, timeout=None):
     """
     Train the SVM-multiclass classifier on a set of examples.
     
     @type examples: string (filename) or list (or iterator) of examples
     @param examples: a list or file containing examples in SVM-format
     @type parameters: a dictionary or string
     @param parameters: parameters for the classifier
     @type outputFile: string
     @param outputFile: the name of the model file to be written
     """
     timer = Timer()
     parameters = cls.getParams(parameters)
     
     # If examples are in a list, they will be written to a file for SVM-multiclass
     if type(examples) == types.ListType:
         print >> sys.stderr, "Training SVM-MultiClass on", len(examples), "examples"
         trainPath = cls.tempDir + "/train.dat"
         examples = cls.filterTrainingSet(examples)
         Example.writeExamples(examples, trainPath)
     else:
         print >> sys.stderr, "Training SVM-MultiClass on file", examples
         trainPath = cls.stripComments(examples)
     args = ["/home/jari/Programs/liblinear-1.5-poly2/train"]
     cls.__addParametersToSubprocessCall(args, parameters)
     if outputFile == None:
         args += [trainPath, "model"]
         logFile = open("svmmulticlass.log","at")
     else:
         args += [trainPath, outputFile]
         logFile = open(outputFile+".log","wt")
     rv = subprocess.call(args, stdout = logFile)
     logFile.close()
     print >> sys.stderr, timer.toString()
     return rv
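
The method above shells out to an external trainer and captures its output in a log file. A minimal, self-contained sketch of that subprocess-logging pattern (the binary path and arguments below are placeholders, not the project's real ones):

    import subprocess

    def runAndLog(args, logPath):
        # Run an external command, appending its stdout/stderr to a log
        # file, and return the exit code (as subprocess.call does above).
        logFile = open(logPath, "at")
        try:
            return subprocess.call(args, stdout=logFile, stderr=logFile)
        finally:
            logFile.close()

    # Hypothetical usage:
    # runAndLog(["/path/to/train", "-c", "1.0", "train.dat", "model"], "train.log")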
Example #2
 def waitForJobs(self, jobs, pollIntervalSeconds=60, timeout=None, verbose=True):
     print >> sys.stderr, "Waiting for results"
     waitTimer = Timer()
     while(True):
         jobStatus = {"FINISHED":0, "QUEUED":0, "FAILED":0, "RUNNING":0}
         for job in jobs:
             jobStatus[self.getJobStatus(job)] += 1
         jobStatusString = str(jobStatus["QUEUED"]) + " queued, " + str(jobStatus["RUNNING"]) + " running, " + str(jobStatus["FINISHED"]) + " finished, " + str(jobStatus["FAILED"]) + " failed"
         if jobStatus["QUEUED"] + jobStatus["RUNNING"] == 0:
             if verbose:
                 print >> sys.stderr, "\nAll runs done (" + jobStatusString + ")"
             break
         # decide what to do
         if timeout == None or waitTimer.getElapsedTime() < timeout:
             sleepTimer = Timer()
             accountName = self.account
             if self.account == None:
                 accountName = "local"
             if verbose:
                 sleepString = " [          ]     "
                 print >> sys.stderr, "\rWaiting for " + str(len(jobs)) + " on " + accountName + "(" + jobStatusString + "),", waitTimer.elapsedTimeToString() + sleepString,
             while sleepTimer.getElapsedTime() < pollIntervalSeconds:
                 if verbose:
                     steps = int(10 * sleepTimer.getElapsedTime() / pollIntervalSeconds) + 1
                     sleepString = " [" + steps * "." + (10-steps) * " " + "]     "
                     print >> sys.stderr, "\rWaiting for " + str(len(jobs)) + " on " + accountName + "(" + jobStatusString + "),", waitTimer.elapsedTimeToString() + sleepString,
                 time.sleep(5)                
         else:
             if verbose:
                 print >> sys.stderr, "\nTimed out, ", waitTimer.elapsedTimeToString()
             break
     return jobStatus
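
The inner loop above renders a ten-step text progress bar over the poll interval by rewriting the line with "\r". The bar arithmetic in isolation, as a runnable sketch with a shortened interval:

    import sys, time

    pollIntervalSeconds = 2  # shortened for demonstration
    start = time.time()
    while time.time() - start < pollIntervalSeconds:
        elapsed = time.time() - start
        steps = min(int(10 * elapsed / pollIntervalSeconds) + 1, 10)
        sys.stderr.write("\r[" + steps * "." + (10 - steps) * " " + "]")
        time.sleep(0.2)
    sys.stderr.write("\n")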
Example #3
 def test(cls, examples, modelPath, output=None, parameters=None, forceInternal=False): # , timeout=None):
     """
     Classify examples with a pre-trained model.
     
     @type examples: string (filename) or list (or iterator) of examples
     @param examples: a list or file containing examples in SVM-format
     @type modelPath: string
     @param modelPath: filename of the pre-trained model file
     @type parameters: a dictionary or string
     @param parameters: parameters for the classifier
     @type output: string
     @param output: the name of the predictions file to be written
     @type forceInternal: Boolean
     @param forceInternal: Use python classifier even if SVM Multiclass binary is defined in Settings.py
     """
     if forceInternal or Settings.SVMMultiClassDir == None:
         return cls.testInternal(examples, modelPath, output)
     timer = Timer()
     if type(examples) == types.ListType:
         print >> sys.stderr, "Classifying", len(examples), "with SVM-MultiClass model", modelPath
         examples, predictions = cls.filterClassificationSet(examples, False)
         testPath = cls.tempDir + "/test.dat"
         Example.writeExamples(examples, testPath)
     else:
         print >> sys.stderr, "Classifying file", examples, "with SVM-MultiClass model", modelPath
         testPath = cls.stripComments(examples)
         examples = Example.readExamples(examples,False)
     args = ["/home/jari/Programs/liblinear-1.5-poly2/predict"]
     if modelPath == None:
         modelPath = "model"
     if parameters != None:
         parameters = copy.copy(parameters)
         if parameters.has_key("c"):
             del parameters["c"]
         if parameters.has_key("predefined"):
             parameters = copy.copy(parameters)
             modelPath = os.path.join(parameters["predefined"][0],"classifier/model")
             del parameters["predefined"]
         cls.__addParametersToSubprocessCall(args, parameters)
     if output == None:
         output = "predictions"
         logFile = open("svmmulticlass.log","at")
     else:
         logFile = open(output+".log","wt")
     args += [testPath, modelPath, output]
     #if timeout == None:
     #    timeout = -1
     #print args
     subprocess.call(args, stdout = logFile, stderr = logFile)
     predictionsFile = open(output, "rt")
     lines = predictionsFile.readlines()
     predictionsFile.close()
     predictions = []
     for i in range(len(lines)):
         predictions.append( [int(lines[i].split()[0])] + lines[i].split()[1:] )
         #predictions.append( (examples[i],int(lines[i].split()[0]),"multiclass",lines[i].split()[1:]) )
     print >> sys.stderr, timer.toString()
     return predictions
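
Each line of the predictions file written by the external classifier begins with the predicted class id, followed by per-class values, so the parsing loop above reduces to the following (the file contents here are made up for illustration):

    # Hypothetical prediction lines: "classId value1 value2 ...".
    lines = ["2 0.1 0.9 -0.3", "1 0.7 -0.2 0.0"]
    predictions = []
    for line in lines:
        fields = line.split()
        predictions.append([int(fields[0])] + fields[1:])
    # predictions == [[2, '0.1', '0.9', '-0.3'], [1, '0.7', '-0.2', '0.0']]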
Example #4
 def waitForJobCount(self,
                     targetCount=0,
                     pollIntervalSeconds=60,
                     verbose=True):
     if targetCount == -1:
         return
     numJobs = self.getNumJobs()
     if numJobs <= targetCount:
         return
     waitTimer = Timer()
     while numJobs > targetCount:
         sleepTimer = Timer()
         accountName = self.account
         if self.account == None:
             accountName = "local"
         if verbose:
             sleepString = " [          ]     "
             print >> sys.stderr, "\rWaiting for " + str(numJobs) + " on " + accountName + " (limit=" + str(targetCount) + ")", waitTimer.elapsedTimeToString() + sleepString,
         while sleepTimer.getElapsedTime() < pollIntervalSeconds:
             if verbose:
                 steps = int(10 * sleepTimer.getElapsedTime() / pollIntervalSeconds) + 1
                 sleepString = " [" + steps * "." + (10 - steps) * " " + "]     "
                 print >> sys.stderr, "\rWaiting for " + str(numJobs) + " on " + accountName + " (limit=" + str(targetCount) + ")", waitTimer.elapsedTimeToString() + sleepString,
             time.sleep(5)
         numJobs = self.getNumJobs()
     print >> sys.stderr, "\nAll jobs done"
Example #5
 def __init__(self, style=None, classSet=None, featureSet=None):
     if classSet == None:
         classSet = IdSet(1)
     assert( classSet.getId("neg") == 1 )
     if featureSet == None:
         featureSet = IdSet()
     
     ExampleBuilder.__init__(self, classSet, featureSet)
     self.styles = style
     self.timerBuildExamples = Timer(False)
     self.timerCrawl = Timer(False)
     self.timerCrawlPrecalc = Timer(False)
     self.timerMatrix = Timer(False)
     self.timerMatrixPrecalc = Timer(False)
Example #6
    async def run(self):
        while True:
            self.last_update = Timer.get_ms_time()

            self._register_tasks()

            await asyncio.sleep(self.heartbeat)
Example #7
 async def process(self) -> tuple:
     response = pack(
         '<2I',
         Timer.get_ms_time(),
         0
     )
     return WorldOpCode.SMSG_QUERY_TIME_RESPONSE, [response]
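
The '<2I' format packs two little-endian unsigned 32-bit integers, here the server time in milliseconds followed by a zero pad. A round-trip sketch with an arbitrary timestamp:

    from struct import pack, unpack

    response = pack('<2I', 123456789, 0)  # 8 bytes: time_ms, then zero
    time_ms, pad = unpack('<2I', response)
    assert (time_ms, pad) == (123456789, 0)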
Example #8
    async def process(self):
        if self.opcode == WorldOpCode.CMSG_NAME_QUERY:
            # we send this to show player info for another players; to allow chat
            guid = int.from_bytes(self.packet[6:14], 'little')

            await QueuesRegistry.name_query_queue.put((self.player, guid))

            name_bytes = self.player.name.encode('utf-8') + b'\x00'
            response = pack(
                '<Q{name_len}sB3IB'.format(name_len=len(name_bytes)),
                self.player.guid,
                name_bytes,
                0,
                self.player.race,
                self.player.gender,
                self.player.char_class,
                0
            )
            return WorldOpCode.SMSG_NAME_QUERY_RESPONSE, response

        elif self.opcode == WorldOpCode.CMSG_QUERY_TIME:
            response = pack(
                '<2I',
                Timer.get_ms_time(),
                0
            )
            return WorldOpCode.SMSG_QUERY_TIME_RESPONSE, response

        else:
            return None, None
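
The name-query response format '<Q{n}sB3IB' is a guid (Q), a NUL-terminated name ({n}s), a pad byte, race/gender/class as three uint32s, and a final pad byte. A round-trip sketch; the field values are illustrative, not real game data:

    from struct import pack, unpack

    name_bytes = 'Thrall'.encode('utf-8') + b'\x00'
    fmt = '<Q{0}sB3IB'.format(len(name_bytes))
    # guid, name, pad, race, gender, char_class, pad
    response = pack(fmt, 12345, name_bytes, 0, 2, 0, 7, 0)
    fields = unpack(fmt, response)
    assert fields[0] == 12345 and fields[1] == name_bytes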
Example #9
    async def process(self):
        self._parse_packet()

        player = self.temp_ref.player

        response = player.packed_guid + pack(
            '<IBI4ff',
            self.move_flags,  # unit movement flags
            self.move_flags2,  # extra move flags
            Timer.get_ms_time(),
            self.position.x,
            self.position.y,
            self.position.z,
            self.position.orientation,
            0)

        player.position = self.position

        # await web_data_queue.put(json.dumps({
        #     'x': self.position.x,
        #     'y': self.position.y,
        #     'z': self.position.z,
        #     'orientation': self.position.orientation,
        # }))

        # await players_queue.put(player)

        await asyncio.sleep(0)

        # should return nothing
        return None, None
Example #10
    async def run(self):
        while True:
            self.last_update = Timer.get_ms_time()

            try:
                await asyncio.wait_for(self.update(), timeout=1.0)
            except asyncio.TimeoutError:
                Logger.warning('[World Manager]: Timeout...')
            finally:
                await asyncio.sleep(self.heartbeat)
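
asyncio.wait_for cancels the awaited coroutine and raises asyncio.TimeoutError once the deadline passes, which is what the try/except above guards against. A self-contained sketch of the same pattern:

    import asyncio

    async def slow_update():
        await asyncio.sleep(2)  # stand-in for a long world update

    async def main():
        try:
            await asyncio.wait_for(slow_update(), timeout=1.0)
        except asyncio.TimeoutError:
            print('update timed out')

    asyncio.run(main())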
Example #11
 def waitForJobCount(self, targetCount=0, pollIntervalSeconds=60, verbose=True):
     if targetCount == -1:
         return
     numJobs = self.getNumJobs()
     if numJobs <= targetCount:
         return
     waitTimer = Timer()
     accountName = self.account
     if self.account == None:
         accountName = "local"
     while numJobs > targetCount:
         if verbose:
             print >> sys.stderr, "\rWaiting for " + str(numJobs) + " on " + accountName + " (limit=" + str(targetCount) + ")", waitTimer.elapsedTimeToString(),
         time.sleep(pollIntervalSeconds)
         numJobs = self.getNumJobs()
Example #12
 def waitForJobs(self, scriptNames, timeout=None):
     print >> sys.stderr, "Waiting for results"
     finished = 0
     louhiTimer = Timer()
     combinationStatus = {}
     while (True):
         # count finished
         finished = 0
         processStatus = {
             "FINISHED": 0,
             "QUEUED": 0,
             "FAILED": 0,
             "RUNNING": 0
         }
         for scriptName in scriptNames:
             status = self.getLouhiStatus(scriptName)
             combinationStatus[scriptName] = status
             processStatus[status] += 1
         p = processStatus
         processStatusString = str(p["QUEUED"]) + " queued, " + str(p["RUNNING"]) + " running, " + str(p["FINISHED"]) + " finished, " + str(p["FAILED"]) + " failed"
         if processStatus["QUEUED"] + processStatus["RUNNING"] == 0:
             print >> sys.stderr
             print >> sys.stderr, "All jobs done (" + processStatusString + ")"
             break
         # decide what to do
         if timeout == None or louhiTimer.getElapsedTime() < timeout:
             sleepString = " [          ]     "
             # assumes the connection object defines self.machineName
             print >> sys.stderr, "\rWaiting for " + str(len(scriptNames)) + " on " + self.machineName + " (" + processStatusString + "),", louhiTimer.elapsedTimeToString() + sleepString,
             #time.sleep(60)
             sleepTimer = Timer()
             while sleepTimer.getElapsedTime() < 60:
                 steps = int(10 * sleepTimer.getElapsedTime() / 60) + 1
                 sleepString = " [" + steps * "." + (10 - steps) * " " + "]     "
                 print >> sys.stderr, "\rWaiting for " + str(len(scriptNames)) + " on " + self.machineName + " (" + processStatusString + "),", louhiTimer.elapsedTimeToString() + sleepString,
                 time.sleep(5)
         else:
             print >> sys.stderr
             print >> sys.stderr, "Timed out, ", louhiTimer.elapsedTimeToString()
             return False
     return True
Example #13
    def _get_movement_info(self) -> bytes:
        data = bytes()

        data += pack('<B', self.update_flags)

        if self.update_flags & UpdateObjectFlags.UPDATEFLAG_LIVING.value:
            if self.object_type == ObjectType.PLAYER.value:
                # TODO: check for transport
                self.movement_flags &= ~MovementFlags.ONTRANSPORT.value
            elif self.object_type == ObjectType.UNIT.value:
                self.movement_flags &= ~MovementFlags.ONTRANSPORT.value

            data += pack('<IBI', self.movement_flags, self.movement_flags2,
                         Timer.get_ms_time())

        if self.update_flags & UpdateObjectFlags.UPDATEFLAG_HAS_POSITION.value:
            # TODO: check if transport
            data += self.update_object.position.to_bytes()

        if self.update_flags & UpdateObjectFlags.UPDATEFLAG_LIVING.value:
            # TODO: check transport, swimming and flying
            data += pack('<I', 0)  # last fall time

            movement = Config.World.Object.Unit.Player.Defaults.Movement

            data += pack('<8f', movement.speed_walk, movement.speed_run,
                         movement.speed_run_back, movement.speed_swim,
                         movement.speed_swim_back, movement.speed_flight,
                         movement.speed_flight_back, movement.speed_turn)

        if self.update_flags & UpdateObjectFlags.UPDATEFLAG_LOWGUID.value:
            if self.object_type == ObjectType.ITEM.value:
                data += pack('<I', self.update_object.low_guid)
            elif self.object_type == ObjectType.UNIT.value:
                data += pack('<I', 0x0000000B)
            elif self.object_type == ObjectType.PLAYER.value:
                if self.update_flags & UpdateObjectFlags.UPDATEFLAG_SELF.value:
                    data += pack('<I', 0x00000015)
                else:
                    data += pack('<I', 0x00000008)
            else:
                data += pack('<I', 0x00000000)

        if self.update_flags & UpdateObjectFlags.UPDATEFLAG_HIGHGUID.value:
            # TODO: get high guid for another object types
            if self.object_type == ObjectType.ITEM.value:
                data += pack('<I', self.update_object.high_guid)
            else:
                data += pack('<I', 0x00000000)  # high guid for unit or player

        return data
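
The expression flags &= ~FLAG clears exactly one bit and leaves the rest intact, which is how ONTRANSPORT is dropped above. The same operation with a stand-in IntFlag (the real MovementFlags values live in the project's enums):

    from enum import IntFlag

    class MovementFlags(IntFlag):
        FORWARD = 0x1
        ONTRANSPORT = 0x200

    flags = MovementFlags.FORWARD | MovementFlags.ONTRANSPORT
    flags &= ~MovementFlags.ONTRANSPORT  # clear only the transport bit
    assert flags == MovementFlags.FORWARD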
Example #14
def optimizeLocal(Classifier, Evaluator, trainExamples, testExamples, classIds, combinations, workDir=None, timeout=None):
    bestResult = None
    combinationCount = 1
    for combination in combinations:
        Stream.setIndent(" ")
        print >> sys.stderr, "Parameters "+str(combinationCount)+"/"+str(len(combinations))+":", str(combination)
        Stream.setIndent("  ")
        combinationId = getCombinationString(combination)
        # Train
        trainOutput = "model-" + combinationId
        if workDir != None:
            trainOutput = os.path.join(workDir, trainOutput)
        print >> sys.stderr, "Training..."
        timer = Timer()
        Classifier.train(trainExamples, combination, trainOutput)
        print >> sys.stderr, "Training Complete, time:", timer.toString()
        # Test
        testOutput = "classifications-" + combinationId
        if workDir != None:
            testOutput = os.path.join(workDir, testOutput)
        print >> sys.stderr, "Testing..."
        timer = Timer()
        Classifier.test(testExamples, trainOutput, testOutput)
        print >> sys.stderr, "Testing Complete, time:", timer.toString()
        # Evaluate
        evaluationOutput = "evaluation-" + combinationId + ".csv"
        if workDir != None:
            evaluationOutput = os.path.join(workDir, evaluationOutput)
        Stream.setIndent("   ")
        evaluator = Evaluator.evaluate(testExamples, testOutput, classIds, evaluationOutput)
        #print >> sys.stderr, evaluator.toStringConcise("  ")

        if bestResult == None or evaluator.compare(bestResult[0]) > 0: #: averageResult.fScore > bestResult[1].fScore:
            bestResult = [evaluator, trainOutput, testOutput, evaluationOutput, combination]
        combinationCount += 1
    Stream.setIndent()
    print >> sys.stderr, "Selected parameters", bestResult[-1]
    return bestResult
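
optimizeLocal is a plain grid search: train, test, and evaluate each parameter combination, keeping whichever evaluator compares best. Its selection skeleton, sketched with a stand-in score function in place of the train/test/evaluate cycle:

    def gridSearch(combinations, score):
        # score stands in for one train + test + evaluate cycle
        best = None
        for combination in combinations:
            result = (score(combination), combination)
            if best == None or result[0] > best[0]:
                best = result
        return best

    # e.g. pick the c value closest to 1.0:
    # gridSearch([{"c": c} for c in (0.1, 1.0, 10.0)], lambda p: -abs(p["c"] - 1.0))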
Example #15
def buildExamples(exampleBuilder, sentences, outfilename):
    timer = Timer()
    examples = []
    if "graph_kernel" in exampleBuilder.styles:
        counter = ProgressCounter(len(sentences), "Build examples", 0)
    else:
        counter = ProgressCounter(len(sentences), "Build examples")
    
    calculatePredictedRange(exampleBuilder, sentences)
    
    outfile = open(outfilename, "wt")
    exampleCount = 0
    for sentence in sentences:
        counter.update(1, "Building examples ("+sentence[0].getSentenceId()+"): ")
        examples = exampleBuilder.buildExamples(sentence[0])
        exampleCount += len(examples)
        examples = exampleBuilder.preProcessExamples(examples)
        Example.appendExamples(examples, outfile)
    outfile.close()

    print >> sys.stderr, "Examples built:", str(exampleCount)
    print >> sys.stderr, "Features:", len(exampleBuilder.featureSet.getNames())
    print >> sys.stderr, "Elapsed", timer.toString()
Example #16
    def laplacian(self):
        """Computes hypergraph laplacian
        Delta=I-Theta,
        Theta=Dv^-1/2 H W De^-1 H^T Dv^-1/2
        
        Returns
        -------
        Delta: sparse matrix
            hypergraph laplacian
        """

        with Timer() as t_l:

            Theta = self.theta_matrix()
            Delta = spsp.eye(*sp.shape(Theta)) - Theta

        self.laplacian_timer = t_l.secs

        return Delta
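
The docstring's ASCII formula is the standard normalized hypergraph Laplacian; in LaTeX notation, with H the |V| x |E| incidence matrix, W the diagonal hyperedge-weight matrix, and D_v, D_e the vertex and edge degree matrices:

    \Delta = I - \Theta, \qquad
    \Theta = D_v^{-1/2} \, H \, W \, D_e^{-1} \, H^{T} \, D_v^{-1/2}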
Example #17
    def incidence_matrix(self):
        """Computes incidence matrix of size |V|*|E|
        h(v,e)=1 if v in e
        h(v,e)=0 if v not in e
        
        Returns
        -------
        H: sparse incidence matrix
            sparse incidence matrix of size |V|*|E|
        """
        with Timer() as t_in:
            H = spsp.lil_matrix(
                (sp.shape(sp.unique(self.edge_list.flatten()))[0],
                 sp.shape(self.edge_list)[0]))

            it = sp.nditer(self.edge_list, flags=['multi_index', 'refs_ok'])
            while not it.finished:
                H[it[0], it.multi_index[0]] = 1.0
                it.iternext()

        self.incidence_matrix_timer = t_in.secs
        return H
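
The nditer walk above simply sets h(v, e) = 1 for every vertex listed in every hyperedge. A compact sketch of the same construction on a small edge list, assuming vertex ids 0..n-1 as the method does:

    import numpy as np
    import scipy.sparse as spsp

    edge_list = np.array([[0, 1, 2], [1, 2, 3]])  # two hyperedges over vertices 0..3
    n_vertices = np.unique(edge_list).shape[0]
    H = spsp.lil_matrix((n_vertices, edge_list.shape[0]))
    for e, edge in enumerate(edge_list):
        for v in edge:
            H[v, e] = 1.0  # h(v, e) = 1 iff vertex v belongs to hyperedge e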
Example #18
    def laplacian_eigs(self,
                       k=6,
                       type='SM',
                       filename=None,
                       minTol=1e-23,
                       **kwargs):
        """Computes eigenvectors of laplacian
        
        Parameters
        ----------
        k: int, optional
            number of eigenpairs
        type: str, optional
            type of eigenpairs, as specified in scipy.sparse.linalg.eigs documentation, or 'LNZ' for lowest non zero
        filename: str, optional
            if filename exists, save min eigenvalue, min eigenvector, all used eigenvalues, all used eigenvectors in json format
        kwargs: named arguments to pass to scipy.sparse.linalg.eigs function
        Returns
        -------
        eigenvals: ndarray
            array of k eigenvalues
        eigenvecs: ndarray
            array of k eigenvectors
        """
        min_dict = {}

        lap = self.laplacian().tocsc()
        if k >= lap.shape[0]:
            k = lap.shape[0] - 2
        if type == 'LNZ':

            with Timer() as t_eig:
                vals, vecs = spla.eigs(lap, k=k, which='SM', **kwargs)

            #DBG
            print(vals)
            #sort vals and vecs
            sorted_eigenvals_indices = sp.argsort(vals)
            vals = sp.array([vals[i] for i in sorted_eigenvals_indices])
            vecs = sp.array([vecs[:, i] for i in sorted_eigenvals_indices]).T

            #DBG
            print(vals)
            print(sorted_eigenvals_indices)

            self.eigs_timer = t_eig.secs

            vals_lnz_indices = [
                i for i in range(len(vals)) if vals[i] > minTol
            ]

            used_vals = sp.array([vals[i] for i in vals_lnz_indices])
            used_vecs = sp.array([vecs[:, i] for i in vals_lnz_indices]).T

            #DBG
            print('******eigendata:')
            print(used_vals)
            print(min(used_vals))
            if filename:
                min_dict['min_eigenval_used'] = sp.real(
                    min(used_vals)).tolist()
                min_dict['min_eigenvec_used'] = sp.real(
                    used_vecs[:, sp.argmin(used_vals)]).tolist()
                print('-----------eigenvec_len:')
                print(
                    sp.shape(
                        sp.real(used_vecs[:, sp.argmin(used_vals)]).tolist()))
                min_dict['eigenvals_used'] = sp.real(used_vals).tolist()
                min_dict['eigenvecs_used'] = sp.real(used_vecs).tolist()
                json_writer(min_dict, filename)

            self.__isPSD(lap, k)
            self.__test_eigenpairs(vals, vecs, lap)

            return used_vals, used_vecs
        else:
            with Timer() as t_eig:
                vals, vecs = spla.eigs(lap, k=k, which=type, **kwargs)

            sorted_eigenvals_indices = sp.argsort(vals)
            vals = sp.array([vals[i] for i in sorted_eigenvals_indices])
            vecs = sp.array([vecs[:, i] for i in sorted_eigenvals_indices]).T

            self.eigs_timer = t_eig.secs

            if filename:
                min_dict['min_eigenval_used'] = sp.real(min(vals)).tolist()
                min_dict['min_eigenvec_used'] = sp.real(
                    vecs[:, sp.argmin(vals)]).tolist()
                min_dict['eigenvals_used'] = sp.real(vals).tolist()
                min_dict['eigenvecs_used'] = sp.real(vecs).tolist()
                json_writer(min_dict, filename)

            return vals, vecs
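
scipy.sparse.linalg.eigs with which='SM' asks ARPACK for the k smallest-magnitude eigenpairs, and the argsort step above puts them in ascending order. A self-contained sketch on a small positive-definite tridiagonal matrix:

    import numpy as np
    import scipy.sparse as spsp
    import scipy.sparse.linalg as spla

    n = 30
    lap = spsp.diags([-np.ones(n - 1), 2 * np.ones(n), -np.ones(n - 1)],
                     [-1, 0, 1], format='csc')  # positive definite, real spectrum
    vals, vecs = spla.eigs(lap, k=4, which='SM')
    order = np.argsort(vals)
    vals, vecs = np.real(vals[order]), np.real(vecs[:, order])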
Example #19
    async def run(self):
        self._register_tasks()

        while True:
            self.last_update = Timer.get_ms_time()
            await sleep(Config.Realm.Settings.min_timeout)
Example #20
    optparser.add_option("-i", "--input", default=defaultAnalysisFilename, dest="input", help="Corpus in analysis format", metavar="FILE")
    optparser.add_option("-o", "--output", default=None, dest="output", help="Output directory, useful for debugging")
    optparser.add_option("-c", "--classifier", default="SVMLightClassifier", dest="classifier", help="Classifier Class")
    optparser.add_option("-t", "--tokenization", default="split_gs", dest="tokenization", help="tokenization")
    optparser.add_option("-p", "--parse", default="split_gs", dest="parse", help="parse")
    optparser.add_option("-x", "--exampleBuilderParameters", default=None, dest="exampleBuilderParameters", help="Parameters for the example builder")
    optparser.add_option("-y", "--parameters", default=None, dest="parameters", help="Parameters for the classifier")
    optparser.add_option("-b", "--exampleBuilder", default="SimpleDependencyExampleBuilder", dest="exampleBuilder", help="Example Builder Class")
    optparser.add_option("-e", "--evaluator", default="BinaryEvaluator", dest="evaluator", help="Prediction evaluator class")
    optparser.add_option("-v", "--visualization", default=None, dest="visualization", help="Visualization output directory. NOTE: If the directory exists, it will be deleted!")
    optparser.add_option("-f", "--folds", default="10", dest="folds", help="X-fold cross validation")
    optparser.add_option("-d", "--paramOptData", default=None, dest="paramOptData", help="The fraction of the corpus to be always used for parameter optimization")
    optparser.add_option("-m", "--resultsToXML", default=None, dest="resultsToXML", help="Output interaction xml-file")
    (options, args) = optparser.parse_args()
    
    timer = Timer()
    print >> sys.stderr, timer.toString()
    
    if options.folds.find(",") != -1:
        options.folds = options.folds.split(",")
        assert(len(options.folds)==2)
        options.folds[0] = int(options.folds[0])
        options.folds[1] = int(options.folds[1])
        if options.paramOptData != None:
            print >> sys.stderr, "Parameter optimization set defined, parameter " + str(options.folds[1]) + "-fold cross validation will not be performed." 
    else:
        options.folds = (int(options.folds),int(options.folds))

    if options.output != None:
        if os.path.exists(options.output):
            print >> sys.stderr, "Output directory exists, removing", options.output
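
With the comma test fixed to != -1, the --folds option accepts either a single fold count ("10") or an "outer,inner" pair ("5,10"). The parsing rule in isolation:

    def parseFolds(folds):
        # "10" -> (10, 10); "5,10" -> (5, 10)
        if folds.find(",") != -1:
            parts = folds.split(",")
            assert len(parts) == 2
            return (int(parts[0]), int(parts[1]))
        return (int(folds), int(folds))

    assert parseFolds("10") == (10, 10)
    assert parseFolds("5,10") == (5, 10)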
Example #21
class GeneralEntityTypeRecognizer(ExampleBuilder):
    def __init__(self, style=None, classSet=None, featureSet=None):
        if classSet == None:
            classSet = IdSet(1)
        assert( classSet.getId("neg") == 1 )
        if featureSet == None:
            featureSet = IdSet()
        
        ExampleBuilder.__init__(self, classSet, featureSet)
        self.styles = style
        self.timerBuildExamples = Timer(False)
        self.timerCrawl = Timer(False)
        self.timerCrawlPrecalc = Timer(False)
        self.timerMatrix = Timer(False)
        self.timerMatrixPrecalc = Timer(False)
    
    @classmethod
    def run(cls, input, output, parse, tokenization, style, idFileTag=None):
        classSet, featureSet = cls.getIdSets(idFileTag)
        e = GeneralEntityTypeRecognizer(style, classSet, featureSet)
        sentences = cls.getSentences(input, parse, tokenization)
        e.buildExamplesForSentences(sentences, output, idFileTag)
        print >> sys.stderr, "Time for buildExamples:", e.timerBuildExamples.elapsedTimeToString()
        print >> sys.stderr, "Time for Crawl:", e.timerCrawl.elapsedTimeToString()
        print >> sys.stderr, "Time for Crawl(Precalc):", e.timerCrawlPrecalc.elapsedTimeToString()
        print >> sys.stderr, "Time for Matrix:", e.timerMatrix.elapsedTimeToString()
        print >> sys.stderr, "Time for Matrix(Precalc):", e.timerMatrixPrecalc.elapsedTimeToString()

    def preProcessExamples(self, allExamples):
        if "normalize" in self.styles:
            print >> sys.stderr, " Normalizing feature vectors"
            ExampleUtils.normalizeFeatureVectors(allExamples)
        return allExamples   
    
    def getMergedEntityType(self, entities):
        """
        If a single token belongs to multiple entities of different types,
        a new, composite type is defined. This type is the alphabetically
        ordered types of these entities joined with '---'.
        """
        types = set()
        for entity in entities:
            types.add(entity.get("type"))
        types = list(types)
        types.sort()
        typeString = ""
        for type in types:
            if typeString != "":
                typeString += "---"
            typeString += type
        return typeString
    
    def getTokenFeatures(self, token, sentenceGraph):
        """
        Returns a list of features based on the attributes of a token.
        These can be used to define more complex features.
        """
        # These features are cached when this method is first called
        # for a token.
        if self.tokenFeatures.has_key(token):
            return self.tokenFeatures[token]
        
        features = []
        features.append("_txt_"+sentenceGraph.getTokenText(token))
        features.append("_POS_"+token.get("POS"))
        if sentenceGraph.tokenIsName[token]:
            features.append("_isName")
            for entity in sentenceGraph.tokenIsEntityHead[token]:
                if entity.get("isName") == "True":
                    features.append("_annType_"+entity.get("type"))
        
        self.tokenFeatures[token] = features
        return features
    
    def buildLinearOrderFeatures(self,sentenceGraph,index,tag,features):
        """
        Linear features are built by marking token features with a tag
        that defines their relative position in the linear order.
        """
        tag = "linear_"+tag
        for tokenFeature in self.getTokenFeatures(sentenceGraph.tokens[index], sentenceGraph):
            features[self.featureSet.getId(tag+tokenFeature)] = 1
    
    def buildExamples(self, sentenceGraph):
        """
        Build one example for each token of the sentence
        """
        self.timerBuildExamples.start()
        examples = []
        exampleIndex = 0
        
        self.tokenFeatures = {}
        
        namedEntityCount = 0
        for entity in sentenceGraph.entities:
            if entity.get("isName") == "True": # known data which can be used for features
                namedEntityCount += 1
        namedEntityCountFeature = "nameCount_" + str(namedEntityCount)
        
        bagOfWords = {}
        for token in sentenceGraph.tokens:
            text = "bow_" + token.get("text")
            if not bagOfWords.has_key(text):
                bagOfWords[text] = 0
            bagOfWords[text] += 1
            if sentenceGraph.tokenIsName[token]:
                text = "ne_" + text
                if not bagOfWords.has_key(text):
                    bagOfWords[text] = 0
                bagOfWords[text] += 1
        bowFeatures = {}
        for k,v in bagOfWords.iteritems():
            bowFeatures[self.featureSet.getId(k)] = v
        
        self.timerCrawl.start()
        self.timerCrawlPrecalc.start()
        self.inEdgesByToken = {}
        self.outEdgesByToken = {}
        self.edgeSetByToken = {}
        for token in sentenceGraph.tokens:
            inEdges = sentenceGraph.dependencyGraph.in_edges(token)
            inEdges.sort(compareDependencyEdgesById)
            self.inEdgesByToken[token] = inEdges
            outEdges = sentenceGraph.dependencyGraph.out_edges(token)
            outEdges.sort(compareDependencyEdgesById)
            self.outEdgesByToken[token] = outEdges
            self.edgeSetByToken[token] = set(inEdges + outEdges)
        self.timerCrawl.stop()
        self.timerCrawlPrecalc.stop()
        
        self.timerMatrix.start()
        self.timerMatrixPrecalc.start()
        self._initMatrices(sentenceGraph)
        self.timerMatrix.stop()
        self.timerMatrixPrecalc.stop()
        
        for i in range(len(sentenceGraph.tokens)):
            token = sentenceGraph.tokens[i]
            # Recognize only non-named entities (i.e. interaction words)
            if sentenceGraph.tokenIsName[token]:
                continue
            
            # CLASS
            if len(sentenceGraph.tokenIsEntityHead[token]) > 0:
                category = self.classSet.getId(self.getMergedEntityType(sentenceGraph.tokenIsEntityHead[token]))
            else:
                category = 1
            
            # FEATURES
            features = {}
            
            features[self.featureSet.getId(namedEntityCountFeature)] = 1
            #for k,v in bagOfWords.iteritems():
            #    features[self.featureSet.getId(k)] = v
            # pre-calculate bow _features_
            features.update(bowFeatures)
            
#            for j in range(len(sentenceGraph.tokens)):
#                text = "bow_" + sentenceGraph.tokens[j].get("text")
#                if j < i:
#                    features[self.featureSet.getId("bf_" + text)] = 1
#                elif j > i:
#                    features[self.featureSet.getId("af_" + text)] = 1
        
            # Main features
            text = token.get("text")
            features[self.featureSet.getId("txt_"+text)] = 1
            features[self.featureSet.getId("POS_"+token.get("POS"))] = 1
            stem = PorterStemmer.stem(text)
            features[self.featureSet.getId("stem_"+stem)] = 1
            features[self.featureSet.getId("nonstem_"+text[len(stem):])] = 1
            
            # Linear order features
            for index in [-3,-2,-1,1,2,3]:
                if i + index >= 0 and i + index < len(sentenceGraph.tokens):
                    self.buildLinearOrderFeatures(sentenceGraph, i + index, str(index), features)
            
            # Content
            if i > 0 and text[0].isalpha() and text[0].isupper():
                features[self.featureSet.getId("upper_case_start")] = 1
            for j in range(len(text)):
                if j > 0 and text[j].isalpha() and text[j].isupper():
                    features[self.featureSet.getId("upper_case_middle")] = 1
                # numbers and special characters
                if text[j].isdigit():
                    features[self.featureSet.getId("has_digits")] = 1
                    if j > 0 and text[j-1] == "-":
                        features[self.featureSet.getId("has_hyphenated_digit")] = 1
                elif text[j] == "-":
                    features[self.featureSet.getId("has_hyphen")] = 1
                elif text[j] == "/":
                    features[self.featureSet.getId("has_fslash")] = 1
                elif text[j] == "\\":
                    features[self.featureSet.getId("has_bslash")] = 1
                # duplets
                if j > 0:
                    features[self.featureSet.getId("dt_"+text[j-1:j+1].lower())] = 1
                # triplets
                if j > 1:
                    features[self.featureSet.getId("tt_"+text[j-2:j+1].lower())] = 1
            
            # Attached edges (Hanging in and out edges)
            t1InEdges = self.inEdgesByToken[token]
            for edge in t1InEdges:
                edgeType = edge[2].get("type")
                features[self.featureSet.getId("t1HIn_"+edgeType)] = 1
                features[self.featureSet.getId("t1HIn_"+edge[0].get("POS"))] = 1
                features[self.featureSet.getId("t1HIn_"+edgeType+"_"+edge[0].get("POS"))] = 1
                tokenText = sentenceGraph.getTokenText(edge[0])
                features[self.featureSet.getId("t1HIn_"+tokenText)] = 1
                features[self.featureSet.getId("t1HIn_"+edgeType+"_"+tokenText)] = 1
            t1OutEdges = self.outEdgesByToken[token]
            for edge in t1OutEdges:
                edgeType = edge[2].get("type")
                features[self.featureSet.getId("t1HOut_"+edgeType)] = 1
                features[self.featureSet.getId("t1HOut_"+edge[1].get("POS"))] = 1
                features[self.featureSet.getId("t1HOut_"+edgeType+"_"+edge[1].get("POS"))] = 1
                tokenText = sentenceGraph.getTokenText(edge[1])
                features[self.featureSet.getId("t1HOut_"+tokenText)] = 1
                features[self.featureSet.getId("t1HOut_"+edgeType+"_"+tokenText)] = 1
             
            extra = {"xtype":"token","t":token.get("id")}
            examples.append( (sentenceGraph.getSentenceId()+".x"+str(exampleIndex),category,features,extra) )
            exampleIndex += 1
            
            # chains
            copyFeatures = copy.copy(features)
            self.timerCrawl.start()
            self.buildChains(token, sentenceGraph, features)
            self.timerCrawl.stop()
            self.timerMatrix.start()
            self.buildChainsAlternative(token, copyFeatures, sentenceGraph)
            self.timerMatrix.stop()
            diff1 = set(features.keys()) - set(copyFeatures.keys())
            diff2 = set(copyFeatures.keys()) - set(features.keys())
            if len(diff1) != 0 or len(diff2) != 0:
                print "Error for token", token.get("id"), token.get("text")
                intersection = set(features.keys()) & set(copyFeatures.keys())
                print "d1:",
                for key in sorted(diff1):
                    print self.featureSet.getName(key) + ",",
                print
                print "d2:",
                for key in sorted(diff2):
                    print self.featureSet.getName(key) + ",",
                print
                print "int:",
                intNames = []
                for key in sorted(intersection):
                    intNames.append(self.featureSet.getName(key))
                for name in sorted(intNames):
                    print name + ",",
                print
                #assert(len(diff1) == 0)
        self.timerBuildExamples.stop()
        return examples
    
    def _initMatrices(self, sentenceGraph):
        nodes = sentenceGraph.dependencyGraph.nodes()
        self.dod1 = self._dodFromGraph(sentenceGraph, nodes)
        self.dod2 = self.multDictOfDicts(self.dod1, self.dod1, nodes)
        self.dod3 = self.multDictOfDicts(self.dod2, self.dod1, nodes)
        #self.toStringMatrix(self.dod1)
        #self.toStringMatrix(self.dod2)
        #self.toStringMatrix(self.dod3)
            
    def _dodFromGraph(self, sentenceGraph, nodes):
        graph = sentenceGraph.dependencyGraph
        dod = {}
        for i in nodes:
            dod[i] = {}
        for i in nodes:
            for j in nodes:
                edge = graph.get_edge(i, j)
                if len(edge) > 0:
                    if not dod[i].has_key(j):
                        dod[i][j] = []
                    if not dod[j].has_key(i):
                        dod[j][i] = []
                    for e in edge:
                        t1 = sentenceGraph.tokensById[e.get("t1")]
                        t2 = sentenceGraph.tokensById[e.get("t2")]
                        # list of visited tokens, last edge of chain, chain string
                        dod[i][j].append( ([t1, t2], e, "frw_"+e.get("type")) ) # frw
                        dod[j][i].append( ([t2, t1], e, "rev_"+e.get("type")) ) # rev
        return dod

    def overlap(self, list1, list2):
        for i in list1:
            for j in list2:
                if i == j: # duplicate dependency
                    return True
        return False
    
    def extendPaths(self, edges1, edges2):
        newEdges = []
        for e1 in edges1:
            for e2 in edges2:
                if not self.overlap(e1[0], e2[0][1:]):
                    newEdges.append( (e1[0] + e2[0][1:], e2[1], e1[2] + "-" + e2[2]) )
        return newEdges

    def multDictOfDicts(self, dod1, dod2, nodes):
        result = {}
        for i in nodes:
            result[i] = {}
        for i in nodes:
            for j in nodes:
                for k in nodes:
                    if dod1[i].has_key(k):
                        edges1 = dod1[i][k]
                    else:
                        edges1 = []
                    if dod2[k].has_key(j):
                        edges2 = dod2[k][j]
                    else:
                        edges2 = []
                    newPaths = self.extendPaths(edges1, edges2)
                    if len(newPaths) > 0:
                        if result[i].has_key(j):
                            result[i][j].extend(newPaths)
                        else:
                            result[i][j] = newPaths
        return result

#    def toStringMatrix(self, matrix):
#        for i in matrix.keys():
#            for j in matrix[i].keys():
#                newList = []
#                for l in matrix[i][j]:
#                    string = ""
#                    for obj in l:
#                        if string != "":
#                            string += "-"
#                        if obj[1]:
#                            string += "frw_"+str(obj[0].get("type"))
#                        else:
#                            string += "rev_"+str(obj[0].get("type"))
#                    newList.append( (l, string) )
#                matrix[i][j] = newList
    
    def buildChainsAlternative(self, token, features, sentenceGraph):
        self._buildChainsMatrix(self.dod1, token, features, 3, sentenceGraph)
        self._buildChainsMatrix(self.dod2, token, features, 2, sentenceGraph)
        self._buildChainsMatrix(self.dod3, token, features, 1, sentenceGraph)
    
    def _buildChainsMatrix(self, matrix, token, features, depth, sentenceGraph):
        strDepthLeft = "dist_" + str(depth)
        for node in matrix[token].keys():
            if node == token: # don't allow self-loops
                continue
            for tokenFeature in self.getTokenFeatures(node, sentenceGraph):
                features[self.featureSet.getId(strDepthLeft + tokenFeature)] = 1
            for chain in matrix[token][node]:
                features[self.featureSet.getId("chain_"+strDepthLeft+"-"+chain[2])] = 1
                features[self.featureSet.getId("dep_"+strDepthLeft+chain[1].get("type"))] = 1
            
    
    def buildChains(self,token,sentenceGraph,features,depthLeft=3,chain="",visited=None):
        if depthLeft == 0:
            return
        strDepthLeft = "dist_" + str(depthLeft)
        
        if visited == None:
            visited = set()

        inEdges = self.inEdgesByToken[token]
        outEdges = self.outEdgesByToken[token]
        edgeSet = visited.union(self.edgeSetByToken[token])
        for edge in inEdges:
            if not edge in visited:
                edgeType = edge[2].get("type")
                features[self.featureSet.getId("dep_"+strDepthLeft+edgeType)] = 1

                nextToken = edge[0]
                for tokenFeature in self.getTokenFeatures(nextToken, sentenceGraph):
                    features[self.featureSet.getId(strDepthLeft + tokenFeature)] = 1
#                for entity in sentenceGraph.tokenIsEntityHead[nextToken]:
#                    if entity.get("isName") == "True":
#                        features[self.featureSet.getId("name_dist_"+strDepthLeft)] = 1
#                        features[self.featureSet.getId("name_dist_"+strDepthLeft+entity.get("type"))] = 1
#                features[self.featureSet.getId("POS_dist_"+strDepthLeft+nextToken.get("POS"))] = 1
#                tokenText = sentenceGraph.getTokenText(nextToken)
#                features[self.featureSet.getId("text_dist_"+strDepthLeft+tokenText)] = 1
                
                features[self.featureSet.getId("chain_"+strDepthLeft+chain+"-rev_"+edgeType)] = 1
                self.buildChains(nextToken,sentenceGraph,features,depthLeft-1,chain+"-rev_"+edgeType,edgeSet)

        for edge in outEdges:
            if not edge in visited:
                edgeType = edge[2].get("type")
                features[self.featureSet.getId("dep_"+strDepthLeft+edgeType)] = 1

                nextToken = edge[1]
                for tokenFeature in self.getTokenFeatures(nextToken, sentenceGraph):
                    features[self.featureSet.getId(strDepthLeft + tokenFeature)] = 1
#                for entity in sentenceGraph.tokenIsEntityHead[nextToken]:
#                    if entity.get("isName") == "True":
#                        features[self.featureSet.getId("name_dist_"+strDepthLeft)] = 1
#                        features[self.featureSet.getId("name_dist_"+strDepthLeft+entity.get("type"))] = 1
#                features[self.featureSet.getId("POS_dist_"+strDepthLeft+nextToken.get("POS"))] = 1
#                tokenText = sentenceGraph.getTokenText(nextToken)
#                features[self.featureSet.getId("text_dist_"+strDepthLeft+tokenText)] = 1
                
                features[self.featureSet.getId("chain_"+strDepthLeft+chain+"-frw_"+edgeType)] = 1
                self.buildChains(nextToken,sentenceGraph,features,depthLeft-1,chain+"-frw_"+edgeType,edgeSet)
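The recursion above turns every dependency path of length up to 3 out of a token into string features, prefixing each traversed edge with frw_ or rev_ for its direction. A stripped-down, runnable sketch of the same idea (toy data: edges as (src, dst, type) tuples and a plain set of feature strings instead of the sentenceGraph/featureSet machinery; as a simplification, only the traversed edge is marked visited):

import copy  # not needed here, shown only to keep the sketch self-contained

def build_chains(token, edges, features, depth_left=3, chain="", visited=None):
    if depth_left == 0:
        return
    if visited is None:
        visited = set()
    for edge in edges:
        if edge in visited:
            continue
        src, dst, dep_type = edge
        if dst == token:    # incoming edge, follow it backwards
            step, next_token = "-rev_" + dep_type, src
        elif src == token:  # outgoing edge, follow it forwards
            step, next_token = "-frw_" + dep_type, dst
        else:
            continue
        features.add("chain_dist_" + str(depth_left) + chain + step)
        build_chains(next_token, edges, features, depth_left - 1, chain + step, visited | set([edge]))

features = set()
build_chains("t1", [("t0", "t1", "nsubj"), ("t1", "t2", "dobj")], features)
print(sorted(features))  # ['chain_dist_3-frw_dobj', 'chain_dist_3-rev_nsubj']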
Example no. 29
    def optimize(self,
                 trainSets,
                 classifySets,
                 parameters=defaultOptimizationParameters,
                 evaluationClass=None,
                 evaluationArgs={},
                 combinationsThatTimedOut=None):
        if parameters.has_key("predefined"):
            print >> sys.stderr, "Predefined model, skipping parameter estimation"
            return {"predefined": parameters["predefined"]}

        print >> sys.stderr, "Optimizing parameters"
        parameterNames = parameters.keys()
        parameterNames.sort()
        #        for p in self.notOptimizedParameters:
        #            if p in parameterNames:
        #                parameterNames.remove(p)
        parameterValues = []
        for parameterName in parameterNames:
            parameterValues.append([])
            for value in parameters[parameterName]:
                parameterValues[-1].append((parameterName, value))
        combinationLists = combine.combine(*parameterValues)
        combinations = []
        for combinationList in combinationLists:
            combinations.append({})
            for value in combinationList:
                combinations[-1][value[0]] = value[1]
        if combinationsThatTimedOut == None:
            combinationsThatTimedOut = []


#        # re-add non-optimized parameters to combinations
#        for p in self.notOptimizedParameters:
#            if parameters.has_key(p):
#                for combination in combinations:
#                    combination[p] = parameters[p]

        bestResult = None
        combinationCount = 1
        if hasattr(self, "tempDir"):
            mainTempDir = self.tempDir
            mainDebugFile = self.debugFile
        for combination in combinations:
            print >> sys.stderr, " Parameters " + str(
                combinationCount) + "/" + str(
                    len(combinations)) + ":", str(combination),
            skip = False
            #print combinationsThatTimedOut
            for discarded in combinationsThatTimedOut:
                if self._dictIsIdentical(combination, discarded):
                    print >> sys.stderr
                    print >> sys.stderr, "  Discarded before, skipping"
                    skip = True
                    break
            if skip:
                continue
            # Make copies of examples in case they are modified (the copy calls below are currently disabled)
            fold = 1
            foldResults = []
            for classifyExamples in classifySets:
                if type(trainSets[0]) == types.StringType:
                    trainExamples = trainSets[0]
                else:
                    trainExamples = []
                    for trainSet in trainSets:
                        if trainSet != classifyExamples:
                            trainExamples.extend(trainSet)
                trainExamplesCopy = trainExamples
                if type(trainExamples) == types.ListType:
                    trainExamplesCopy = trainExamples  #ExampleUtils.copyExamples(trainExamples)
                classifyExamplesCopy = classifyExamples
                if type(classifyExamples) == types.ListType:
                    classifyExamplesCopy = classifyExamples  #ExampleUtils.copyExamples(classifyExamples)
                if hasattr(self, "tempDir"):
                    self.tempDir = mainTempDir + "/parameters" + str(combinationCount) + "/optimization" + str(fold)
                    if not os.path.exists(self.tempDir):
                        os.makedirs(self.tempDir)
                    self.debugFile = open(self.tempDir + "/debug.txt", "wt")

                timer = Timer()
                #trainStartTime = time.time()
                trainRV = self.train(trainExamplesCopy, combination)
                #trainTime = time.time() - trainStartTime
                #print >> sys.stderr, " Time spent:", trainTime, "s"
                print >> sys.stderr, " Time spent:", timer.elapsedTimeToString(
                )
                if trainRV == 0:
                    predictions = self.classify(classifyExamplesCopy)
                    evaluation = evaluationClass(predictions, **evaluationArgs)
                    if len(classifySets) == 1:
                        print >> sys.stderr, evaluation.toStringConcise("  ")
                    else:
                        print >> sys.stderr, evaluation.toStringConcise(indent="  ", title="Fold " + str(fold))
                    foldResults.append(evaluation)
                    if hasattr(self, "tempDir"):
                        evaluation.saveCSV(self.tempDir + "/results.csv")
                else:
                    combinationsThatTimedOut.append(combination)
                    print >> sys.stderr, "  Timed out"
                fold += 1
            if len(foldResults) > 0:
                averageResult = evaluationClass.average(foldResults)
                poolResult = evaluationClass.pool(foldResults)
                if hasattr(self, "tempDir"):
                    TableUtils.writeCSV(combination, mainTempDir + "/parameters" + str(combinationCount) + ".csv")
                    averageResult.saveCSV(mainTempDir + "/parameters" + str(combinationCount) + "/resultsAverage.csv")
                    poolResult.saveCSV(mainTempDir + "/parameters" + str(combinationCount) + "/resultsPooled.csv")
                if len(classifySets) > 1:
                    print >> sys.stderr, averageResult.toStringConcise("  Avg: ")
                    print >> sys.stderr, poolResult.toStringConcise("  Pool: ")
                if bestResult == None or poolResult.compare(bestResult[1]) > 0: #: averageResult.fScore > bestResult[1].fScore:
                    #bestResult = (predictions, averageResult, combination)
                    bestResult = (None, poolResult, combination)
                    # Make sure memory is released, especially important since some of the previous steps
                    # copy examples
                    bestResult[1].classifications = None
                    bestResult[1].predictions = None
            combinationCount += 1
            if hasattr(self, "tempDir"):
                self.debugFile.close()
        if hasattr(self, "tempDir"):
            self.tempDir = mainTempDir
            self.debugFile = mainDebugFile
        return bestResult
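The combine.combine helper used above is not part of this listing; assuming it simply expands the {name: [values]} grid into every combination, a minimal stand-in that produces the per-combination dicts directly (as the loop above goes on to build) could look like:

import itertools

def expand_grid(parameters):
    # Cartesian product over {name: [values]} -> one {name: value} dict per
    # combination; a hypothetical stand-in for the combine.combine call above.
    names = sorted(parameters.keys())
    return [dict(zip(names, values))
            for values in itertools.product(*(parameters[n] for n in names))]

print(expand_grid({"c": [0.1, 1, 10], "e": [0.5]}))
# [{'c': 0.1, 'e': 0.5}, {'c': 1, 'e': 0.5}, {'c': 10, 'e': 0.5}]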
Example no. 32
def optimizeCSC(Classifier,
                Evaluator,
                trainExamples,
                testExamples,
                classIds,
                combinations,
                workDir=None,
                timeout=None,
                cscConnection=None,
                downloadAllModels=False,
                steps="BOTH",
                threshold=False):
    bestResult = None
    combinationCount = 1
    combinationIds = []
    assert steps in ["BOTH", "SUBMIT", "RESULTS"], steps

    if type(classIds) == types.StringType:
        classIds = IdSet(filename=classIds)
    if Classifier.__name__ == "MultiLabelClassifier":
        negClass1 = True
        if "classifier" in combinations[0] and combinations[0][
                "classifier"] == "svmperf":
            negClass1 = False
        print "negclass1", negClass1
        Classifier.makeClassFiles(trainExamples,
                                  testExamples,
                                  classIds,
                                  negClass1=negClass1)

    if steps in ["BOTH", "SUBMIT"]:
        print >> sys.stderr, "Initializing runs"
        for combination in combinations:
            Stream.setIndent(" ")
            print >> sys.stderr, "Parameters " + str(
                combinationCount) + "/" + str(
                    len(combinations)) + ":", str(combination)
            # Train
            combinationIds.append(Classifier.initTrainAndTestOnLouhi(trainExamples, testExamples, combination, cscConnection, workDir, classIds))
            combinationCount += 1
    else:
        for combination in combinations:
            idStr = ""
            for key in sorted(combination.keys()):
                idStr += "-" + str(key) + "_" + str(combination[key])
            combinationIds.append(idStr)
    Stream.setIndent()

    if steps in ["BOTH", "RESULTS"]:
        Stream.setIndent(" ")
        print >> sys.stderr, "Waiting for results"
        finished = 0
        louhiTimer = Timer()
        #combinationStatus = {}
        while (True):
            # count finished
            finished = 0
            processStatus = {"FINISHED":0, "QUEUED":0, "FAILED":0, "RUNNING":0}
            for id in combinationIds:
                #status = Classifier.getLouhiStatus(id, cscConnection)
                #combinationStatus[id] = status
                #processStatus[status] += 1
                Classifier.getLouhiStatus(id, cscConnection, processStatus, classIds)
            p = processStatus
            processStatusString = str(p["QUEUED"]) + " queued, " + str(p["RUNNING"]) + " running, " + str(p["FINISHED"]) + " finished, " + str(p["FAILED"]) + " failed"
            if processStatus["QUEUED"] + processStatus["RUNNING"] == 0:
                print >> sys.stderr
                print >> sys.stderr, "All runs done (" + processStatusString + ")"
                break
            # decide what to do
            if timeout == None or louhiTimer.getElapsedTime() < timeout:
                sleepString = " [          ]     "
                print >> sys.stderr, "\rWaiting for " + str(
                    len(combinations)
                ) + " on " + cscConnection.machineName + "(" + processStatusString + "),", louhiTimer.elapsedTimeToString(
                ) + sleepString,
                #time.sleep(60)
                sleepTimer = Timer()
                while sleepTimer.getElapsedTime() < 60:
                    steps = int(10 * sleepTimer.getElapsedTime() / 60) + 1
                    sleepString = " [" + steps * "." + (10 -
                                                        steps) * " " + "]     "
                    print >> sys.stderr, "\rWaiting for " + str(
                        len(combinations)
                    ) + " on " + cscConnection.machineName + "(" + processStatusString + "),", louhiTimer.elapsedTimeToString(
                    ) + sleepString,
                    time.sleep(5)
            else:
                print >> sys.stderr
                print >> sys.stderr, "Timed out, ", louhiTimer.elapsedTimeToString(
                )
                break

        print >> sys.stderr, "Evaluating results"
        #if type(testExamples) != types.ListType:
        #    print >> sys.stderr, "Loading examples from file", testExamples
        #    testExamples = ExampleUtils.readExamples(testExamples,False)
        bestCombinationId = None
        for i in range(len(combinationIds)):
            id = combinationIds[i]
            Stream.setIndent(" ")
            # Evaluate
            predictions = Classifier.getLouhiPredictions(id, cscConnection, workDir, classIds)
            if predictions == None:
                print >> sys.stderr, "No results for combination" + id
            else:
                if downloadAllModels:
                    modelFileName = Classifier.downloadModel(id, cscConnection, workDir)
                    if workDir != None:
                        modelFileName = os.path.join(workDir, modelFileName)
                        subprocess.call("gzip -fv " + modelFileName, shell=True)
                print >> sys.stderr, "Evaluating results for combination" + id
                evaluationOutput = "evaluation" + id + ".csv"
                if workDir != None:
                    evaluationOutput = os.path.join(workDir, evaluationOutput)
                evaluator = Evaluator.evaluate(testExamples, predictions, classIds, evaluationOutput)
                if threshold:
                    print >> sys.stderr, "Thresholding"
                    evaluator.determineThreshold(testExamples, predictions)
                if Classifier.__name__ != "MultiLabelClassifier":
                    if bestResult == None or evaluator.compare(bestResult[0]) > 0: #: averageResult.fScore > bestResult[1].fScore:
                        bestResult = [evaluator, None, predictions, evaluationOutput, combinations[i]]
                        bestCombinationId = id
                else:
                    assert Evaluator.__name__ == "MultiLabelEvaluator", Evaluator.__name__
                    if bestResult == None:
                        bestResult = [{}, None]
                        for className in classIds.Ids:
                            if className != "neg" and "---" not in className:
                                bestResult[0][className] = [-1, None, classIds.getId(className), None]
                    for className in classIds.Ids:
                        if className != "neg" and "---" not in className:
                            fscore = evaluator.dataByClass[classIds.getId(className)].fscore
                            if fscore > bestResult[0][className][0]:
                                bestResult[0][className] = [fscore, id, bestResult[0][className][2]]
                                if threshold:
                                    classId = classIds.getId(className, False)
                                    if classId in evaluator.thresholds:
                                        bestResult[0][className].append(evaluator.thresholds[classId])
                                    else:
                                        bestResult[0][className].append(0.0)
                                else:
                                    bestResult[0][className].append(None)
                    bestCombinationId = bestResult
                os.remove(predictions) # remove predictions to save space
        Stream.setIndent()
        print >> sys.stderr, "Selected parameters", bestResult[-1]
        #if Classifier.__name__ == "MultiLabelClassifier":
        #    evaluator = Evaluator.evaluate(testExamples, predictions, classIds, evaluationOutput)

        # Download best model and predictions
        modelFileName = Classifier.downloadModel(bestCombinationId, cscConnection, workDir)
        if workDir != None:
            modelFileName = os.path.join(workDir, modelFileName)
        subprocess.call("gzip -fv " + modelFileName, shell=True)
        modelFileName = modelFileName + ".gz"
        #if Classifier.__name__ != "MultiLabelClassifier":
        #bestResult = [None, None]
        bestResult[1] = modelFileName
        return bestResult
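The waiting loop above redraws a ten-dot progress bar in place (carriage return, trailing comma to suppress the newline) between status polls. The same pattern, reduced to a self-contained sketch with a hypothetical job_done callable standing in for the getLouhiStatus polling:

import sys, time

def wait_with_progress(job_done, poll_interval=60, tick=5):
    # Redraw a ten-dot progress bar on a single stderr line until
    # job_done() reports completion.
    while not job_done():
        start = time.time()
        while time.time() - start < poll_interval:
            dots = int(10 * (time.time() - start) / poll_interval) + 1
            sys.stderr.write("\rWaiting [" + dots * "." + (10 - dots) * " " + "]")
            sys.stderr.flush()
            time.sleep(tick)
    sys.stderr.write("\nAll runs done\n")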
Example no. 33
    def perform_rsa(self,
                    draw="NONE",
                    print_times="NONE",
                    save_summary=False,
                    save_data=None):

        if not draw in ["NONE", "ITERATION", "END"]:
            raise ValueError("draw must be either: NONE, ITERATION or END")

        if not print_times in ["NONE", "ALL", "TOTAL"]:
            raise ValueError("print_times must be either: NONE, ALL or TOTAL")

        print_times_all = print_times == "ALL"
        iter_timers = []

        summary_dict = {
            "configuration": {
                "fig_radiuses": self.fig_radiuses.astype(float).tolist(),
                "fig_positions": self.fig_xys.astype(float).tolist(),
                "cell_num_world_size":
                [int(self.cell_num_x),
                 int(self.cell_num_y)],
                "cell_size": float(self.cell_size),
                "added_fig_num": int(self.added_fig_num),
                "voxel_removal_treshold": float(self.voxel_removal_treshold),
                "voxel_num_treshold": int(self.voxel_num_treshold),
                "fig_area": float(self.fig_area),
                "fig_radius": float(self.fig_radius),
                "version": float(self.version)
            }
        }

        iterations_data = []
        voxel_fraction = 1.0

        self.initialise_rsa()
        while (self.voxel_num > 0):

            timer_iter = Timer()
            timer_iter.start_timer("iteration")

            timer_iter.start_timer("generation")
            self.generate_figs()
            g_t = timer_iter.stop_timer("generation", print_times_all)

            timer_iter.start_timer("reject_vs_existing")
            self.reject_figs_vs_existing()
            re_t = timer_iter.stop_timer("reject_vs_existing", print_times_all)

            timer_iter.start_timer("reject_vs_new")
            self.reject_figs_vs_new()
            rn_t = timer_iter.stop_timer("reject_vs_new", print_times_all)

            timer_iter.start_timer("split_voxels")
            voxel_added_cond = (1.0 - (self.successfully_added_figs_num / self.added_fig_num)) > self.voxel_removal_treshold
            voxel_num_cond = self.successfully_added_figs_num == 0 or self.voxel_num < self.voxel_num_treshold
            if voxel_added_cond and voxel_num_cond:
                self.split_voxels()
                voxel_fraction = 0.5 * voxel_fraction
            s_t = timer_iter.stop_timer("split_voxels", print_times_all)

            timer_iter.start_timer("reject_voxels")
            self.reject_voxels()
            rv_t = timer_iter.stop_timer("reject_voxels", print_times_all)

            i_t = timer_iter.stop_timer("iteration", print_times_all)
            iter_timers.append(timer_iter.get_timers())

            iteration_dict = {
                "timers": {
                    "generation": g_t,
                    "reject_vs_existing": re_t,
                    "reject_vs_new": rn_t,
                    "split_voxels": s_t,
                    "reject_voxels": rv_t,
                    "iteration": i_t
                },
                "data": {
                    "voxel_num": int(self.voxel_num),
                    "voxel_fraction": voxel_fraction,
                    "fig_num": int(self.fig_num),
                    "density": self.calculate_density()
                }
            }
            iterations_data.append(iteration_dict)

            if draw == "ITERATION":
                draw_func(self)

            if print_times_all:
                print("DATA: figures:", self.fig_num)
                print("DATA: voxels:", self.voxel_num)
                print("DATA: voxel_fraction:", voxel_fraction)
                print("DATA: density:", self.calculate_density())
                print("===================")

            self.iteration += 1

        total_time = sum([t["iteration"][2] for t in iter_timers])
        if print_times == "ALL" or print_times == "TOTAL":
            print("DATA: figures:", self.fig_num)
            print("DATA: voxels:", self.voxel_num)
            print("DATA: voxel_fraction:", voxel_fraction)
            print("DATA: density:", self.calculate_density())

            name = "total"
            print(f'TIMER: {name:20s} {total_time:.20f}')

        if draw == "END":
            draw_func(self)

        final_dict = {
            "voxel_fraction": voxel_fraction,
            "fig_num": int(self.fig_num),
            "density": self.calculate_density(),
            "total_time": total_time
        }

        summary_dict["iterations"] = iterations_data
        summary_dict["summary"] = final_dict
        if save_summary:
            record_run(summary_dict)
        if save_data != None:
            save_output(self.figs, self.fig_num, save_data)

        self.finalise()
        return summary_dict
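The Timer used by perform_rsa keeps several named stopwatches at once. The class itself is not shown in this listing, but a minimal stand-in consistent with the start_timer/stop_timer/get_timers calls above might be (the (start, stop, elapsed) tuple layout is an assumption, inferred from the t["iteration"][2] access above):

import time

class Timer:
    # Minimal sketch of a named-stopwatch timer, not the project's actual class.
    def __init__(self):
        self.timers = {}
    def start_timer(self, name):
        self.timers[name] = (time.time(), None, None)
    def stop_timer(self, name, verbose=False):
        start = self.timers[name][0]
        stop = time.time()
        elapsed = stop - start
        self.timers[name] = (start, stop, elapsed)
        if verbose:
            print('TIMER: %-20s %.6f' % (name, elapsed))
        return elapsed
    def get_timers(self):
        return self.timers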
Example no. 34
    def spectral_clustering(self,
                            clusters_n,
                            k=6,
                            type='SM',
                            embed_type='custom',
                            **kwargs):
        """Performing k-means spectral clustering on laplacian eigenvectors via scikit-learn kmeans algo
        
        Parameters
        ----------
        clusters_n: int
            number of clusters
        k: int, optional
            num of eigenvectors to base kmeans
        type: str, optional
            type of eigenvectors to use for kmeans, as specified in laplacian_eigs
        embed_type: str, optional
            choices: 'custom' - perform embedding using hypergraph laplacian and custom implemented embedding
                     'sklearn_laplacian' - perform embedding using modified sklearn.spectral.embedding using the hypergraph laplacian
                     'sklearn_adjacency' - perform embedding using original sklearn.spectral.embedding using the hypergraph adjacency matrix
            default is 'custom'
        kwargs: named arguments to pass to laplacian eigs function

        Returns
        -------
        centroid: ndarray of shape (k, n_features)
        label: ndarray of shape (n_samples,)
        label_dict: dictionary
            dictionary containing {partiteName: { id: communityId , ...} , ... }
        node_tags: list of str
            order of partites, as found in hyperedges
        inertia: float
        """
        if embed_type == 'sklearn_laplacian':
            f = None
            if 'filename' in kwargs:
                f = kwargs.pop('filename')
            if 'minTol' in kwargs:
                kwargs.pop('minTol')
            if 'maxiter' in kwargs:
                kwargs.pop('maxiter')
            eigenvecs = spectral_embedding(self.laplacian(), clusters_n, **kwargs)

            #===================================================================
            # #DBG
            # print(eigenvecs)
            # print(eigenvecs.min())
            # print(eigenvecs.max())
            # print(eigenvecs.mean())
            #===================================================================
            if f:
                json_writer({'eigenvecs_used': sp.real(eigenvecs).tolist()}, f)

        elif embed_type == 'sklearn_adjacency':
            f = None
            if 'filename' in kwargs:
                f = kwargs.pop('filename')
            if 'minTol' in kwargs:
                kwargs.pop('minTol')
            if 'maxiter' in kwargs:
                kwargs.pop('maxiter')
            eigenvecs = skmanifold.spectral_embedding(self.adjacency_matrix(), clusters_n, **kwargs)

            #===================================================================
            # #DBG
            # print(eigenvecs)
            # print(eigenvecs.min())
            # print(eigenvecs.max())
            # print(eigenvecs.mean())
            #===================================================================
            if f:
                json_writer({'eigenvecs_used': sp.real(eigenvecs).tolist()}, f)

        else:
            eigenvecs = self.laplacian_eigs(k, type, **kwargs)[1]

        with Timer() as t_cl:
            cen, lab, inert = sklearn.cluster.k_means(eigenvecs, clusters_n)

        self.clustering_timer = t_cl.secs

        label_dict = self._community_vector_match(lab)

        return cen, lab, label_dict, self.node_tags, inert
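Here Timer is used as a context manager that exposes the duration of the with-block as .secs. A minimal sketch of such a class (an assumption, not the project's actual implementation):

import time

class Timer(object):
    # Context-manager stopwatch: measures the with-block and stores the
    # duration in .secs, as used by the spectral_clustering code above.
    def __enter__(self):
        self.start = time.time()
        return self
    def __exit__(self, exc_type, exc_value, traceback):
        self.secs = time.time() - self.start
        return False  # never swallow exceptions raised inside the block

# usage:
# with Timer() as t:
#     do_work()
# print(t.secs)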
Example no. 35
    optparser.add_option("-d", "--paramOptData", default=None, dest="paramOptData", help="The fraction of the corpus to be always used for parameter optimization")
    optparser.add_option("-m", "--resultsToXML", default=None, dest="resultsToXML", help="Output interaction xml-file")
    (options, args) = optparser.parse_args()

    timer = Timer()
    print >> sys.stderr, timer.toString()

    if options.folds.find(",") != 0:
        options.folds = options.folds.split(",")
        assert (len(options.folds) == 2)
        options.folds[0] = int(options.folds[0])
        options.folds[1] = int(options.folds[1])
        if options.paramOptData != None:
            print >> sys.stderr, "Parameter optimization set defined, parameter " + str(
                options.folds[1]
            ) + "-fold cross validation will not be performed."
    else:
        options.folds = (int(options.folds), int(options.folds))

    if options.output != None:
Example no. 36
    def test(cls,
             examples,
             modelPath,
             output=None,
             parameters=None,
             forceInternal=False,
             classIds=None):  # , timeout=None):
        """
        Classify examples with a pre-trained model.
        
        @type examples: string (filename) or list (or iterator) of examples
        @param examples: a list or file containing examples in SVM-format
        @type modelPath: string
        @param modelPath: filename of the pre-trained model file
        @type parameters: a dictionary or string
        @param parameters: parameters for the classifier
        @type output: string
        @param output: the name of the predictions file to be written
        @type forceInternal: Boolean
        @param forceInternal: Use python classifier even if SVM Multiclass binary is defined in Settings.py
        """
        if type(parameters) == types.StringType:
            parameters = splitParameters(parameters)
        timer = Timer()
        if type(examples) == types.ListType:
            print >> sys.stderr, "Classifying", len(
                examples), "with SVM-MultiClass model", modelPath
            examples, predictions = self.filterClassificationSet(
                examples, False)
            testPath = self.tempDir + "/test.dat"
            Example.writeExamples(examples, testPath)
        else:
            print >> sys.stderr, "Classifying file", examples, "with SVM-MultiClass model", modelPath
            testPath = examples
            examples = Example.readExamples(examples, False)
        if parameters != None:
            parameters = copy.copy(parameters)
            if parameters.has_key("c"):
                del parameters["c"]
            if parameters.has_key("predefined"):
                parameters = copy.copy(parameters)
                modelPath = os.path.join(parameters["predefined"][0],
                                         "classifier/model")
                del parameters["predefined"]
        # Read model
        if modelPath == None:
            modelPath = "model-multilabel"
        classModels = {}
        if modelPath.endswith(".gz"):
            f = gzip.open(modelPath, "rt")
        else:
            f = open(modelPath, "rt")
        thresholds = {}
        for line in f:
            key, value, threshold = line.split()
            classModels[key] = value
            if threshold != "None":
                thresholds[key] = float(threshold)
            else:
                thresholds[key] = 0.0
        f.close()
        mergedPredictions = []
        if type(classIds) == types.StringType:
            classIds = IdSet(filename=classIds)
        #print classModels
        print "Thresholds", thresholds
        classifierBin = Settings.SVMMultiClassDir + "/svm_multiclass_classify"
        print parameters
        if "classifier" in parameters and "svmperf" in parameters["classifier"]:
            classifierBin = Settings.SVMPerfDir + "/svm_perf_classify"
            parameters = copy.copy(parameters)
            del parameters["classifier"]
        for className in classIds.getNames():
            if className != "neg" and not "---" in className:
                classId = classIds.getId(className)
                if thresholds[str(className)] != 0.0:
                    print >> sys.stderr, "Classifying", className, "with threshold", thresholds[
                        str(className)]
                else:
                    print >> sys.stderr, "Classifying", className
                args = [classifierBin]
                #self.__addParametersToSubprocessCall(args, parameters)
                classOutput = "predictions" + ".cls-" + className
                logFile = open("svmmulticlass" + ".cls-" + className + ".log",
                               "at")
                args += [testPath, classModels[str(className)], classOutput]
                print args
                subprocess.call(args, stdout=logFile, stderr=logFile)
                cls.addPredictions(classOutput, mergedPredictions, classId, len(classIds.Ids), threshold=thresholds[str(className)])
        print >> sys.stderr, timer.toString()

        predFileName = output
        f = open(predFileName, "wt")
        for mergedPred in mergedPredictions:
            if len(mergedPred[0]) > 1 and "1" in mergedPred[0]:
                mergedPred[0].remove("1")
            mergedPred[1] = str(mergedPred[1])
            mergedPred[0] = ",".join(sorted(list(mergedPred[0])))
            f.write(" ".join(mergedPred) + "\n")
        f.close()

        return mergedPredictions
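The model file parsed above is a plain three-column index: class name, per-class model path, and a threshold (the literal string None when unset). A hypothetical helper that writes that format back out, round-tripping with the parser above:

def writeModelIndex(path, classModels, thresholds=None):
    # One line per class: "<className> <modelFile> <threshold-or-None>";
    # names and paths must not contain spaces, matching the line.split() parser.
    f = open(path, "wt")
    for className, modelFile in sorted(classModels.items()):
        t = None
        if thresholds is not None:
            t = thresholds.get(className)
        f.write("%s %s %s\n" % (className, modelFile, t))
    f.close()

# writeModelIndex("model-multilabel", {"Phosphorylation": "model.cls-Phosphorylation"})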
Example no. 38
class Camera:
    def __init__(self, view_origin, view_dim, screen):
        self.view_plane = Plane(view_origin, view_dim)
        self.screen = screen
        self.zoom_in = Timer()
        self.zoom_out = Timer()
        self.move_left = Timer()
        self.move_right = Timer()
        self.move_up = Timer()
        self.move_down = Timer()
        self.zoom_max = 10
        self.zoom_min = 1
        self.move_sensitivity = 50
        self.zoom_sensitivity = 1

        self.key_events = {
            'r': self.toggle_zoom_in,
            'e': self.toggle_zoom_out,
            'right': self.toggle_move_right,
            'left': self.toggle_move_left,
            'up': self.toggle_move_up,
            'down': self.toggle_move_down
        }

    def update(self):
        self.move()
        self.zoom()

    def move(self):
        y_move = self.move_up.lap() - self.move_down.lap()
        x_move = self.move_right.lap() - self.move_left.lap()
        self.view_plane.origin.add(
            Vector(x_move * self.move_sensitivity,
                   y_move * self.move_sensitivity))

    def zoom(self):
        to_zoom = self.zoom_in.lap() - self.zoom_out.lap()
        to_zoom *= self.zoom_sensitivity
        self.view_plane.dim.add(Vector(to_zoom, to_zoom))

        self.screen.write_str(Vector(0, 0), self.view_plane.dim.__str__())

    def key_down(self, key):
        # self.screen.write_str(Vector(1, 1), 'down: ' + key)
        if key in self.key_events:
            self.key_events[key](True)

    def key_up(self, key):
        # self.screen.write_str(Vector(1, 1), 'up: ' + key)

        if key in self.key_events:
            self.key_events[key](False)

    def toggle_zoom_in(self, key_down):
        if key_down:
            if not self.zoom_in.running:
                self.zoom_in.start()
        else:
            self.screen.write_str(Vector(0, 7), self.zoom_in.poll().__str__())
            self.zoom_in.stop()

    def toggle_zoom_out(self, key_down):
        if key_down:
            if not self.zoom_out.running:
                self.zoom_out.start()
        else:
            self.zoom_out.stop()

    def toggle_move_left(self, key_down):
        if key_down:
            if not self.move_left.running:
                self.move_left.start()
        else:
            self.move_left.stop()

    def toggle_move_right(self, key_down):
        if key_down:
            if not self.move_right.running:
                self.move_right.start()
        else:
            self.move_right.stop()

    def toggle_move_up(self, key_down):
        if key_down:
            if not self.move_up.running:
                self.move_up.start()
        else:
            self.move_up.stop()

    def toggle_move_down(self, key_down):
        if key_down:
            if not self.move_down.running:
                self.move_down.start()
        else:
            self.move_down.stop()
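The Camera above treats each Timer as a stopwatch whose lap() yields the time accrued since the previous lap() while running, so held keys translate into movement proportional to elapsed time. A minimal stand-in consistent with the start/stop/running/lap/poll calls used (an assumption, since the real Timer is not shown):

import time

class Timer(object):
    # Stopwatch sketch: counts time only while running; lap() returns the
    # time since the previous lap() and resets the mark.
    def __init__(self):
        self.running = False
        self._mark = None
    def start(self):
        self.running = True
        self._mark = time.time()
    def stop(self):
        self.running = False
    def poll(self):
        # Elapsed time since the last mark, without resetting it.
        if not self.running:
            return 0.0
        return time.time() - self._mark
    def lap(self):
        if not self.running:
            return 0.0
        now = time.time()
        elapsed = now - self._mark
        self._mark = now
        return elapsed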
Example no. 39
    optparser.add_option("-i", "--input", default=defaultAnalysisFilename, dest="input", help="Corpus in analysis format", metavar="FILE")
    optparser.add_option("-s", "--test", default=None, dest="input_test", help="Corpus in analysis format", metavar="FILE")
    optparser.add_option("-g", "--testGold", default=None, dest="input_test_gold", help="Corpus in analysis format", metavar="FILE")
    optparser.add_option("-o", "--output", default=None, dest="output", help="Output directory, useful for debugging")
    optparser.add_option("-c", "--classifier", default="SVMLightClassifier", dest="classifier", help="Classifier Class")
    optparser.add_option("-t", "--tokenization", default="split_gs", dest="tokenization", help="tokenization")
    optparser.add_option("-p", "--parse", default="split_gs", dest="parse", help="parse")
    optparser.add_option("-x", "--exampleBuilderParameters", default=None, dest="exampleBuilderParameters", help="Parameters for the example builder")
    optparser.add_option("-y", "--parameters", default=None, dest="parameters", help="Parameters for the classifier")
    optparser.add_option("-b", "--exampleBuilder", default="SimpleDependencyExampleBuilder", dest="exampleBuilder", help="Example Builder Class")
    optparser.add_option("-e", "--evaluator", default="BinaryEvaluator", dest="evaluator", help="Prediction evaluator class")
    optparser.add_option("-v", "--visualization", default=None, dest="visualization", help="Visualization output directory. NOTE: If the directory exists, it will be deleted!")
    optparser.add_option("-m", "--resultsToXML", default=None, dest="resultsToXML", help="Results in analysis xml. NOTE: for edges, pairs, not interactions")
    (options, args) = optparser.parse_args()
    
    mainTimer = Timer()
    print >> sys.stderr, __file__ + " start, " + mainTimer.toString()
    
    if options.output != None:
        if os.path.exists(options.output):
            print >> sys.stderr, "Output directory exists, removing", options.output
            shutil.rmtree(options.output)
        os.mkdir(options.output)
        if not os.path.exists(options.output+"/classifier"):
            os.mkdir(options.output+"/classifier")
    
    classifierParamDict = splitParameters(options.parameters)

    print >> sys.stderr, "Importing modules"
    exec "from ExampleBuilders." + options.exampleBuilder + " import " + options.exampleBuilder + " as ExampleBuilder"
    exec "from Classifiers." + options.classifier + " import " + options.classifier + " as Classifier"