Example #1
0
  def addPrior(self, sample):
    """Use a trained Sample object as a prior for this corpus.

    Primarily intended for sampling a single document (or a small number of documents) against a model already trained on another set of documents. It works by adding the topics, clusters and behaviours from the sample into this corpus with their counts intact, so they carry the relevant weight and cannot be deleted. Multiple priors could in principle be added, though that would be an unusual scenario; if called only once the topic indices will line up. Note that the prior's parameters are not transferred, though often you would want to - setGlobalParams is provided for that. Must be called before any Gibbs sampling takes place."""

    # Below code has evolved into spagetti, via several other tasty culinary dishes, and needs a rewrite. Or to never be looked at or edited ever again. ###################
    
    # Do the topics...
    # The sample's topic rows are appended after this corpus's existing rows,
    # so every topic index originating from the sample must later be shifted
    # by this offset (done in mapCluster below).
    offset = self.topicWord.shape[0]
    if self.topicWord.shape[0]!=0:
      self.topicWord = numpy.vstack((self.topicWord,sample.topicWord))
    else:
      # No topics of our own yet - take the sample's counts wholesale.
      self.topicWord = sample.topicWord.copy()
    self.topicUse = numpy.hstack((self.topicUse,sample.topicUse))

    # Calculate the new abnormalities dictionary...
    # Start from the sample's mapping so its behaviour indices remain valid,
    # then append any of our own abnormalities it does not know about. The +1
    # is because behaviour index 0 is reserved for the normal behaviour - all
    # the behaviour arrays below are sized 1+len(newAbnorms).
    newAbnorms = dict(sample.abnorms)
    for key,_ in self.abnorms.iteritems():
      if key not in newAbnorms:
        val = len(newAbnorms)+1
        newAbnorms[key] = val

    # Transfer over the abnormal word counts...
    # The two sides may have seen different numbers of words - size the merged
    # count matrix to the wider of the two and sum the overlapping columns.
    newAbnormTopicWord = numpy.zeros((1+len(newAbnorms), max((self.abnormTopicWord.shape[1], sample.abnormTopicWord.shape[1]))), dtype=numpy.int32)

    for abnorm,origin in self.abnorms.iteritems():
      dest = newAbnorms[abnorm]
      limit = self.abnormTopicWord.shape[1]
      newAbnormTopicWord[dest,:limit] += self.abnormTopicWord[origin,:limit]

    for abnorm,origin in sample.abnorms.iteritems():
      dest = newAbnorms[abnorm]
      limit = sample.abnormTopicWord.shape[1]
      newAbnormTopicWord[dest,:limit] += sample.abnormTopicWord[origin,:limit]

    # Update the document flags/counts for behaviours...
    # Re-index each document's per-behaviour flag/count arrays into the merged
    # behaviour numbering; entry 0 (normal behaviour) is copied through as-is.
    # Behaviours only the sample knows about stay zero for our documents.
    for doc in self.doc:
      newFlags = numpy.zeros(1+len(newAbnorms), dtype=numpy.uint8)
      newCounts = numpy.zeros(1+len(newAbnorms), dtype=numpy.int32)
      newFlags[0] = doc.behFlags[0]
      newCounts[0] = doc.behCounts[0]

      for abnorm,origin in self.abnorms.iteritems():
        dest = newAbnorms[abnorm]
        newFlags[dest] = doc.behFlags[origin]
        newCounts[dest] = doc.behCounts[origin]
      
      doc.behFlags = newFlags
      doc.behCounts = newCounts

    # Update the old clusters behaviour arrays...
    # Rebuild each existing cluster's behaviour multinomial (c[2]) at the new
    # size: start uniform, scale the entries we have old probabilities for,
    # and renormalise - behaviours only the sample knows keep the uniform
    # mass. Only c[2] changes here; c[3] (per-flag counts) is remapped later.
    def mapOldCluster(c):
      c2 = numpy.ones(1+len(newAbnorms), dtype=numpy.float32)
      c2 /= c2.sum()
      
      c2[0] *= c[2][0]
      for abnorm,origin in self.abnorms.iteritems():
        dest = newAbnorms[abnorm]
        c2[dest] *= c[2][origin]
      c2 /= c2.sum()
      
      return (c[0],c[1],c2,c[3])
      
    self.cluster = map(mapOldCluster ,self.cluster)
    origCluCount = len(self.cluster)
    
    # Add the new clusters, updating their behaviour arrays and topic indices, plus getting their priors updated with their associated documents...
    def mapCluster(pair):
      ci, c = pair
      
      # Shift the sample cluster's topic references (column 0 of c[0]) past
      # the topics this corpus already had, matching the vstack above.
      c0 = c[0].copy()
      c0[:,0] += offset

      # Same behaviour-multinomial rebuild as mapOldCluster, but driven by
      # the sample's behaviour numbering.
      c2 = numpy.ones(1+len(newAbnorms), dtype=numpy.float32)
      c2 /= c2.sum()

      c2[0] *= c[2][0]
      for abnorm,origin in sample.abnorms.iteritems():
        dest = newAbnorms[abnorm]
        c2[dest] *= c[2][origin]
      c2 /= c2.sum()

      # Fold the sample documents assigned to this cluster into its per-flag
      # prior counts (c[3]).
      c3 = c[3].copy()
      for doc in filter(lambda doc: doc.cluster==ci, sample.doc):
        fi = sample.fia.flagIndex(doc.behFlags, False)
        if fi>=len(doc.behFlags): # Only bother if the document has abnormalities, of which this is a valid test.
          total = 0
          for i in xrange(doc.dp.shape[0]):
            c3[doc.dp[i,0]] += doc.dp[i,2]
            total += doc.dp[i,2]
          # NOTE(review): presumably reverses an increment made against the
          # document's flag-combination entry during sampling - confirm
          # against the Gibbs sampling code before changing this.
          c3[fi] -= total + 1
      
      return (c0,c[1],c2,c3)
      
    self.cluster += map(mapCluster, enumerate(sample.cluster))
    self.clusterUse = numpy.hstack((self.clusterUse, sample.clusterUse))
    
    # Update phi...
    # Behaviour weight vector: entry 0 (normal) is averaged between the two
    # sides; behaviours known to both are averaged, those known to one side
    # only are copied; the vector is then renormalised to a distribution.
    newPhi = numpy.ones(len(newAbnorms)+1,dtype=numpy.float32)
    newPhi[0] = 0.5*(self.phi[0]+sample.phi[0])
    
    for abnorm,origin in self.abnorms.iteritems():
      dest = newAbnorms[abnorm]
      newPhi[dest] = self.phi[origin]
    for abnorm,origin in sample.abnorms.iteritems():
      dest = newAbnorms[abnorm]
      if abnorm not in self.abnorms:
        newPhi[dest] = sample.phi[origin]
      else:
        newPhi[dest] = 0.5*(newPhi[dest] + sample.phi[origin])
      
    self.phi = newPhi
    self.phi /= self.phi.sum()

    # Recreate the flag index array...
    # Build remaps from each side's old behaviour positions into the merged
    # numbering, then rebuild the FlagIndexArray. addFlagIndexArray returns a
    # lookup from each old flag-combination index to its new index.
    remapOrig = dict() # Old flag positions to new flag positions.
    remapOrig[0] = 0
    for abnorm,origin in self.abnorms.iteritems():
      remapOrig[origin] = newAbnorms[abnorm]

    remapSam = dict() # sample flag positions to new flag positions.
    remapSam[0] = 0
    for abnorm,origin in sample.abnorms.iteritems():
      remapSam[origin] = newAbnorms[abnorm]
    
    newFia = FlagIndexArray(len(newAbnorms)+1)
    newFia.addSingles()
    behIndAdjOrig = newFia.addFlagIndexArray(self.fia,remapOrig)
    behIndAdjSam  = newFia.addFlagIndexArray(sample.fia,remapSam)

    for doc in self.doc:
      doc.behFlagsIndex = behIndAdjOrig[doc.behFlagsIndex]

    # Update cluster priors on bmn arrays...
    # Remap each cluster's per-flag-combination counts (c[3]) into the new
    # fia indexing. Clusters before origCluCount came from this corpus, the
    # rest from the sample - hence the choice of adjustment table.
    for c in xrange(len(self.cluster)):
      clu = self.cluster[c]
      newBmn = numpy.zeros(newFia.flagCount(),dtype=numpy.int32)
      oldBmn = clu[3].copy()

      # Translate from the old flag indexing to the new one...
      for b in xrange(oldBmn.shape[0]):
        index = behIndAdjOrig[b] if c<origCluCount else behIndAdjSam[b]
        newBmn[index] += oldBmn[b]

      self.cluster[c] = (clu[0], clu[1], clu[2], newBmn)

    # Replace the old abnormality and fia stuff...
    self.abnormTopicWord = newAbnormTopicWord
    self.abnorms = newAbnorms
    self.fia = newFia
Example #2
0
    def addPrior(self, sample):
        """Use a trained Sample object as a prior for this corpus.

        Primarily intended for sampling a single document (or a small number of documents) against a model already trained on another set of documents. It works by adding the topics, clusters and behaviours from the sample into this corpus with their counts intact, so they carry the relevant weight and cannot be deleted. Multiple priors could in principle be added, though that would be an unusual scenario; if called only once the topic indices will line up. Note that the prior's parameters are not transferred, though often you would want to - setGlobalParams is provided for that. Must be called before any Gibbs sampling takes place."""

        # Below code has evolved into spagetti, via several other tasty culinary dishes, and needs a rewrite. Or to never be looked at or edited ever again. ###################

        # Do the topics...
        # The sample's topic rows are appended after this corpus's existing
        # rows, so every topic index originating from the sample must later
        # be shifted by this offset (done in mapCluster below).
        offset = self.topicWord.shape[0]
        if self.topicWord.shape[0] != 0:
            self.topicWord = numpy.vstack((self.topicWord, sample.topicWord))
        else:
            # No topics of our own yet - take the sample's counts wholesale.
            self.topicWord = sample.topicWord.copy()
        self.topicUse = numpy.hstack((self.topicUse, sample.topicUse))

        # Calculate the new abnormalities dictionary...
        # Start from the sample's mapping so its behaviour indices remain
        # valid, then append any of our own abnormalities it does not know
        # about. The +1 is because behaviour index 0 is reserved for the
        # normal behaviour - all behaviour arrays below are 1+len(newAbnorms).
        newAbnorms = dict(sample.abnorms)
        for key, _ in self.abnorms.iteritems():
            if key not in newAbnorms:
                val = len(newAbnorms) + 1
                newAbnorms[key] = val

        # Transfer over the abnormal word counts...
        # The two sides may have seen different numbers of words - size the
        # merged count matrix to the wider of the two and sum the overlapping
        # columns.
        newAbnormTopicWord = numpy.zeros(
            (1 + len(newAbnorms),
             max((self.abnormTopicWord.shape[1],
                  sample.abnormTopicWord.shape[1]))),
            dtype=numpy.int32)

        for abnorm, origin in self.abnorms.iteritems():
            dest = newAbnorms[abnorm]
            limit = self.abnormTopicWord.shape[1]
            newAbnormTopicWord[dest, :limit] += self.abnormTopicWord[
                origin, :limit]

        for abnorm, origin in sample.abnorms.iteritems():
            dest = newAbnorms[abnorm]
            limit = sample.abnormTopicWord.shape[1]
            newAbnormTopicWord[dest, :limit] += sample.abnormTopicWord[
                origin, :limit]

        # Update the document flags/counts for behaviours...
        # Re-index each document's per-behaviour flag/count arrays into the
        # merged behaviour numbering; entry 0 (normal behaviour) is copied
        # through as-is. Behaviours only the sample knows stay zero here.
        for doc in self.doc:
            newFlags = numpy.zeros(1 + len(newAbnorms), dtype=numpy.uint8)
            newCounts = numpy.zeros(1 + len(newAbnorms), dtype=numpy.int32)
            newFlags[0] = doc.behFlags[0]
            newCounts[0] = doc.behCounts[0]

            for abnorm, origin in self.abnorms.iteritems():
                dest = newAbnorms[abnorm]
                newFlags[dest] = doc.behFlags[origin]
                newCounts[dest] = doc.behCounts[origin]

            doc.behFlags = newFlags
            doc.behCounts = newCounts

        # Update the old clusters behaviour arrays...
        # Rebuild each existing cluster's behaviour multinomial (c[2]) at the
        # new size: start uniform, scale the entries we have old
        # probabilities for, and renormalise - behaviours only the sample
        # knows keep the uniform mass. Only c[2] changes here; c[3] (per-flag
        # counts) is remapped later.
        def mapOldCluster(c):
            c2 = numpy.ones(1 + len(newAbnorms), dtype=numpy.float32)
            c2 /= c2.sum()

            c2[0] *= c[2][0]
            for abnorm, origin in self.abnorms.iteritems():
                dest = newAbnorms[abnorm]
                c2[dest] *= c[2][origin]
            c2 /= c2.sum()

            return (c[0], c[1], c2, c[3])

        self.cluster = map(mapOldCluster, self.cluster)
        origCluCount = len(self.cluster)

        # Add the new clusters, updating their behaviour arrays and topic indices, plus getting their priors updated with their associated documents...
        def mapCluster(pair):
            ci, c = pair

            # Shift the sample cluster's topic references (column 0 of c[0])
            # past the topics this corpus already had, matching the vstack
            # above.
            c0 = c[0].copy()
            c0[:, 0] += offset

            # Same behaviour-multinomial rebuild as mapOldCluster, but driven
            # by the sample's behaviour numbering.
            c2 = numpy.ones(1 + len(newAbnorms), dtype=numpy.float32)
            c2 /= c2.sum()

            c2[0] *= c[2][0]
            for abnorm, origin in sample.abnorms.iteritems():
                dest = newAbnorms[abnorm]
                c2[dest] *= c[2][origin]
            c2 /= c2.sum()

            # Fold the sample documents assigned to this cluster into its
            # per-flag prior counts (c[3]).
            c3 = c[3].copy()
            for doc in filter(lambda doc: doc.cluster == ci, sample.doc):
                fi = sample.fia.flagIndex(doc.behFlags, False)
                if fi >= len(
                        doc.behFlags
                ):  # Only bother if the document has abnormalities, of which this is a valid test.
                    total = 0
                    for i in xrange(doc.dp.shape[0]):
                        c3[doc.dp[i, 0]] += doc.dp[i, 2]
                        total += doc.dp[i, 2]
                    # NOTE(review): presumably reverses an increment made
                    # against the document's flag-combination entry during
                    # sampling - confirm against the Gibbs sampling code
                    # before changing this.
                    c3[fi] -= total + 1

            return (c0, c[1], c2, c3)

        self.cluster += map(mapCluster, enumerate(sample.cluster))
        self.clusterUse = numpy.hstack((self.clusterUse, sample.clusterUse))

        # Update phi...
        # Behaviour weight vector: entry 0 (normal) is averaged between the
        # two sides; behaviours known to both are averaged, those known to
        # one side only are copied; the vector is then renormalised to a
        # distribution.
        newPhi = numpy.ones(len(newAbnorms) + 1, dtype=numpy.float32)
        newPhi[0] = 0.5 * (self.phi[0] + sample.phi[0])

        for abnorm, origin in self.abnorms.iteritems():
            dest = newAbnorms[abnorm]
            newPhi[dest] = self.phi[origin]
        for abnorm, origin in sample.abnorms.iteritems():
            dest = newAbnorms[abnorm]
            if abnorm not in self.abnorms:
                newPhi[dest] = sample.phi[origin]
            else:
                newPhi[dest] = 0.5 * (newPhi[dest] + sample.phi[origin])

        self.phi = newPhi
        self.phi /= self.phi.sum()

        # Recreate the flag index array...
        # Build remaps from each side's old behaviour positions into the
        # merged numbering, then rebuild the FlagIndexArray.
        # addFlagIndexArray returns a lookup from each old flag-combination
        # index to its new index.
        remapOrig = dict()  # Old flag positions to new flag positions.
        remapOrig[0] = 0
        for abnorm, origin in self.abnorms.iteritems():
            remapOrig[origin] = newAbnorms[abnorm]

        remapSam = dict()  # sample flag positions to new flag positions.
        remapSam[0] = 0
        for abnorm, origin in sample.abnorms.iteritems():
            remapSam[origin] = newAbnorms[abnorm]

        newFia = FlagIndexArray(len(newAbnorms) + 1)
        newFia.addSingles()
        behIndAdjOrig = newFia.addFlagIndexArray(self.fia, remapOrig)
        behIndAdjSam = newFia.addFlagIndexArray(sample.fia, remapSam)

        for doc in self.doc:
            doc.behFlagsIndex = behIndAdjOrig[doc.behFlagsIndex]

        # Update cluster priors on bmn arrays...
        # Remap each cluster's per-flag-combination counts (c[3]) into the
        # new fia indexing. Clusters before origCluCount came from this
        # corpus, the rest from the sample - hence the choice of table.
        for c in xrange(len(self.cluster)):
            clu = self.cluster[c]
            newBmn = numpy.zeros(newFia.flagCount(), dtype=numpy.int32)
            oldBmn = clu[3].copy()

            # Translate from the old flag indexing to the new one...
            for b in xrange(oldBmn.shape[0]):
                index = behIndAdjOrig[b] if c < origCluCount else behIndAdjSam[
                    b]
                newBmn[index] += oldBmn[b]

            self.cluster[c] = (clu[0], clu[1], clu[2], newBmn)

        # Replace the old abnormality and fia stuff...
        self.abnormTopicWord = newAbnormTopicWord
        self.abnorms = newAbnorms
        self.fia = newFia