Exemplo n.º 1
0
    def __init__(self, model, population_size, n_blobs, n_features,
                 home_district_in_position, iseed=None):
        """Generate clustered feature vectors and their roulette tables.

        Draws ``population_size`` vectors with ``make_blobs`` and, for every
        vector, stores in ``self.roulette_distribution`` a table of all
        *other* vectors ordered by decreasing (min-shifted) cosine similarity.

        Args:
            model: Stored as ``self.model``.
            population_size: Number of feature vectors to generate.
            n_blobs: Number of cluster centres passed to ``make_blobs``.
            n_features: Dimensionality of each generated vector.
            home_district_in_position: Stored as
                ``self.home_district_in_position``; not otherwise used here.
            iseed: Forwarded to ``make_blobs`` as ``random_state``.
        """
        self.model = model
        self.infected_blobs = []
        self.roulette_distribution = {}
        self.feature_vector = {}
        self.vector_to_human = {}
        self.vector_to_home = {}
        self.vector_to_classroom = {}
        self.vector_to_office = {}
        self.vector_to_restaurant = {}
        # NOTE: the two assignments below replace the method of the same name
        # with its return value on this instance, so each method can only be
        # called this once. Statement order is kept as in the original because
        # those methods may read the attributes set before them.
        self.unit_info_map = self.unit_info_map()
        self.teachers_per_classroom = 1
        self.strid_to_human = self.strid_to_human()

        blobs, assignments = make_blobs(
            n_samples=population_size,
            n_features=n_features,
            centers=n_blobs,
            cluster_std=0.1,  # 1.0
            center_box=(-10.0, 10.0),
            shuffle=False,
            random_state=iseed,
        )
        self.n_blobs = n_blobs
        self.home_district_in_position = home_district_in_position

        # Hashable form of every vector, computed once instead of once per
        # (i, j) pair in the similarity loop below.
        vector_keys = [tuple(vec) for vec in blobs]

        # Group the vectors by the blob they were drawn from.
        self.blob_dict = {}
        self.vector_to_blob = {}
        for vec, key, assignment in zip(blobs, vector_keys, assignments):
            self.vector_to_blob[key] = assignment
            self.blob_dict.setdefault(assignment, []).append(vec)
        self.vectors = blobs

        for i, (vector, key) in enumerate(zip(blobs, vector_keys)):
            similarities = KeyedVectors.cosine_similarities(vector, blobs)
            # Shift so the least similar vector gets weight 0. The minimum is
            # loop-invariant for this row, so it is computed once here.
            shifted = similarities - similarities.min()
            total = shifted.sum()
            # NOTE(review): `total` includes the self-similarity term while
            # the table below excludes the vector itself, so the stored
            # weights sum to less than 1. Kept as-is; confirm that whatever
            # samples from this table expects that.
            weights = {
                other_key: shifted[j] / total
                for j, other_key in enumerate(vector_keys)
                if j != i
            }
            # Most similar candidates first.
            self.roulette_distribution[key] = dict(
                sorted(weights.items(), key=lambda item: -item[1]))
Exemplo n.º 2
0
     # Manual-similarity branch: taken when `sim` starts with 'manual' and the
     # first entry of `bias` is not the string 'zero'.
     if sim.startswith('manual') and bias[0] != 'zero':
         # Candidate words: members of bias_labels[bias[0]] that also occur
         # in `labels`.
         vocab = [
             word for word in bias_labels[bias[0]] if word in labels
         ]
         # One vector per candidate word, in `vocab` order.
         vectors_all = np.asarray(
             [word_vectors[word] for word in vocab])
         vector_1 = word_vectors[label]
         # '...reject' modes with at least three bias entries: remove the
         # component along the (bias[1] - bias[2]) direction from every vector.
         if sim.endswith('reject') and len(bias) > 2:
             bias_vector = word_vectors[bias[1]] - word_vectors[
                 bias[2]]
             # Scale to unit length so the dot products below are projections.
             bias_vector /= np.linalg.norm(bias_vector)
             # Subtract each candidate's projection onto the bias direction.
             vectors_all -= np.outer(
                 np.dot(vectors_all, bias_vector), bias_vector)
             # Apply the same rejection to the query vector.
             vector_1 = vector_1 - np.dot(vector_1,
                                          bias_vector) * bias_vector
         # Cosine similarity of the query vector against every candidate.
         scores = word_vectors.cosine_similarities(
             vector_1, vectors_all)
         # Rank by descending score; ties put the word equal to `label` first,
         # then words that are not in `secondary_labels`. Keep the first
         # `topk` pairs and unzip them back into parallel tuples.
         # NOTE(review): the unpacking needs at least one pair, so an empty
         # `vocab` cannot get past this point - confirm callers guarantee a
         # candidate.
         scores, vocab = zip(
             *sorted(zip(scores, vocab),
                     key=lambda x: (-x[0], label != x[1], x[1] in
                                    secondary_labels))[:topk])
         # Write one line: "<label> <bias[0]> <score> <word> ..." with scores
         # formatted to four decimals.
         f.write('%s %s ' % (label, bias[0]) +
                 ' '.join('%.4f %s' % (scores[k], vocab[k])
                          for k in range(len(vocab))) + '\n')
         # Only report words when the best score reaches the threshold.
         # NOTE(review): the gate uses >= but the filter uses >, so a top
         # score exactly equal to `threshold` yields an empty list - confirm
         # this is intended.
         if scores[0] >= threshold:
             biased = [
                 v for v, s in zip(vocab, scores) if s > threshold
             ]
     # Otherwise, when more than one bias entry is given, delegate to
     # get_bias with the remaining entries.
     elif len(bias) > 1:
         biased = get_bias(label, bias[1:], sim, threshold)
     # NOTE(review): `biased` is not assigned on every path above; presumably
     # it is initialised before this fragment - confirm.
     row.append('/'.join(biased))
 rows.append(row)
Exemplo n.º 3
0
 def similarity(self, tup_vec1, tup_vec2):
     """Return the cosine similarity between two vectors given as tuples."""
     first, second = (
         np.array(list(components)) for components in (tup_vec1, tup_vec2)
     )
     # cosine_similarities compares one vector against a collection, so the
     # second vector is wrapped in a one-element list and the single result
     # is unpacked.
     (score,) = KeyedVectors.cosine_similarities(first, [second])
     return score