Example No. 1
 def __log_add(self, left, right):
     if (right < left):
         return left + math.log1p(math.exp(right - left))
     elif (right > left):
         return right + math.log1p(math.exp(left - right))
     else:
         return left + self.M_LN2
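A self-contained check of the identity this method implements (stable log(e^a + e^b)); M_LN2 is assumed to be math.log(2), the constant self.M_LN2 refers to:

import math

M_LN2 = math.log(2)  # assumed value of the constant the method above calls self.M_LN2

def log_add(left, right):
    # stable log(exp(left) + exp(right)): factor out the larger term
    if right < left:
        return left + math.log1p(math.exp(right - left))
    elif right > left:
        return right + math.log1p(math.exp(left - right))
    else:
        return left + M_LN2

a, b = math.log(0.3), math.log(0.7)
print(abs(log_add(a, b) - math.log(0.3 + 0.7)) < 1e-12)  # True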
Example No. 2
def my_features(u, t):
    vector = []
    #vector += [float(edits_num(u, t, p)) for p in periods]
    vector += [math.log1p(user_lastedit[(u,t)]-user_frstedit[(u,t)])]
    vector += [math.log1p(numpy.reciprocal(numpy.mean(user_edit_freq[u])))] 
    #vector += [float(artis_num(u, t, p)) for p in periods]
    return vector
Example No. 3
	def logadd(self, alpha):
		x = alpha[0]
		y = alpha[1]
		if y <= x:
			return x + math.log1p(math.exp(y-x))
		else:
			return y + math.log1p(math.exp(x-y))
Example No. 4
def B(x):
    y = None
    if math.sin(x / (x**2 + 2)) + math.exp(math.log1p(x) + 1) == 0 or x == 0:
        y = 'Neopredelen'  # Russian for "undefined"
    else:
        y = (1 / (math.sin(x / (x**2 + 2)) + math.exp(math.log1p(x) + 1))) - 1
    return y
Example No. 5
def testing(test_list, vocabulary, P1, P2, pp, np):
	result_value = []
	for test in test_list:
		test = test.split()
		
#		print test
		
		sum_of_pprob = 0
		sum_of_nprob = 0
		for word in test:
			if vocabulary.count(word):
				index = vocabulary.index(word)
				sum_of_pprob += math.log1p(pp[index])
				sum_of_nprob += math.log1p(np[index])

		positiveProbability = sum_of_pprob + math.log1p(P1)
		negativeProbability = sum_of_nprob + math.log1p(P2)
		
#		print positiveProbability
#		print negativeProbability
#		input('probability')

		if positiveProbability >= negativeProbability:
			result_value.append('+')
		else:
			result_value.append('-')
	return result_value
Example No. 6
def transition(dist, a, f, logspace=0):
    """
    Compute transition probabilities for a HMM. 
    
    to compute transition probabilities between hidden-states,
    when moving from time t to t+1,
    the genetic distance (cM) between the two markers are required.
    
    Assuming known parameters a and f.
    lf logspace = 1, calculations are log-transformed.
    
    Key in dictionary: 0 = not-IBD, 1 = IBD.
    """
    if logspace == 0:
        qk = exp(-a*dist)

        T = { # 0 = not-IBD, 1 = IBD
            1: {1: (1-qk)*f + qk, 0: (1-qk)*(1-f)},
            0: {1: (1-qk)*f, 0: (1-qk)*(1-f) + qk}
            }
        
    else:
        if dist == 0:
            dist = 1e-06

        ff = 1-f
        ad = a*dist
        A = expm1(ad)
        AA = -expm1(-ad)
        T = { # 0 = not-IBD, 1 = IBD
            1: {1: log1p(A*f)-ad, 0: log(AA*ff)},
            0: {1: log(AA*f), 0: log1p(A*ff)-ad}}
    return T
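A quick consistency check between the two branches, assuming the math names the function uses (exp, expm1, log, log1p) are imported; the parameter values below are made up for illustration. Each log-space entry should equal the log of the corresponding linear-space entry:

from math import log

T_lin = transition(dist=0.5, a=0.1, f=0.01, logspace=0)  # illustrative values
T_log = transition(dist=0.5, a=0.1, f=0.01, logspace=1)
print(all(abs(log(T_lin[i][j]) - T_log[i][j]) < 1e-9 for i in (0, 1) for j in (0, 1)))  # True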
Example No. 7
    def test_log1p(self):
        import math

        self.ftest(math.log1p(1 / math.e - 1), -1)
        self.ftest(math.log1p(0), 0)
        self.ftest(math.log1p(math.e - 1), 1)
        self.ftest(math.log1p(1), math.log(2))
Example No. 8
def log_sum(left,right):
	if right < left:
		return left + log1p(exp(right - left))
	elif left < right:
		return right + log1p(exp(left - right))
	else:
		return left + log1p(1)
Example No. 9
def log_add(left, right):
    if (right < left):
        return left + math.log1p(math.exp(right - left))
    elif (right > left):
        return right + math.log1p(math.exp(left - right))
    else:
        return left + M_LN2  # M_LN2: module-level constant, math.log(2)
Example No. 10
def mandelbrot(n):
	z = complex(0, 0)
	for i in range(max_iter):
		z = z * z + n  # complex has no .add/.multiply methods; use the operators
		if abs(z) >= escape_radius:
			smooth = i + 1 - int(math.log1p(math.log1p(abs(z))) / math.log1p(escape_radius))
			return int(smooth)
	return True
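The function assumes module-level max_iter and escape_radius; a minimal driver under assumed values:

import math

max_iter = 100       # assumed iteration cap
escape_radius = 2.0  # assumed bailout radius

print(mandelbrot(complex(-1.0, 0.0)))  # inside the set: returns True
print(mandelbrot(complex(1.0, 1.0)))   # escapes quickly: returns a small smoothed count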
Example No. 11
def drift(active_editors_test, grouped_edits, time_train, time_test):
    """Calculate drift
    """
    average_train = sum([math.log1p(count_edits(grouped_edits[editor], time_train, 5))
                         for editor in active_editors_test])/len(active_editors_test)
    average_test = sum([math.log1p(count_edits(grouped_edits[editor], time_test, 5))
                        for editor in active_editors_test])/len(active_editors_test)
    return average_test - average_train
Example No. 12
 def test_log1p(self):
     import math
     self.ftest(math.log1p(1/math.e-1), -1)
     self.ftest(math.log1p(0), 0)
     self.ftest(math.log1p(math.e-1), 1)
     self.ftest(math.log1p(1), math.log(2))
     raises(ValueError, math.log1p, -1)
     raises(ValueError, math.log1p, -100)
Example No. 13
def logadd(x, y):
    if y == 0:
        return x
    if x == 0:
        return y
    if y <= x:
        return x + math.log1p(math.exp(y - x))
    else:
        return y + math.log1p(math.exp(x - y))
Example No. 14
 def __call__(self, response, a, result):
     text = get_text_from_html(a)
     this_features = defaultdict(float)
     norm = 0.0
     for token in parse_string(text, self.default_language):
         this_features[self.prefix + "__" + token] += 1.0
         norm += 1
     norm = log1p(norm)
     result.features.update((t, log1p(c) / norm) for t, c in this_features.viewitems())
     return result
Example No. 15
def compute_scale_for_cesium(coordmin, coordmax):
    '''
    Cesium quantized positions need to be in uint16
    This function computes the best scale to apply to coordinates
    to fit the range [0, 65535]
    '''
    max_int = np.iinfo(np.uint16).max
    delta = abs(coordmax - coordmin)
    scale = 10 ** -(math.floor(math.log1p(max_int / delta) / math.log1p(10)))
    return scale
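Since math.log1p(x) is log(1 + x), the ratio above only approximates a base-10 logarithm; a variant under the assumption that floor(log10(max_int / delta)) was the intent:

import math
import numpy as np

def compute_scale_plain_log10(coordmin, coordmax):
    # same idea with an exact base-10 log; the log1p ratio in the
    # version above is only an approximation of log10
    max_int = np.iinfo(np.uint16).max
    delta = abs(coordmax - coordmin)
    return 10 ** -(math.floor(math.log10(max_int / delta)))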
Example No. 16
def geometric(data, p):
    denom = math.log1p(-p)
    data.start_example(GEOMETRIC_LABEL)
    while True:
        probe = fractional_float(data)
        if probe < 1.0:
            result = int(math.log1p(-probe) / denom)
            assert result >= 0, (probe, p, result)
            data.stop_example()
            return result
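Here data.start_example/stop_example and fractional_float come from the surrounding project; the draw itself is the standard inverse-CDF trick, which can be checked in isolation with a plain uniform source (a sketch):

import math
import random

def geometric_sample(p):
    # inverse-CDF draw: floor(log(1 - u) / log(1 - p)), u uniform on [0, 1)
    u = random.random()
    return int(math.log1p(-u) / math.log1p(-p))

draws = [geometric_sample(0.25) for _ in range(100000)]
print(sum(draws) / len(draws))  # near (1 - p) / p = 3.0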
Example No. 17
def next_temp(initial_temp, iteration, max_iteration, current_temp, slope=None, standard_deviation=None):
    if Config.SA_AnnealingSchedule == 'Linear':
        temp = (float(max_iteration-iteration)/max_iteration)*initial_temp
        print ("\033[36m* COOLING::\033[0m CURRENT TEMP: "+str(temp))
#   ----------------------------------------------------------------
    elif Config.SA_AnnealingSchedule == 'Exponential':
        temp = current_temp * Config.SA_Alpha
        print ("\033[36m* COOLING::\033[0m CURRENT TEMP: "+str(temp))
#   ----------------------------------------------------------------
    elif Config.SA_AnnealingSchedule == 'Logarithmic':
        # this is based on "A comparison of simulated annealing cooling strategies"
        # by Yaghout Nourani and Bjarne Andresen
        temp = Config.LogCoolingConstant * (1.0/log10(1+(iteration+1)))     # iteration should be > 1 so I added 1
        print ("\033[36m* COOLING::\033[0m CURRENT TEMP: "+str(temp))
#   ----------------------------------------------------------------
    elif Config.SA_AnnealingSchedule == 'Adaptive':
        temp = current_temp
        if iteration > Config.CostMonitorQueSize:
            if 0 < slope < Config.SlopeRangeForCooling:
                temp = current_temp * Config.SA_Alpha
                print ("\033[36m* COOLING::\033[0m CURRENT TEMP: "+str(temp))
#   ----------------------------------------------------------------
    elif Config.SA_AnnealingSchedule == 'Markov':
        temp = initial_temp - (iteration/Config.MarkovNum)*Config.MarkovTempStep
        if temp < current_temp:
            print ("\033[36m* COOLING::\033[0m CURRENT TEMP: "+str(temp))
        if temp <= 0:
            temp = current_temp
#   ----------------------------------------------------------------
    elif Config.SA_AnnealingSchedule == 'Aart':
        # This is coming from the following paper:
        # Job Shop Scheduling by Simulated Annealing Author(s): Peter J. M. van Laarhoven,
        # Emile H. L. Aarts, Jan Karel Lenstra
        if iteration % Config.CostMonitorQueSize == 0 and standard_deviation is not None and standard_deviation != 0:
            temp = float(current_temp)/(1+(current_temp*(log1p(Config.Delta)/standard_deviation)))
            print ("\033[36m* COOLING::\033[0m CURRENT TEMP: "+str(temp))
        elif standard_deviation == 0:
            temp = float(current_temp)*Config.SA_Alpha
            print ("\033[36m* COOLING::\033[0m CURRENT TEMP: "+str(temp))
        else:
            temp = current_temp
#   ----------------------------------------------------------------
    elif Config.SA_AnnealingSchedule == 'Huang':
        if standard_deviation is not None and standard_deviation != 0:
            temp = float(current_temp)/(1+(current_temp*(log1p(Config.Delta)/standard_deviation)))
            print ("\033[36m* COOLING::\033[0m CURRENT TEMP: "+str(temp))
        elif standard_deviation == 0:
            temp = float(current_temp)*Config.SA_Alpha
            print ("\033[36m* COOLING::\033[0m CURRENT TEMP: "+str(temp))
        else:
            temp = current_temp
#   ----------------------------------------------------------------
    else:
        raise ValueError('Invalid Cooling Method for SA...')
    return temp
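A minimal mock to exercise one branch; the attribute names mirror those read above, and the values are assumptions:

class Config:
    # stand-in exposing only the fields the 'Exponential' branch reads
    SA_AnnealingSchedule = 'Exponential'
    SA_Alpha = 0.95

print(next_temp(initial_temp=100.0, iteration=1, max_iteration=1000, current_temp=100.0))  # 95.0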
Example No. 18
 def _get_thruput(_ce_endpoint):
     if _ce_endpoint not in worker_ce_backend_throughput_dict:
         q_good_init = 0.
         q_good_fin = 0.
     else:
         q_good_init = float(sum(worker_ce_backend_throughput_dict[_ce_endpoint][_st]
                                 for _st in ('submitted', 'running', 'finished')))
         q_good_fin = float(sum(worker_ce_backend_throughput_dict[_ce_endpoint][_st]
                                 for _st in ('submitted',)))
     thruput = (log1p(q_good_init) - log1p(q_good_fin))
     return thruput
Example No. 19
    def get_ir_distances(self):
        """Converts the IR distance readings into a distance in meters"""
        
        ir_distances = [ \
            max( min( (log1p(3960) - log1p(reading))/30 + 
                       self.robot.ir_sensors.rmin,
                      self.robot.ir_sensors.rmax),
                 self.robot.ir_sensors.rmin)
            for reading in self.robot.ir_sensors.readings ]

        return ir_distances
Example No. 20
    def getIRDistance(self, robotInfo_):
        """Converts the IR distance readings into a distance in meters.
        """
        # Get the current parameters of the sensor
        readings = robotInfo_["sensors"]["ir"]["readings"]
        rmin = robotInfo_["sensors"]["ir"]["rmin"]
        rmax = robotInfo_["sensors"]["ir"]["rmax"]

        #   Convert the readings to a distance (in m)
        dists = [max( min( (log1p(3960) - log1p(r))/30 + rmin, rmax), rmin) for r in readings]
        return dists
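A spot check of the conversion above; the rmin/rmax values are assumptions for illustration:

from math import log1p

rmin, rmax = 0.02, 0.3  # assumed sensor range, in meters

def ir_to_m(reading):
    # same clamped conversion as above; the maximum raw reading 3960 maps to rmin
    return max(min((log1p(3960) - log1p(reading)) / 30 + rmin, rmax), rmin)

print(ir_to_m(3960))  # 0.02: strongest reading -> closest distance
print(ir_to_m(18))    # ~0.198: weaker reading -> larger distance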
Example No. 21
def get_stream_rating(broadcast_id):
    periscope_api = PeriscopeAPI()
    users = periscope_api.getBroadcastUsers(broadcast_id)

    rating = 0
    for user in users['live']:
        rating += log1p(user['n_followers']) * (1 + 0.05 * user['n_hearts_given'])
    for user in users['replay']:
        rating += log1p(user['n_followers']) * (1 + 0.05 * user['n_hearts_given']) * 0.6
    rating += 0.1 * users['n_web_watched']
    return round(rating)
Example No. 22
 def contrast(self,beta):
     for x in range(self.width):
         for y in range(self.height):
             gray, gray1, gray2 = self.imag.getpixel((x,y))
             maxim=max(gray,gray1,gray2)
             #minim=min(gray,gray1,gray2)
             q=int((maxim*((log1p(maxim+gray))/(log1p(maxim+maxim)))))
             #q=q*gray+beta
             self.newImag.putpixel((x,y),(q,q,q))
     self.newImag.save("testContrast1.jpg",self.imag.format)
     self.newImag.show()
     print("Done...")
Example No. 23
def logadd(a,b):
    """
    compute log(exp(a) + exp(b))
    """
    if a == -INFINITY:
        return b
    if b == -INFINITY:
        return a
    if b < a: # b - a < 0
        return a + math.log1p(math.exp(b - a))
    else: # a - b < 0
        return b + math.log1p(math.exp(a - b))
Example No. 24
def inverse_log_fq(token, sent):
    '''
    calculate the penalty for the token's inverse log frequency
    :param token:
    :param sent:
    :return:
    '''
    fdist = nltk.FreqDist(sent)
    if log1p(fdist[token]):
        return 1/log1p(fdist[token])
    else:
        return 0
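A small usage example; the imports are the names the function relies on, and the sentence is made up:

import nltk
from math import log1p

sent = ['the', 'cat', 'sat', 'on', 'the', 'mat']
print(inverse_log_fq('the', sent))  # 1 / log1p(2), about 0.910
print(inverse_log_fq('dog', sent))  # 0: an unseen token has zero frequency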
Example No. 25
def d1Calculation():
        first_part = stock_price / strike_price
        first_part = math.log1p(first_part)  # keep the result (it was previously discarded); note log1p(S/K) = log(1 + S/K), not the usual log(S/K)

        variance = standard_deviation * standard_deviation
        second_part = ( risk_free_rate + variance / 2 ) * time

        n_root = (1.0 / standard_deviation)
        third_part = math.pow(time,n_root)

        d1_answer = ( first_part + second_part ) / third_part
        return d1_answer
Example No. 26
    def get_ir_distances(self):
        """Converts the IR distance readings into a distance in meters"""
        default_value = 3960
        #Assignment week2
        ir_distances = [] #populate this list
        #self.robot.ir_sensors.readings | (may want to use this)
        for reading in self.robot.ir_sensors.readings:
            val = max( min( (log1p(3960) - log1p(reading))/30 + 0.02 , 3960) , 0.02)  # note: 0.02 stands in for rmin; 3960 (the raw max reading) is an odd upper clamp for a distance in meters
            ir_distances.append(val) 

        #End Assignment week2
        return ir_distances
Example No. 27
def segi(x):

    """ 
    T1.1.T2.4. these are for sequoia in seq ntl park
    proxy: none
    badness: no foliage
    source: biopak, equation 395
    accuracy: biomass at 0.95, density is stated to be 0.358
    """

    biomass = math.exp(-11.0174 + 2.5907 * math.log1p(float(x)))
    jenkbio = round(0.001*math.exp(-2.2304+2.4435*math.log1p(round(x,2))),4)

    return (biomass, jenkbio)
Example No. 28
def testing(test_list, univocabulary, bivocabulary, P1, P2, upp, unp, bpp, bnp):
	result_value = []
	for test in test_list:
		test = test.split()
		if univocabulary.count(test[0]) == 0:
			sum_of_pprob = 0
			sum_of_nprob = 0
		else:
			sum_of_pprob = math.log1p(upp[univocabulary.index(test[0])])
			sum_of_nprob = math.log1p(unp[univocabulary.index(test[0])])
		for i in range(len(test)-1):
			word = test[i]+" "+test[i+1]
			if bivocabulary.count(word):
				index = bivocabulary.index(word)
				sum_of_pprob += math.log1p(bpp[index])
				sum_of_nprob += math.log1p(bnp[index])
			else:
				if univocabulary.count(test[i+1]):
					index = univocabulary.index(test[i+1])
					sum_of_pprob += math.log1p(upp[index])
					sum_of_nprob += math.log1p(unp[index])

		positiveProbability = sum_of_pprob + math.log1p(P1)
		negativeProbability = sum_of_nprob + math.log1p(P2)
		if positiveProbability >= negativeProbability:
			result_value.append('+')
		else:
			result_value.append('-')
	return result_value
Example No. 29
def log_add(first_logarithm, second_logarithm):
    '''
    Add two real numbers in log-space. This function should preferably be used with
    the logarithms of small real values.

    @param first_logarithm: The logarithm of the first number
    @param second_logarithm: The logarithm of the second number
    @return: The logarithm of the sum of the two numbers
    '''

    # exponentiate the non-positive difference so exp() cannot overflow
    if first_logarithm >= second_logarithm:
        return first_logarithm + log1p(exp(second_logarithm - first_logarithm))
    else:
        return second_logarithm + log1p(exp(first_logarithm - second_logarithm))
Example No. 30
 def atanh(x):
     "NOT_RPYTHON"
     if isnan(x):
         return x
     absx = abs(x)
     if absx >= 1.:
         raise ValueError("math domain error")
     if absx < _2_to_m28:
         return x
     if absx < .5:
         t = absx + absx
         t = .5 * log1p(t + t * absx / (1. - absx))
     else:
         t = .5 * log1p((absx + absx) / (1. - absx))
     return copysign(t, x)
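A quick agreement check against the standard library; the helpers come from math, and the cutoff _2_to_m28 is assumed to be 2.0**-28:

from math import atanh as std_atanh, copysign, isnan, log1p

_2_to_m28 = 2.0 ** -28  # assumed value of the small-input cutoff used above

for x in (0.0, 1e-10, 0.3, -0.7, 0.999):
    assert abs(atanh(x) - std_atanh(x)) < 1e-12
print('ok')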
Example No. 31
 def ln(self, x):           return math.log1p(x)  # note: log1p(x) is log(1 + x); math.log(x) would be the natural log of x
 def logx(self, x, base):   return math.log(x,base)
Example No. 32
def word_meaning(vector_set):

    cls_set = {}
    corpus_freq_w = [0] * (len(vector_set[0]) - 1)
    for v in vector_set:
        if v[len(v) -
             1] not in cls_set:  # last item of each vector indicates its label
            cls_set.update({v[len(v) - 1]:
                            v[:len(v) - 1]})  # v[:len(v)-1] indicates vector
        else:
            cls_set[v[len(v) - 1]] = np.add(cls_set[v[len(v) - 1]],
                                            v[:len(v) - 1])
            # Each vector contains the frequency of words, we can find the number of each words in a vector defined by
            # the label of class, by iteratively summing them up... v[len(v)- 1] indicates the label

        corpus_freq_w = np.add(corpus_freq_w, v[:len(v) - 1])

    # Compute the number of words in each class and collect it in the set cls_w_length
    cls_w_lengths = {}
    for cl in cls_set:
        count = 0
        for w in cls_set[cl]:
            count += w

        cls_w_lengths.update({cl: count})

    # Compute the whole number of words in the corpus (training set)
    corpus_length = 0

    for cl_freq in cls_w_lengths:
        corpus_length += cls_w_lengths[cl_freq]

    # Calculates meaning values of each word for each class. At first calculates values for each word in each class and
    # then collect them in the set cls_meaning
    l = corpus_length
    cls_meaning = {}
    for cl_lbl in cls_set:

        b = cls_w_lengths[cl_lbl]

        if l != 0 and b != 0:  # Check the exception condition L = 0 and B = 0
            n = l / b
        else:
            n = 1

        length = len(cls_set[cl_lbl])
        meaning_vec = [0] * length

        for w in range(length):
            m = cls_set[cl_lbl][w]
            k = corpus_freq_w[w]
            w_nfa = combination(k, m) * (n**(1 - m))

            if m != 0:
                meaning_vec[w] = (-1 / m) * math.log1p(w_nfa)
            else:
                meaning_vec[w] = 0

        cls_meaning.update({cl_lbl: meaning_vec})

    return cls_meaning
Example No. 33
             new_genome.count(kmer) * 100 /
             (len(new_genome) - k + 1)))
     cake += 'Ratio \t{}\n'.format(
         str(
             int(
                 all_ends.count(kmer) * 100 /
                 (len(all_ends) - k + 1)) /
             (int(new_genome.count(kmer) * 100) /
              (len(new_genome) - k + 1))))
     pvalue = stats.chi2_contingency(
         [[len(all_ends), len(new_genome)],
          [all_ends.count(kmer),
           new_genome.count(kmer)]])[1]
     print('P-value = {}\n'.format(pvalue))
     cake += 'P-value for {} \t{}\n'.format(
         kmer, str(math.log1p(pvalue)))
     all_p_values.append(pvalue)
     contr += 1
 else:
     lable_k.append(kmer)
     if args.reverse == 'Yes':
         all_sb_friq.append((new_genome.count(kmer) * 100 /
                             (len(new_genome) - k + 1)) +
                            (reverse_genome.count(kmer) * 100 /
                             (len(reverse_genome) - k + 1)))
         end_sb_friq.append((all_ends.count(kmer) * 100 /
                             (len(all_ends) - k + 1)) +
                            (reverse_ends.count(kmer) * 100 /
                             (len(reverse_ends) - k + 1)))
     else:
         all_sb_friq.append(
Example No. 34
    def refine_probs(self):
        """ refine_probs()

            Improve the estimated probabilities used by working with
            the full set of data allocated to each node, rather than
            just the initial sub-set used to create/split nodes.
        """
        # travel up from leaves improving log_rk etc.

        for level_it in range(len(self.assignments) - 1, -1, -1):
            # print(level_it, self.nodes[level_it].keys())

            for node_it in self.nodes[level_it]:
                node = self.nodes[level_it][node_it]

                if node.tree_terminated:
                    if node.nk > 1:
                        # log_rk, etc are accurate
                        node.log_dk = node.true_bhc.root_node.log_dk
                        node.log_pi = node.true_bhc.root_node.log_pi
                        node.logp = node.true_bhc.root_node.logp
                        node.log_ml = node.true_bhc.root_node.log_ml
                        node.log_rk = node.true_bhc.root_node.log_rk
                    else:
                        node.log_dk = self.crp_alpha
                        node.log_pi = 0.
                        node.logp = self.data_model.log_marginal_likelihood(
                            node.data)
                        node.log_ml = node.logp
                        node.log_rk = 0.

                elif node.truncation_terminated:
                    node.log_dk = (math.log(self.crp_alpha) +
                                   math.lgamma(node.nk))
                    node.log_pi = 0.
                    node.logp = self.data_model.log_marginal_likelihood(
                        node.data)
                    node.log_ml = node.logp
                    node.log_rk = 0.

                else:
                    left_child = self.nodes[level_it + 1][node_it * 2]
                    right_child = self.nodes[level_it + 1][node_it * 2 + 1]

                    node.log_dk = np.logaddexp(
                        math.log(self.crp_alpha) + math.lgamma(node.nk),
                        left_child.log_dk + right_child.log_dk)

                    node.log_pi = -math.log1p(
                        math.exp(left_child.log_dk + right_child.log_dk -
                                 math.log(self.crp_alpha) -
                                 math.lgamma(node.nk)))
                    neg_pi = math.log(-math.expm1(node.log_pi))

                    node.logp = self.data_model.log_marginal_likelihood(
                        node.data)

                    node.log_ml = np.logaddexp(
                        node.log_pi + node.logp,
                        neg_pi + left_child.log_ml + right_child.log_ml)
                    node.log_rk = node.log_pi + node.logp - node.log_ml

        # travel down from top improving

        for level_it in range(1, len(self.assignments)):
            for node_it in self.nodes[level_it]:
                node = self.nodes[level_it][node_it]
                parent_node = self.nodes[level_it - 1][int(node_it / 2)]

                node.prev_wk = (parent_node.prev_wk *
                                (1 - math.exp(parent_node.log_rk)))
Example No. 35
        for n in range(N):
            x = X[:, n]
            x2 = np.zeros([1, 2])
            x2[0][0] = x[0] - mean_k_est[0]
            x2[0][1] = x[1] - mean_k_est[1]
            y = np.zeros([2, 1])
            y[0][0] = x[0] - mean_k_est[0]
            y[1][0] = x[1] - mean_k_est[1]
            cov = cov + res[n] * np.dot(y, x2)
        cov_k_est = cov / float(Nk)
        alpha_k_est = Nk / N
        mean[:, k] = mean_k_est
        Cova[:, :, k] = cov_k_est
        alpha[0][k] = alpha_k_est
    ### Likelihood evaluation
    final_likelihood = 0
    for n in range(N):
        x = X[:, n]
        log = 0
        for k in range(K):
            [rr, dd] = responsibility(x, mean, Cova, alpha, k, Model)
            log = log + math.log1p(dd)
        final_likelihood = final_likelihood + log

    print final_likelihood
    ite = ite + 1
    ### After converging the means and variances will be printed
print mean
print Cova
print alpha
Example No. 36
def gen_sweetwords(train, data_in, N):
    """Given the training data and input passwords, generate N sweetwords
    for each input password. One of the sweetwords is the input password.
    
    1. Start by checking if the subword of input password OR
       the entire input password itself belong to a category.
       
    2. If a category can be identified AND it is not the 'nums' category,
       select up to 3 + ln(N) sample training passwords from the category.
       Then, generate N sweetwords using those samples.

    3. Else if the subword is not short (> 4 chars) select up to ln(N) sample
       training passwords at random from the entire training set.
       Then, generate N sweetwords using those samples.
       
    4. Else (the subword is too short and/or the password is all non-letters)
       use algo1 to generate N sweetwords.
    
    Let the output be a list of N sweetwords (including the input password)
    for each input password.
    Returns: The output as a list of list of strings.
    """
    # The row number corresponding to each category
    # In other words, these are 1-indexed and not 0-indexed
    rnames = [
        11, 12, 16, 18, 19, 24, 30, 35, 37, 43, 44, 45, 46, 50, 51, 55, 61, 64,
        65, 67, 70, 71, 74, 76, 77, 78, 81, 86, 92, 93, 95, 97, 98, 99, 100
    ]
    rmisc = [
        6, 8, 14, 26, 27, 31, 32, 33, 36, 39, 47, 57, 58, 59, 69, 73, 83, 85,
        87, 89, 96
    ]
    rlove = [5, 13, 15, 22, 34, 38, 52, 53, 56, 62, 68, 79, 80, 90]
    rpopcult = [25, 29, 41, 49, 54, 63, 66, 75, 88, 91, 94]
    rnums = [1, 2, 3, 7, 9, 17, 21, 23, 40, 48, 72, 82]
    rlazy = [4, 10, 20, 28, 42, 60, 84]

    # A list of passwords split by categories
    names = list(map(lambda x: train[x - 1], rnames))
    misc = list(map(lambda x: train[x - 1], rmisc))
    love = list(map(lambda x: train[x - 1], rlove))
    popcult = list(map(lambda x: train[x - 1], rpopcult))
    nums = list(map(lambda x: train[x - 1], rnums))
    lazy = list(map(lambda x: train[x - 1], rlazy))
    category = {
        "names": names,
        "misc": misc,
        "love": love,
        "popcult": popcult,
        "nums": nums,
        "lazy": lazy
    }

    output = []
    swords = get_subwords(data_in)
    catnms = get_categories(data_in, swords)

    for i, pw in enumerate(data_in):
        genpwds = []
        if (catnms[i] in category.keys()) and (catnms[i] != "nums"):
            # If the password or subword exists in the training data
            randsamples = np.random.choice(category[catnms[i]],
                                           3 + round(math.log1p(N))).tolist()
            randsamples.append(pw)

            # Remove duplicates in samples:
            samples = []
            for elem in randsamples:
                if elem not in samples:
                    samples.append(elem)

            genpwds.extend(gen_with_samples(pw, swords[i], samples, N))
            output.append(genpwds)

        elif len(swords[i]["subword"]) > 4:
            # If the password or subword does not exist in the training data
            # But, the subword is long enough to replace
            randsamples = np.random.choice(train,
                                           round(math.log1p(N))).tolist()
            randsamples.append(pw)

            # Remove duplicates in samples:
            samples = []
            for elem in randsamples:
                if elem not in samples:
                    samples.append(elem)

            genpwds.extend(gen_with_samples(pw, swords[i], samples, N))
            output.append(genpwds)

        else:
            # If the subword is too short or if the password is all non-letters
            algo1_result = algo1.honeyWordGenerator([pw], N - 1)[0]
            genpwds.extend(algo1_result)
            output.append(genpwds)

    return output
Example No. 37
def calc_hp(dir=[1], inv=[1]):
    # * map_to_range(random(), 0, 1, 0.5, 1.5)
    return round(log1p(prod(dir) * prod(map(lambda x: 1 / x, inv))), 2)
Example No. 38
def next_temp(initial_temp,
              iteration,
              max_iteration,
              current_temp,
              slope=None,
              standard_deviation=None):
    if Config.SA_AnnealingSchedule == 'Linear':
        temp = (float(max_iteration - iteration) /
                max_iteration) * initial_temp
        print("\033[36m* COOLING::\033[0m CURRENT TEMP: " + str(temp))
#   ----------------------------------------------------------------
    elif Config.SA_AnnealingSchedule == 'Exponential':
        temp = current_temp * Config.SA_Alpha
        print("\033[36m* COOLING::\033[0m CURRENT TEMP: " + str(temp))
#   ----------------------------------------------------------------
    elif Config.SA_AnnealingSchedule == 'Logarithmic':
        # this is based on "A comparison of simulated annealing cooling strategies"
        # by Yaghout Nourani and Bjarne Andresen
        temp = Config.LogCoolingConstant * (
            1.0 / log10(1 + (iteration + 1))
        )  # iteration should be > 1 so I added 1
        print("\033[36m* COOLING::\033[0m CURRENT TEMP: " + str(temp))
#   ----------------------------------------------------------------
    elif Config.SA_AnnealingSchedule == 'Adaptive':
        temp = current_temp
        if iteration > Config.CostMonitorQueSize:
            if 0 < slope < Config.SlopeRangeForCooling:
                temp = current_temp * Config.SA_Alpha
                print("\033[36m* COOLING::\033[0m CURRENT TEMP: " + str(temp))
#   ----------------------------------------------------------------
    elif Config.SA_AnnealingSchedule == 'Markov':
        temp = initial_temp - (iteration /
                               Config.MarkovNum) * Config.MarkovTempStep
        if temp < current_temp:
            print("\033[36m* COOLING::\033[0m CURRENT TEMP: " + str(temp))
        if temp <= 0:
            temp = current_temp
#   ----------------------------------------------------------------
    elif Config.SA_AnnealingSchedule == 'Aart':
        # This is coming from the following paper:
        # Job Shop Scheduling by Simulated Annealing Author(s): Peter J. M. van Laarhoven,
        # Emile H. L. Aarts, Jan Karel Lenstra
        if iteration % Config.CostMonitorQueSize == 0 and standard_deviation is not None and standard_deviation != 0:
            temp = float(current_temp) / (
                1 + (current_temp *
                     (log1p(Config.Delta) / standard_deviation)))
            print("\033[36m* COOLING::\033[0m CURRENT TEMP: " + str(temp))
        elif standard_deviation == 0:
            temp = float(current_temp) * Config.SA_Alpha
            print("\033[36m* COOLING::\033[0m CURRENT TEMP: " + str(temp))
        else:
            temp = current_temp
#   ----------------------------------------------------------------
    elif Config.SA_AnnealingSchedule == 'Huang':
        if standard_deviation is not None and standard_deviation != 0:
            temp = float(current_temp) / (
                1 + (current_temp *
                     (log1p(Config.Delta) / standard_deviation)))
            print("\033[36m* COOLING::\033[0m CURRENT TEMP: " + str(temp))
        elif standard_deviation == 0:
            temp = float(current_temp) * Config.SA_Alpha
            print("\033[36m* COOLING::\033[0m CURRENT TEMP: " + str(temp))
        else:
            temp = current_temp


#   ----------------------------------------------------------------
    else:
        raise ValueError('Invalid Cooling Method for SA...')
    return temp
Example No. 39
def log1p_exp(val):
    """Numerically stable implementation of `log(1 + exp(val))`."""
    if val > 0.0:
        return val + log1p(exp(-val))
    else:
        return log1p(exp(val))
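The point of the split is overflow safety; a quick comparison with the direct formula:

from math import exp, log1p

def log1p_exp_naive(val):
    # direct form: overflows once exp(val) exceeds the float range
    return log1p(exp(val))

print(abs(log1p_exp(3.0) - log1p_exp_naive(3.0)) < 1e-12)  # True: both fine for small inputs
print(log1p_exp(1000.0))  # 1000.0: stable; the naive form would raise OverflowError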
Example No. 40
def math_log1p(A, B):
    i = cuda.grid(1)
    B[i] = math.log1p(A[i])
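As written the kernel is missing its numba decorator and a bounds check; a minimal launchable sketch, assuming the numba package and a CUDA-capable device:

import math
import numpy as np
from numba import cuda

@cuda.jit
def math_log1p_kernel(A, B):
    i = cuda.grid(1)
    if i < A.size:  # guard threads past the end of the array
        B[i] = math.log1p(A[i])

A = np.linspace(0.0, 9.0, 10)
B = np.zeros_like(A)
math_log1p_kernel[1, 32](A, B)  # one block of 32 threads covers all 10 elements
print(np.allclose(B, np.log1p(A)))  # True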
Example No. 41
def get_log(num):
    num = math.log1p(num)
    return num
Example No. 42
def compute_alphas(Input_Vector, Errors):
    alphas = []
    for i in range(len(Errors)):
        alphas.append(.5 * math.log1p((1 - Errors[i]) / Errors[i]))
        
    return alphas
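Note that log1p((1 - e) / e) equals log(1 + (1 - e)/e) = log(1/e), not the usual AdaBoost weight; a plain-log variant under the assumption the textbook formula was intended:

import math

def compute_alphas_textbook(errors):
    # classic AdaBoost weight: alpha = 0.5 * ln((1 - e) / e)
    return [0.5 * math.log((1 - e) / e) for e in errors]

print(compute_alphas_textbook([0.1, 0.3]))  # [~1.0986, ~0.4236]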
Example No. 43
 def log1p(self):
     self.result = False
     self.current = math.log1p(math.radians(float(txtDisplay.get())))
     self.display(self.current)
Example No. 44
        prob_predic = np.load(path_prob_uni + '/' + name_file)
        prob_predic = sorted(enumerate(prob_predic),
                             key=lambda x: list(x[1])[1],
                             reverse=True)

        doc = open(path_data_labels_test + name_file.replace('.npy', ''),
                   'r').read().strip()

        arr_all_sents = doc.split("\n")

        print('prob', len(prob_predic), 'all sent', len(arr_all_sents))
        sents_values = []
        for s in prob_predic:
            index = int(s[0])
            posi_fea = math.log1p(1 / (1 + index))
            sent = arr_all_sents[index][2:]
            ele = (index, sent,
                   text_utils_eng.clean_stem_sent(sent, list_stopwords),
                   s[1][1], posi_fea * 10)
            sents_values.append(ele)

        # sents_values = sorted(sents_values, key=lambda x: x[0])

        summari = mmr_selection_eng.make_summary(sents_values, 0.95)

        summari = re.sub(r'\s+', ' ', summari)

        f = open(path_results + '/system_' + id, 'w')
        f.write(summari)
Example No. 45
def performAnalysis():
	"""Perform the analysis."""
	global magList
	global phaList
	global sweepit
	global admittanceMatrix
	global currentMatrix
	result = ""
	formAdmittanceMatrix()
	admittanceMatrix = np.array(admittanceMatrix)
	currentMatrix = np.array(currentMatrix)
	basecurrentMatrix = copy.deepcopy(currentMatrix)
	voltageMatrix = [0 for i in currentMatrix]
	# voltageMatrixOld = [0 for i in currentMatrix]
	voltageAcrossOld = 0
	conv = 100000
	while conv > 10**-20:
		nlJacobian = [[0 for j in i] for i in admittanceMatrix]
		currentMatrix = copy.deepcopy(basecurrentMatrix)
		# form currentMatrix & complete nlJacobian
		for i in commands:
			commandtext = i.split(' ')
			if not(i.startswith('.')):
				nodes = getNodes(commandtext[2])
			if (commandtext[0].lower() == 'd'):
				print(commandtext[1])
				node1 = nodeList.index(nodes[0])
				node2 = nodeList.index(nodes[1])
				voltageAcross = 0
				if ((node1 == 0) & (node2 == 0)):
					voltageAcross = 0
				elif(node1 == 0):
					voltageAcross = -voltageMatrix[node2 - 1]
				elif(node2 == 0):
					voltageAcross = voltageMatrix[node1 - 1]
				else:
					voltageAcross = voltageMatrix[node1 - 1] - voltageMatrix[node2 - 1]

				if (voltageAcrossOld < vCriticalDiode):
					pass
				else:
					voltageAcross = voltageAcrossOld + thermalVoltage *  \
						math.log1p((((voltageAcross - voltageAcrossOld) / thermalVoltage) + 1).real)

				diodeCurrent = satCurrent * (math.e ** (voltageAcross / thermalVoltage) - 1)
				print(voltageAcross)
				if node1 != 0:
					currentMatrix[node1 - 1] -= diodeCurrent
					nlJacobian[node1 - 1][node1 - 1] += -diodeCurrent / thermalVoltage
				if node2 != 0:
					currentMatrix[node2 - 1] += diodeCurrent
					nlJacobian[node2 - 1][node2 - 1] += -diodeCurrent / thermalVoltage
				if((node1 != 0) & (node2 != 0)):
					nlJacobian[node1 - 1][node2 - 1] += diodeCurrent / thermalVoltage
					nlJacobian[node2 - 1][node1 - 1] += diodeCurrent / thermalVoltage

		Jacobian = admittanceMatrix - nlJacobian
		voltageMatrix2 = np.dot(np.linalg.inv(Jacobian), -np.dot(nlJacobian, voltageMatrix) + currentMatrix)
		conv = 0
		for i in range(len(voltageMatrix)):
			conv += (voltageMatrix[i] - voltageMatrix2[i])**2

		for i in range(len(voltageMatrix)):
			if ((voltageMatrix[i] - voltageMatrix2[i]) > 10):
				voltageMatrix2[i] = voltageMatrix[i] - 10
			elif ((voltageMatrix2[i] - voltageMatrix[i]) > 10):
				voltageMatrix2[i] = voltageMatrix[i] + 10
		voltageMatrix = copy.copy(voltageMatrix2)
	# admittanceMatrixInvert = np.linalg.inv(admittanceMatrix)
	# solutionMatrix = np.dot(admittanceMatrixInvert, currentMatrix)
	# solutionMatrix = solutionMatrix.tolist()
	solutionMatrix = voltageMatrix.tolist()

	for i in range(len(commands)):
		commandtext = commands[i].split(' ')
		if not(commands[i].startswith('.')):
			nodes = getNodes(commandtext[2])
		if ((commandtext[0].lower() == 'r') | (commandtext[0].lower() == 'g') |
			(commandtext[0].lower() == 'l') | (commandtext[0].lower() == 'c') |
			(commandtext[0].lower() == 'd')):
			if not(commandtext[0].lower() == 'd'):
				componentValue = getComponentValue(commandtext[3])
			node1 = nodeList.index(nodes[0])
			node2 = nodeList.index(nodes[1])
			voltageAcross = 0
			if ((node1 == 0) & (node2 == 0)):
				voltageAcross = 0
			elif(node1 == 0):
				voltageAcross = -solutionMatrix[node2 - 1]
			elif(node2 == 0):
				voltageAcross = solutionMatrix[node1 - 1]
			else:
				voltageAcross = solutionMatrix[node1 - 1] - solutionMatrix[node2 - 1]

		if(commandtext[0].lower() == 'r'):
			voltageMatrixLabels.append("I(" + commandtext[1] + ")")
			solutionMatrix.append(voltageAcross / componentValue)

		elif(commandtext[0].lower() == 'g'):
			voltageMatrixLabels.append("I(" + commandtext[1] + ")")
			solutionMatrix.append(voltageAcross * componentValue)

		elif(commandtext[0].lower() == 'l'):
			if (simulationDomain == "AC"):
				componentAdmittance = (-1j) / (2 * math.pi * simulationFrequency * componentValue)
				voltageMatrixLabels.append("I(" + commandtext[1] + ")")
				solutionMatrix.append(voltageAcross * componentAdmittance)

		elif(commandtext[0].lower() == 'c'):
			if (simulationDomain == "AC"):
				componentAdmittance = (1j) * (2 * math.pi * simulationFrequency * componentValue)
				voltageMatrixLabels.append("I(" + commandtext[1] + ")")
				solutionMatrix.append(voltageAcross * componentAdmittance)

		elif(commandtext[0].lower() == 'd'):
			voltageMatrixLabels.append("I(" + commandtext[1] + ")")
			solutionMatrix.append(satCurrent * (math.e ** (voltageAcross / thermalVoltage) - 1))

	for i in range(len(solutionMatrix)):
		# print(voltageMatrixLabels[i] + " = " + str(solutionMatrix[i]))
		print(voltageMatrixLabels[i] + " = " + str(rect2pol(solutionMatrix[i])[0]) + "[" + str(rect2pol(solutionMatrix[i])[1]) + "]")
		result += voltageMatrixLabels[i] + " = " + str(rect2pol(solutionMatrix[i])[0]) + "[" + str(rect2pol(solutionMatrix[i])[1]) + "]\n"
	if len(toGraph) > 0:
		a = rect2pol(solutionMatrix[nodeList.index(toGraph[0]) - 1])[0]
		b = rect2pol(solutionMatrix[nodeList.index(toGraph[0]) - 1])[1]
		c = rect2pol(solutionMatrix[nodeList.index(toGraph[1]) - 1])[0]
		d = rect2pol(solutionMatrix[nodeList.index(toGraph[1]) - 1])[1]
		if simulationParameters[1].lower() == "op":
			plot2sine(a, b, c, d)
		elif simulationParameters[1].lower() == "sweep":
			magList[sweepit] = rect2pol(solutionMatrix[nodeList.index(simulationParameters[6].lower()) - 1])[0]
			phaList[sweepit] = rect2pol(solutionMatrix[nodeList.index(simulationParameters[6].lower()) - 1])[1]
			sweepit += 1

	return result
Example No. 46
# In[34]:

df_res['result'] = df_res['result'] / 6
df_res['result_lgb_dart'] = df_res['result_lgb_dart'] / 6

# In[35]:

df_res['count_na'] = online_train['count'].apply(lambda x: np.nan
                                                 if x == 0 else x)
df_res['m'] = df_res['count_na'].apply(
    lambda x: max(0.61, 1 / math.log1p(x + 1)))

# In[36]:

df_res['result_PostProcess'] = df_res['result'] * df_res['m']
df_res['result_lgb_dart_PostProcess'] = df_res['result_lgb_dart'] * df_res['m']

Example No. 47
# In[103]:

############# [Test Code]
# print ui_buy

# In[135]:

# get train X,Y
x = np.zeros((len(train_data29), 4))
y = np.zeros((len(train_data29), ))

index = 0
for line in train_data29:
    uid = (line[0], line[1], line[-1] - 1)
    for i in range(4):
        x[index][i] = math.log1p(ui_dict[i][uid] if uid in ui_dict[i] else 0)
    uid = (line[0], line[1], line[-1])
    y[index] = 1 if uid in ui_buy else 0
    index += 1

# In[136]:

# get prediction px
px = np.zeros((len(train_data30), 4))

index = 0
for line in train_data30:
    uid = (line[0], line[1], line[-1] - 1)
    for i in range(4):
        px[index][i] = math.log1p(ui_dict[i][uid] if uid in ui_dict[i] else 0)
    index += 1
Example No. 48
# R R3 (N4;N0) 100"""
netlist = """.DC OP
.GND N0
OPAMP3 O0 (N3;N2;N1) 
V V0 (N2;N0) 10.0
R R1 (N3;N0) 1000.0
R R2 (N3;N1) 1000.0
"""

sweepit = 0
satCurrent = 10**-14
thermalVoltage = 0.026
magList = [0 for i in range(100)]
phaList = [0 for i in range(100)]
i = 0
vCriticalDiode = thermalVoltage * math.log1p(thermalVoltage / (math.sqrt(2) * satCurrent))
simulationDomain = ""
simulationFrequencies = []
simulationParameters = []
toGraph = []
commands = []
nodeList = []
nodeListNatural = []
voltageSources = 0
groundNode = "n0"
nodeCount = 0
admittanceMatrix = []
currentMatrix = []
groundNodeIndex = []
voltageMatrixLabels = []
simulationFrequency = 0
Example No. 49
    indices = [index[w] for w in tokens if w in index]
    z = len(indices)

    for k in range(0, z):
        context[indices[k]][indices[k]] += 1
        contextCount[indices[k]] += 1
        p[indices[k]] += 1
        for l in range(k + 1, z):
            context[indices[k]][indices[l]] += 1
            context[indices[l]][indices[k]] += 1

idf = [math.log((m + 1) / x) for x in contextCount]

for k in range(0, n):
    p[k] = math.log1p(p[k]) * idf[k]

total = sum(p)

p = list(map(lambda x: x / total, p))

total = 0
for k in range(0, n):
    for l in range(0, n):
        context[k][l] = math.log1p(context[k][l]) * idf[l]
    total += sum(context[k])

for k in range(0, n):
    for l in range(0, n):
        context[k][l] /= total
        #context[k][l] *= idf[l]
Example No. 50
    def arcProbability(self):
        try:
            # Next code applies product of probabilities, or sum of logs of probabilities
            if Classifier.weightFormula == '4' or Classifier.weightFormula == '5':
                for arc in Classifier.globalArcs:  # for each TLINK in the final classifier
                    relTuple = ()
                    for i in range(15):
                        if Classifier.weightFormula == '4':  # use sum of logs
                            prob = 0
                        else:  # use product of probabilities
                            prob = 1
                        assignedByClassifier = False  # relType was not assigned by any classifier - possible future use
                        for cl in Classifier.classifierList:
                            if Classifier.globalArcs[arc][cl][i] == 1:
                                assignedByClassifier = True  # if at least one classifier assigns the relType, set to True
                                if Classifier.weightFormula == '4':  # if '4' use sum of logs formula
                                    prob += math.log1p(
                                        Classifier.getWeight(cl))
                                else:  # if '5' use product formula
                                    prob *= Classifier.getWeight(cl)
                            else:
                                if Classifier.weightFormula == '4':
                                    prob += math.log1p(
                                        1 - Classifier.getWeight(cl))
                                else:
                                    prob *= 1 - Classifier.getWeight(cl)
#            if assignedByClassifier == False:              # if no classifier assigned the reltype, weigh according to popularity of reltype
#              prob *= Classifier.probReltype[i]            # might implement this later
                        relTuple += (prob, )
                    self.setFinalClassifier(arc, relTuple)
                return Classifier.finalArcs

            # If formula is '3' or '6' or '7', need to normalise sum of weights to 1
            if Classifier.weightFormula == '3' or Classifier.weightFormula >= '6':
                totalWeight = float(0)
                for cl in Classifier.classifierList:
                    totalWeight += Classifier.getWeight(
                        cl)  # get total weight of all classifiers
                for cl in Classifier.classifierList:
                    Classifier.setWeight(
                        cl,
                        Classifier.getWeight(cl) / totalWeight
                    )  # divide each classifier weight by total weight to normalise sum to 1

            # Next code applies LOSS functions
            if Classifier.weightFormula >= '6':
                for arc in Classifier.globalArcs:  # for each TLINK in the final classifier
                    relTuple = ()
                    for i in range(15):
                        prob = 0
                        for cl in Classifier.classifierList:
                            if Classifier.globalArcs[arc][cl][
                                    i] != 1:  # if prediction doesn't match
                                t = 1
                            else:
                                t = 0
                            if Classifier.weightFormula == '6':
                                prob += t * Classifier.getWeight(
                                    cl
                                )  # add classifier's weight to total probability
                            elif Classifier.weightFormula == '7':
                                prob += t * Classifier.getWeight(
                                    cl)  # hinge loss
                            elif Classifier.weightFormula == '8':
                                prob += t * Classifier.getWeight(
                                    cl) * Classifier.getWeight(
                                        cl)  # square loss
                            elif Classifier.weightFormula == '9':
                                prob += t * math.log1p(
                                    Classifier.getWeight(cl)) + t * math.log1p(
                                        1 -
                                        Classifier.getWeight(cl))  # log loss
                        if Classifier.weightFormula == '6':
                            prob = 1 - prob  # invert the loss
                            prob *= Classifier.probReltype[
                                i]  # multiply total by prior probability of reltype
                        else:
                            prob *= (
                                Classifier.probReltype[i] - 1
                            )  # multiply by probability of NOT being reltype and negate for Maximise objective function
                        relTuple += (prob, )
                    self.setFinalClassifier(arc, relTuple)
                return Classifier.finalArcs

            for arc in Classifier.globalArcs:  # for each TLINK in the final classifier
                if Classifier.weightFormula != '1':
                    numClassifiers = float(len(Classifier.classifierList)
                                           )  # total number of classifiers
                else:
                    numClassifiers = float(
                        0
                    )  # increment for each classifier that detects the arc
                relTuple = self.emptyTuple
                for cl in Classifier.classifierList:
                    if cl in Classifier.globalArcs[
                            arc]:  # if classifier has identified the arc
                        if Classifier.weightFormula == '1' and sum(
                                Classifier.globalArcs[arc][cl]) >= 1:
                            numClassifiers += 1  # if weight is proportion of classifiers that identified arc, increment number
                        relTuple = self.addRelTuple(
                            relTuple, Classifier.globalArcs[arc][cl], cl
                        )  # add this classifier's probability to the existing tuple of probabilities
                    else:
                        relTuple = self.addRelTuple(
                            relTuple, self.noneTuple, cl
                        )  # redundant unless we introduce NONE as valid reltype
                if Classifier.weightFormula == '1':  # if weight is number of classifiers that identified arc, need to divide probability by this number
                    newTuple = ()
                    for prob in relTuple:  # for each relType
                        newTuple += (
                            prob / numClassifiers,
                        )  # set to proportion of classifiers that assigned that relType


#          if numClassifiers > 1:                          # temporary
                    self.setFinalClassifier(arc, newTuple)
                elif Classifier.weightFormula == '3':
                    if sum(
                            relTuple
                    ) >= 0.5:  # arc is only included if total probability greater than threshold (default 0.5)
                        self.setFinalClassifier(arc, relTuple)
                else:
                    self.setFinalClassifier(arc, relTuple)
        except Exception as X:
            print "Error assigning probabilities to relTypes "
            raise
        return Classifier.finalArcs
Example No. 51
    def word_doc_counts(
        self,
        *,
        normalize: str = "lemma",
        weighting: str = "count",
        smooth_idf: bool = True,
        as_strings: bool = False,
        filter_stops: bool = True,
        filter_punct: bool = True,
        filter_nums: bool = True,
    ) -> Dict[Union[int, str], Union[int, float]]:
        """
        Map the set of unique words in :class:`Corpus` to their *document* counts
        as absolute, relative, inverse, or binary frequencies of occurrence.

        Args:
            normalize: If "lemma", lemmatize words before counting; if
                "lower", lowercase words before counting; otherwise, words are
                counted using the form with which they appear.
            weighting ({"count", "freq", "idf"}): Type of weight to assign to words.
                If "count" (default), weights are the absolute number (count)
                of documents in which word appears. If "freq", word doc counts
                are normalized by the total document count, giving their relative
                frequencies of occurrence. If "idf", weights are the log of the
                inverse relative frequencies: ``log(n_docs / word_doc_count)``
                or (if ``smooth_idf`` is True) ``log(1 + (n_docs / word_doc_count))`` .
            smooth_idf: If True, add 1 to all word doc counts when
                calculating "idf" weighting, equivalent to adding a single
                document to the corpus containing every unique word.
            as_strings: If True, words are returned as strings; if False
                (default), words are returned as their unique integer ids
            filter_stops: If True (default), stop word counts are removed.
            filter_punct: If True (default), punctuation counts are removed.
            filter_nums: If True (default), number counts are removed.

        Returns:
            Mapping of a unique word id or string (depending on the value
            of ``as_strings``) to the number of documents in which it appears
            weighted as absolute, relative, or binary frequencies (depending
            on the value of ``weighting``).

        See Also:
            :func:`textacy.vsm.get_doc_freqs() <textacy.vsm.matrix_utils.get_doc_freqs>`
        """
        word_doc_counts_: Union[Counter[Any], Dict[Any, Union[int, float]]]
        word_doc_counts_ = collections.Counter()
        for doc in self:
            word_doc_counts_.update(
                doc._.to_bag_of_words(
                    normalize=normalize,
                    weighting="binary",
                    as_strings=as_strings,
                    filter_stops=filter_stops,
                    filter_punct=filter_punct,
                    filter_nums=filter_nums,
                ))
        if weighting == "count":
            word_doc_counts_ = dict(word_doc_counts_)
        elif weighting == "freq":
            n_docs = self.n_docs
            word_doc_counts_ = {
                word: count / n_docs
                for word, count in word_doc_counts_.items()
            }
        elif weighting == "idf":
            n_docs = self.n_docs
            if smooth_idf is True:
                word_doc_counts_ = {
                    word: math.log1p(n_docs / count)
                    for word, count in word_doc_counts_.items()
                }
            else:
                word_doc_counts_ = {
                    word: math.log(n_docs / count)
                    for word, count in word_doc_counts_.items()
                }
        else:
            raise ValueError(
                errors.value_invalid_msg("weighting", weighting,
                                         {"count", "freq", "idf"}))
        return word_doc_counts_
Example No. 52
def classify(dictInput, dictModel, dictIDF, nGrams):

    script_name = "models.classify"

    if dictInput == {}:
        return None

    if dictModel == {}:
        return None

    if dictIDF == {}:
        return None

    if dictInput.has_key("_words"):
        print script_name, "error: input model is not normalized, _words key found"

    if dictModel.has_key("_words"):
        print script_name, "error: reference model is not normalized, _words key found"

    if dictIDF.has_key("_words"):
        print script_name, "error: idf model is not normalized, _words key found"

    fScore = 0.0
    dictExplain = {}

    # this is applied directly to the final score, so use sparingly
    size_adjust = 1.0
    # upper
    if len(dictInput) > 500:
        size_adjust = 0.7
    # lower
    if len(dictInput) < 100:
        size_adjust = 2.0
    if len(dictInput) < 25:
        size_adjust = 6.0
    if len(dictInput) < 10:
        size_adjust = 10.0
    if len(dictInput) < 3:
        size_adjust = 20.0
    if len(dictInput) < 2:
        size_adjust = 33.0

    # tbd: this needs work...
#     input_vs_model = (float(len(dictInput)) / float(len(dictModel))) # e.g. .004
#     if input_vs_model > .002:
#         size_adjust = 0.67
# print size_adjust,

# compute average frequencies for various lengths
    dict_freqs = compute_average_frequency_by_length(dictModel)

    for gram in dictInput.keys():
        if float(dictInput[gram]) > 1.0:
            continue
        gram_count = count_grams(gram)
        if gram_count > 1:
            gc = 0
            gl = 0
            for g in gram.split('_'):
                gc = gc + 1
                gl = gl + len(g)
            gram_average_length = float(gl) / float(gc)
        else:
            gram_average_length = len(gram)

        # is it in the classification model?
        if dictModel.has_key(gram):
            idf = dictModel[gram]
            if dictIDF.has_key(gram):
                if dictIDF[gram] > idf:
                    idf = dictIDF[gram]
        else:
            continue

        # compute tf/idf
        fContrib = float(
            (float(dictInput[gram]) / float(idf)) *
            (math.log1p(gram_average_length) * (gram_average_length**2)))

        # aggregate
        if fContrib > 1.0:
            # accept notable contributions only
            fScore = fScore + fContrib
            # add to explain dictionary
            if dictExplain.has_key(gram):
                dictExplain[gram] = dictExplain[gram] + fContrib
            else:
                dictExplain[gram] = fContrib
        # end if

    # end for

    # aggregate the top 20 hits into the score
    # to do: rewrite so that we score as the model (dictInput) is built, and stop as soon as we have 10 hits
    fScore1 = 0.0
    fScoreFinal = 0.0
    xTop = 50
    top = xTop
    lstExplain = sorted(dictExplain.iteritems(),
                        key=operator.itemgetter(1),
                        reverse=True)
    for (term, count) in lstExplain:
        if count > 1.0:
            fScore1 = fScore1 + 1.0
        if count > 0.25 and count <= 1.0:
            fScore1 = fScore1 + 0.75
        if count > 0.1 and count <= 0.25:
            fScore1 = fScore1 + 0.5
        top = top - 1
        if top == 0:
            break

    # adjust small models
    fScore1 = fScore1 * size_adjust

    # end for
    if len(dictInput) < 100:
        fScoreFinal = float(float(fScore1) / (float(xTop) / 1.5))
    else:
        fScoreFinal = float(float(fScore1) / float(xTop))

    if fScoreFinal > 1.0:
        fScoreFinal = 1.0

    return [fScoreFinal, dictExplain]
Example No. 53
 def test_log1p(self):
     import math
     self.ftest(math.log1p(1 / math.e - 1), -1)
     self.ftest(math.log1p(0), 0)
     self.ftest(math.log1p(math.e - 1), 1)
     self.ftest(math.log1p(1), math.log(2))
Example No. 54
#!/usr/bin/python
import random
import math

a = 0
b = 1000

p = 0.8
cont = 0.0
a = 0

for a in range(a, b):
    u = random.random()
    # the for statement already advances a; the redundant "a = a + 1" is dropped
    cont = a + (math.log1p(1.0 - u) / math.log1p(p))

print float(cont / b)
Example No. 55
 def h(p):
     q = 1 - p
     return -(q * log1p(-p) + p * log(p)) / (log(2) * p)
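For orientation, h(p) is the binary entropy H(p) in bits divided by p; restated at top level for a quick spot check:

from math import log, log1p

def h(p):  # restated from the snippet above
    q = 1 - p
    return -(q * log1p(-p) + p * log(p)) / (log(2) * p)

print(h(0.5))  # 2.0: H(0.5) is 1 bit, divided by p = 0.5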
Example No. 56
def cu_kernel_forward(log_probs, labels, alpha, log_p, T, U, blank, lock):
    """
    Compute forward pass for the forward-backward algorithm using Numba cuda kernel.
    Sequence Transduction with naive implementation : https://arxiv.org/pdf/1211.3711.pdf

    Arguments
    ---------
    log_probs : tensor
        4D Tensor of (batch x TimeLength x LabelLength x outputDim) from the Transducer network.
    labels : tensor
        2D Tensor of (batch x MaxSeqLabelLength) containing targets of the batch with zero padding.
    alpha : tensor
        3D Tensor of (batch x TimeLength x LabelLength) for forward computation.
    log_p : tensor
        1D Tensor of (batch) for forward cost computation.
    T : tensor
        1D Tensor of (batch) containing TimeLength of each target.
    U : tensor
        1D Tensor of (batch) containing LabelLength of each target.
    blank : int
        Blank index.
    lock : tensor
        2D Tensor of (batch x LabelLength) containing bool(1-0) lock for parallel computation.
    """

    # parallelize the forward algorithm over batch and target length dim
    b = cuda.blockIdx.x
    u = cuda.threadIdx.x
    t = 0
    if u <= U[b]:
        # for each (B,U) Thread
        # wait the unlock of the previous computation of Alpha[b,U-1,:]
        # Do the computation over the whole Time sequence on alpha[B,U,:]
        # and then unlock the target U+1 for computation
        while t < T[b]:
            if u == 0:
                if t > 0:
                    alpha[b, t, 0] = (alpha[b, t - 1, 0] +
                                      log_probs[b, t - 1, 0, blank])
                cuda.atomic.add(lock, (b, u + 1), -1)
                t += 1
            else:
                if cuda.atomic.add(lock, (b, u), 0) < 0:
                    if t == 0:
                        alpha[b, 0,
                              u] = (alpha[b, 0, u - 1] +
                                    log_probs[b, 0, u - 1, labels[b, u - 1]])
                    else:
                        # compute emission prob
                        emit = (alpha[b, t, u - 1] +
                                log_probs[b, t, u - 1, labels[b, u - 1]])
                        # compute no_emission prob
                        no_emit = (alpha[b, t - 1, u] +
                                   log_probs[b, t - 1, u, blank])
                        # do logsumexp between log_emit and log_no_emit
                        alpha[b, t, u] = max(no_emit, emit) + math.log1p(
                            math.exp(-abs(no_emit - emit)))
                    if u < U[b]:
                        cuda.atomic.add(lock, (b, u + 1), -1)
                    cuda.atomic.add(lock, (b, u), 1)
                    t += 1
        if u == U[b]:
            # for each thread b (utterance)
            # normalize the loss over time
            log_p[b] = (alpha[b, T[b] - 1, U[b]] +
                        log_probs[b, T[b] - 1, U[b], blank]) / T[b]
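The alpha update above relies on the stable pairwise log-sum-exp; isolated on the CPU for clarity:

import math

def logsumexp2(a, b):
    # log(exp(a) + exp(b)) = max(a, b) + log1p(exp(-|a - b|)), as used in the kernel
    return max(a, b) + math.log1p(math.exp(-abs(a - b)))

print(abs(logsumexp2(-2.0, -3.0) - math.log(math.exp(-2.0) + math.exp(-3.0))) < 1e-12)  # True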
Example No. 57
def geometric_int(p):
    if p <= 0: return arbitrary_int()
    elif p >= 1: return 0
    denom = log1p(-p)
    return int(log(rand()) / denom)
Example No. 58
def h(p):
    return -(p * log(p) + (1 - p) * log1p(-p))
Example No. 59
def geometric(p):
    if p <= 0 or p > 1:
        raise ValueError('p must be in the interval (0.0, 1.0]')
    if p == 1:
        return 1
    return int(math.log1p(-random.random()) / math.log1p(-p)) + 1
Example No. 60
print("exp(4) is ", exp_x)

# expm1(x) Returns e**x - 1
exp_x1 = math.expm1(4)
print("expm1(4) is ", exp_x1)

# log(x[, base]) > log of 5 with base e.
log_5 = math.log(5)
print("log(5) is ", log_5)

log_5b2 = math.log(5, 2)
print("log(5, 2) is ", log_5b2)
print("exp(log_5b2, 2) is ", math.pow(2, log_5b2))

# log1p(x) > log of 1+x, base e.
log1p_x = math.log1p(5)
print("log1p(5) is ", log1p_x)

# log2(5) > log of 5 with base 2.
log2 = math.log2(5)
print("log2(5) is ", log2)

print("***************** Angular Conversions ********************** ")

#  ##############################################
# Angular conversion.
# Pi (~22/7) radians = 180 degrees.
radian = 57.2958  # 1 radian = 57.2958 degrees.
deg = math.degrees(4)
print("degree(4) is ", deg)
print("4 * radian is ", (4 * radian))