def cv_evaluation_range(model, n_fold, X_train, y_train, score_function):
    """Return a [low, high] range for a model's cross-validated score.

    The model is refit on every training fold and scored on the matching
    held-out fold; the range is the mean of the per-fold scores plus/minus
    1.96 standard deviations of those scores.

    Args:
        model: estimator whose ``fit(X, y)`` returns an object with
            ``predict(X)``.
        n_fold: number of folds handed to ``StratifiedKFold``.
        X_train: features, indexed with ``.loc`` by the fold indices.
        y_train: targets, indexed with ``.loc`` by the fold indices.
        score_function: callable ``score_function(y_true, y_pred)``.

    Returns:
        Two-element list ``[mean - 1.96 * sd, mean + 1.96 * sd]``.
    """
    # NOTE(review): this call shape (labels as first argument, `n_folds=`,
    # iterating the object directly) looks like the legacy scikit-learn
    # cross_validation API - confirm against the installed version.
    cv = StratifiedKFold(y_train, n_folds=n_fold, random_state=1001)

    scores = []
    for train_idx, valid_idx in cv:
        # train then immediately predict the held-out fold
        y_hat = model.fit(
            X_train.loc[train_idx], y_train.loc[train_idx]
        ).predict(X_train.loc[valid_idx])
        # stash the score on the held-out fold
        scores.append(score_function(y_train.loc[valid_idx], y_hat))

    center = np.mean(scores)
    half_width = 1.96 * np.std(scores)
    return [center - half_width, center + half_width]
Пример #2
0
    def normalize(self, data: np.ndarray) -> np.ndarray:
        """
        2D normalization:
            Data = (Data - Mean)/standard_deviation

        The mean and standard deviation are computed per column (feature).
        A column with zero spread divides by zero and yields non-finite
        values.

        Args:
            data : 2d numpy array of shape (sample_size, feature_size)

        Return
            normalized 2d numpy array data, same shape as the input
        """
        # Unpacking keeps the requirement that `data` is exactly 2D.
        _sample_size, _feature_size = data.shape

        mean = np.mean(data, axis=0)
        # can also scale by the range: np.max(data, axis=0) - np.min(data, axis=0)
        standard_deviation = np.std(data, axis=0)

        return (data - mean) / standard_deviation
Пример #3
0
def Ztest(perm_dict, out_fp):
    """Write a z-score and upper-tail p-value for every entry of perm_dict.

    For each key, the first value is taken as the observed coreness and the
    remaining values as the permutation results. One tab-separated line per
    key is written to ``<out_fp>_stats.txt`` with the columns:
    key, observed coreness, permutation mean, permutation sd, z-score, p-value
    (the five numbers formatted with ``%.2E``).

    Args:
        perm_dict: mapping of key -> sequence ``[observed, perm1, perm2, ...]``.
        out_fp: output path prefix; ``"_stats.txt"`` is appended.

    Returns:
        None.
    """
    out_file = out_fp + "_stats.txt"
    # `with` guarantees the file is closed even if a statistic raises.
    with open(out_file, 'w') as f1:
        for otu in perm_dict:
            exp = perm_dict[otu][0]  # observed coreness
            perms = perm_dict[otu][1:]
            m = mean(perms)  # mean perms
            s = sd(perms)  # standard deviation perms
            if s == 0:
                # No spread among the permutations: the z-score is undefined,
                # so a fixed value of 1 is used. Might need a different
                # solution for this.
                z = 1
            else:
                z = (float(exp) - float(m)) / float(s)  # zscore
            # upper tail of cumulative probability distribution
            # (norm is presumably scipy.stats.norm - confirm at the import)
            p = norm.sf(z)
            stats = "\t".join("%.2E" % x for x in [exp, m, s, z, p])
            f1.write("%s\t%s\n" % (otu, stats))
    return None
Пример #4
0
def metrics(wealth):
    """Plot a wealth series and print its Sharpe ratio and cumulative return.

    Three figures are shown in turn: the wealth itself, its log-returns, and
    a histogram of the log-returns. The Sharpe ratio printed here is the mean
    log-return divided by the standard deviation of the log-returns; the
    cumulative return is the relative change from the first to the last
    value.

    Args:
        wealth: sequence of wealth values, one per time step (the plots label
            the time axis 'Seconds').

    Returns:
        None.
    """
    n = len(wealth)

    plt.plot(range(n), wealth, c='blue')
    plt.title('Evolution of the wealth')
    plt.xlabel('Seconds')
    plt.ylabel('Dollars')
    plt.show()

    list_logreturns = np.diff(np.log(wealth))

    plt.plot(range(n - 1), list_logreturns, c='blue')
    plt.title('Evolution of the log-returns')
    plt.xlabel('Seconds')
    plt.show()

    plt.hist(list_logreturns, bins='auto')
    plt.title('Distribution of the log-returns')
    plt.show()

    # Maybe do montecarlo and compute VaR = np.percentile(montecarlo_logreturns,5)

    sharpe = np.mean(list_logreturns) / np.std(list_logreturns)
    print('The Sharpe ratio is:', sharpe)

    cum_return = (wealth[n - 1] - wealth[0]) / wealth[0]
    print('The total cumulative return is:', cum_return)
Пример #5
0
def roll_stats():
    """Summarize the values returned by ``all_values()``.

    Returns:
        dict with the keys
        'avg' (mean), 'median', 'sd' (standard deviation),
        'counts' (Counter of the values) and
        'probabilities' (str(value) -> "<count>/<total>" string).
    """
    values = all_values()
    counts = Counter(values)
    total = str(len(values))
    probabilities = {
        str(value): str(count) + '/' + total
        for value, count in counts.items()
    }
    return {
        'avg': np.mean(values),
        'median': np.median(values),
        'probabilities': probabilities,
        'counts': counts,
        'sd': np.std(values),
    }
    def negative_gradient(self, y, pred, **kargs):
        """Compute the residual (= negative gradient).

        Records on ``self`` the indices and counts of the samples with
        ``y == 1`` (``ind1``, ``n1``) and ``y == 0`` (``ind0``, ``n0``) and the
        score differences ``M0``, then sums ``self.approx_grad(M0)`` along
        each axis to fill the result.

        Args:
            y: array of labels; only entries equal to 1 or 0 are used.
            pred: array of predictions, indexed the same way as ``y``.
            **kargs: accepted and ignored.

        Returns:
            Array of length ``n0 + n1``: axis-1 sums at the ``y == 1``
            positions, axis-0 sums at the ``y == 0`` positions.
        """
        self.ind1 = np.where(y == 1)[0]
        self.n1 = len(self.ind1)
        self.ind0 = np.where(y == 0)[0]
        self.n0 = len(self.ind0)
        # our predictions are between [0,1] but the algorithm expects [-1,1]:
        # centre on 0.5 and scale by the standard deviation of pred
        pred = (pred - 0.5) / np.std(pred)

        # NOTE(review): np.repeat(...) is 1-D with n1 * n0 entries, while the
        # sums below need a 2-D M1 - confirm the shapes approx_grad expects
        # and returns.
        self.M0 = np.repeat(pred[self.ind1], self.n0) - pred[self.ind0]
        M1 = self.approx_grad(self.M0)
        ng = np.empty(self.n0 + self.n1)
        ng[self.ind1] = np.sum(M1, axis=1)
        ng[self.ind0] = np.sum(M1, axis=0)
        return ng
Пример #7
0
def dg_from_dist(n_samples=25, offset=-10.0):
    """
    Generate some fake results for MMGBSA then return mean and
    standard error as an uncertainty estimate.

    Parameters
    ----------
    n_samples: integer
        Number of samples to draw
    offset: float
        Offset from 0 - generally MMGBSA results are more negative than
        experimental ones

    Returns
    -------
    tuple of (float, float)
        Mean of the drawn samples and their standard error (standard
        deviation divided by sqrt(n_samples)).
    """
    # Centre of the fake distribution: a half-normal draw subtracted from the
    # offset, so the centre is never above `offset`.
    target = -1 * halfnorm.rvs(size=1)[0] + offset

    # Unit-variance normal noise around that centre.
    sample = np.random.randn(n_samples) + target

    return np.mean(sample), np.std(sample) / np.sqrt(n_samples)
Пример #8
0
def con_general(infile):
    """Return the length of each condensed constraint read from infile.

    Every line that passes the (elided, `...`) filter is run through
    cstrip and then ccondense; the result is the list of len() values of
    those condensed constraints, in file order.
    """
    with open(infile, 'r') as handle:
        lines = handle.readlines()

    condensed = []
    for raw_line in lines:
        if ...:
            condensed.append(ccondense(cstrip(raw_line)))

    return [len(item) for item in condensed]

def cstrip(constraint):
    # Placeholder: the body is elided (`...`), so as written this returns None.
    # NOTE(review): presumably meant to strip a raw constraint line before it
    # is condensed in con_general - confirm against the real implementation.
    ...

def ccondense(constraint): # or just len?
    # Placeholder: the body is elided (`...`), so as written this returns None.
    # con_general takes len() of this function's result, so a real
    # implementation has to return something with a length.
    ...

import numpy
# Report the mean and standard deviation of the constraint lengths.
# print(...) with a single argument behaves the same on Python 2 and 3.
conlengths = con_general('...')
print(numpy.mean(conlengths))
print(numpy.std(conlengths))
#print distribution or at least hist
Пример #9
0
def sd(lst):
    """Return the standard deviation of lst (numpy.std with its defaults)."""
    return np.std(lst)
Пример #10
0
    # Sort value2 into an area list according to the suffix of sample2.
    # NOTE(review): these are independent `if` statements, so the `else`
    # further down pairs only with the "S12" test - every sample2 that does
    # not end in "S12" (including ones already appended to area1/area2/area3)
    # is also appended to area4. Confirm whether an if/elif chain was meant.
    if sample2.endswith("S2"):
      area1.append(value2)
    if sample2.endswith("S3"):
      area1.append(value2)
    if sample2.endswith("S7"):
      area2.append(value2)
    if sample2.endswith("S8"):
      area2.append(value2)
    if sample2.endswith("S10"):
      area2.append(value2)
    if sample2.endswith("S9"):
      area3.append(value2)
    if sample2.endswith("S11"):
      area3.append(value2)
    if sample2.endswith("S12"):
      area3.append(value2)
    else:
      area4.append(value2)
    # Look up the three stability values recorded for this sample; fall back
    # to the string 'NaN' for each when the sample has no entry.
    if sample in Stab_Dictionary.keys():

        stabval = Stab_Dictionary[sample]
        pval = stabval[0]
        qval = stabval[1]
        sigma = stabval[2]
    else:
        pval = 'NaN'
        qval = 'NaN'
        sigma = 'NaN'

# One tab-separated row: sample, mean of each area, standard deviation of each
# area, then pval / qval / sigma (which must already be strings).
outfile.write("\t".join([
    sample,
    str(np.mean(area1)), str(np.mean(area2)),
    str(np.mean(area3)), str(np.mean(area4)),
    str(np.std(area1)), str(np.std(area2)),
    str(np.std(area3)), str(np.std(area4)),
    pval, qval, sigma,
]) + "\n")
Пример #11
0
def standardized_returns(midprices):
    """Return the log-returns of midprices, centred and scaled to unit spread.

    Args:
        midprices: sequence of positive prices.

    Returns:
        numpy array with one entry fewer than ``midprices``: the differences
        of the log prices minus their mean, divided by their standard
        deviation.
    """
    logreturns = np.diff(np.log(midprices))
    return (logreturns - np.mean(logreturns)) / np.std(logreturns)
def song_calc(song_lst):
    """Return (mean, standard deviation) of a playlist's values.

    Playlists with 10 or fewer entries are considered too short; for those
    an explanatory message string is returned instead of the tuple.
    """
    if len(song_lst) > 10:
        return numpy.mean(song_lst), numpy.std(song_lst)
    return "This playlist is not long enough to use these measurements"
            area1.append(value2)
        # Sort value2 into an area list according to the suffix of sample2.
        # NOTE(review): these are independent `if` statements, so the `else`
        # further down pairs only with the "S12" test - every sample2 that
        # does not end in "S12" (including ones already appended to another
        # area list) is also appended to area4. Confirm whether an if/elif
        # chain was meant.
        if sample2.endswith("S7"):
            area2.append(value2)
        if sample2.endswith("S8"):
            area2.append(value2)
        if sample2.endswith("S10"):
            area2.append(value2)
        if sample2.endswith("S9"):
            area3.append(value2)
        if sample2.endswith("S11"):
            area3.append(value2)
        if sample2.endswith("S12"):
            area3.append(value2)
        else:
            area4.append(value2)
        # Look up the three stability values recorded for this sample; fall
        # back to the string "NaN" for each when the sample has no entry.
        if sample in Stab_Dictionary.keys():

            stabval = Stab_Dictionary[sample]
            pval = stabval[0]
            qval = stabval[1]
            sigma = stabval[2]
        else:
            pval = "NaN"
            qval = "NaN"
            sigma = "NaN"

# One tab-separated row: sample, mean of each area, standard deviation of each
# area, then pval / qval / sigma (which must already be strings). The numeric
# statistics are converted with str() because they cannot be concatenated to
# a string directly.
outfile.write("\t".join([
    sample,
    str(np.mean(area1)), str(np.mean(area2)),
    str(np.mean(area3)), str(np.mean(area4)),
    str(np.std(area1)), str(np.std(area2)),
    str(np.std(area3)), str(np.std(area4)),
    pval, qval, sigma,
]) + "\n")
Пример #14
0
def con_general(infile):
    """Return the length of each condensed constraint read from infile.

    Every line that passes the (elided, `...`) filter is run through
    cstrip and then ccondense; the result is the list of len() values of
    those condensed constraints, in file order.
    """
    with open(infile, 'r') as handle:
        lines = handle.readlines()

    condensed = []
    for raw_line in lines:
        if ...:
            condensed.append(ccondense(cstrip(raw_line)))

    return [len(item) for item in condensed]


def cstrip(constraint):
    # Placeholder: the body is elided (`...`), so as written this returns None.
    # NOTE(review): presumably meant to strip a raw constraint line before it
    # is condensed in con_general - confirm against the real implementation.
    ...


def ccondense(constraint):  # or just len?
    # Placeholder: the body is elided (`...`), so as written this returns None.
    # con_general takes len() of this function's result, so a real
    # implementation has to return something with a length.
    ...


import numpy
# Report the mean and standard deviation of the constraint lengths.
# print(...) with a single argument behaves the same on Python 2 and 3.
conlengths = con_general('...')
print(numpy.mean(conlengths))
print(numpy.std(conlengths))
#print distribution or at least hist
def modelRun(seedComplexity, seedSQ, seedSkillRange, seedDesignerN,
             seedManagerN, seedAppointeeN, seedIdeologyMean, seedIdeologySD):
    """Main control loop: run one problem / deliberation / veto cycle.

    Validates the seed parameters, generates the problem and the agents, runs
    the hill-climbing deliberation, records the appointee verdict and the
    per-group agent statistics, then prints and returns everything.

    Parameters:
        seedComplexity: number in [0, 1]; mapped to a problem string length
            of ``50 + 200 * seedComplexity`` (rounded).
        seedSQ: number in [0.1, 0.6]; passed to genProblem and used as the
            status-quo weight fraction for the proactivity estimate.
        seedSkillRange: indexable pair (low, high).
        seedDesignerN, seedManagerN, seedAppointeeN: agent counts; each must
            be a whole, non-zero number.
        seedIdeologyMean, seedIdeologySD: passed to genAgents; an SD above
            0.2 only triggers a printed warning.

    Returns:
        dict of inputs and results, with int/float values rounded to 4
        digits (NumPy floats included, since they subclass float), or None
        when a parameter fails validation (a message is printed first).
    """
    ######
    # check input parameters for errors
    ######
    if seedComplexity < 0 or seedComplexity > 1:
        print("Please set seedComplexity between 0 and 1")
        return None
    if seedSQ < 0.1 or seedSQ > 0.6:
        print("Please set seedSQ between 0.1 and 0.6")
        return None
    agentCounts = (seedDesignerN, seedManagerN, seedAppointeeN)
    # every count has to be whole, not just one of them
    if any(count % 1 != 0 for count in agentCounts):
        print("Please set the number of agents to a whole number")
        return None
    if any(count == 0 for count in agentCounts):
        print("All agent types should have at least 1 representative")
        return None
    if seedIdeologySD > 0.2:
        # getting up too high will just result in a highly bi-modal
        # distribution of ideology, which is fine, but should be noted in any
        # case (because of the truncated distribution)
        print("Ideology deviations greater than 0.2 may yield unbalanced results")

    # convert complexity to a string length
    complexity = int(round(50 + (200 * seedComplexity), 0))

    # storage for results from model run
    run_output = {
        "seedComplexity": seedComplexity,
        "seedSQ": seedSQ,
        "seedDesignerN": seedDesignerN,
        "seedManagerN": seedManagerN,
        "seedAppointeeN": seedAppointeeN,
        "seedIdeologyMean": seedIdeologyMean,
        "seedIdeologySD": seedIdeologySD,
        "seedSkillLow": seedSkillRange[0],
        "seedSkillHigh": seedSkillRange[1],
        "seedSkillRange": abs(seedSkillRange[1] - seedSkillRange[0]),
        # float() keeps this a true ratio even with integer counts
        "spanOfControl": float(seedDesignerN) / seedManagerN,
    }

    print("gen problem part")

    # generate the problem to be solved
    problem = genProblem(seedComplexity, complexity, seedSQ)
    problem = ''.join([str(i) for i in problem])
    run_output['problem'] = problem

    print("gen agents part")

    # generate a list of agents
    # and establish their basic characteristics
    agents = genAgents(seedSkillRange, seedDesignerN, seedManagerN,
                       seedAppointeeN, seedIdeologyMean, seedIdeologySD)
    for group in agents:
        for agent in group:
            print('define skill')
            agent.defineSkill(seedSkillRange)
            print('define ideal')
            agent.defineIdeal(complexity)
            print('define tolerance')
            # NOTE: agent.defineTolerance(seedSQ) is deliberately not called;
            # that function has been hanging for some reason.

    # split into occupation class lists
    designers = agents[0]
    managers = agents[1]
    appointees = agents[2]

    # hill climbing deliberation structure
    print("starting deliberation")
    revised_proposal = designer_revisions_HC1(designers, managers, problem)
    print("finished deliberation")

    proposal = revised_proposal[0]

    resultKeys = ['proposal', 'dissatisfaction', 'iterations', 'pctchange_hd',
                  'rawchange_hd', 'pctimprove', 'pctchange_hw', 'rawchange_hw']
    for position, key in enumerate(resultKeys):
        run_output[key] = revised_proposal[position]

    # estimate proactivity prior to appointee
    # (float() so the fraction is not truncated by integer division)
    hd_proactivity = float(hamming_distance(problem, proposal)) / len(problem)

    # largest possible move in weight away from the status quo, in either
    # direction (towards all ones or towards zero ones)
    sqWeight = seedSQ * len(problem)
    maxWeight = len(problem)
    minWeight = 0
    maxChange = max(maxWeight - sqWeight, sqWeight - minWeight)
    hw_proactivity = abs(problem.count("1") - proposal.count("1")) / maxChange

    run_output['hd_proactivity'] = hd_proactivity
    run_output['hw_proactivity'] = hw_proactivity

    # appointee veto
    p1weight = problem.count("1")
    p2weight = proposal.count("1")
    appointeeResults = [
        _appointee_verdict(p1weight, p2weight,
                           str(appointee.getIdeal()).count("1"))
        for appointee in appointees
    ]
    # only the first appointee's verdict is recorded
    appointeeOutcome = appointeeResults[0]
    run_output['appointeeOutcome'] = appointeeOutcome

    # add in the agent aggregate statistics, one fresh set per agent type
    groups = (('designer', designers), ('manager', managers),
              ('appointee', appointees))
    for label, group in groups:
        ideoMean, ideoSD, skillMean, skillSD = _group_stats(group)
        run_output[label + 'IdeoMean'] = ideoMean
        run_output[label + 'IdeoSD'] = ideoSD
        run_output[label + 'SkillMean'] = skillMean
        run_output[label + 'SkillSD'] = skillSD

    # if it's acceptable, check political situation
    # if it's politically acceptable, enact and decide

    for key in list(run_output):
        value = run_output[key]
        # if numeric, round off to 4 digits
        if isinstance(value, (int, float)) and not isinstance(value, bool):
            value = round(value, 4)
            run_output[key] = value
        print(key + ": " + str(value))

    return run_output


def _appointee_verdict(problemWeight, proposalWeight, idealWeight):
    """Return "approve" or "reject" for one appointee.

    The appointee approves when the proposal moves agency involvement (the
    count of "1"s) in the direction of their ideal relative to the problem,
    and also whenever the proposal leaves the weight unchanged.
    """
    if proposalWeight == problemWeight:
        return "approve"
    if proposalWeight < problemWeight:
        # proposal reduces agency involvement
        return "approve" if idealWeight < problemWeight else "reject"
    # proposal increases agency involvement
    return "approve" if idealWeight >= problemWeight else "reject"


def _group_stats(group):
    """Return (ideology mean, ideology SD, skill mean, skill SD) of agents."""
    chars = [agent.getAgentChars() for agent in group]
    ideologies = [c['ideology'] for c in chars]
    skills = [c['skill'] for c in chars]
    return (numpy.mean(ideologies), numpy.std(ideologies),
            numpy.mean(skills), numpy.std(skills))