	def __init__(self, X=None, Y=None, min_parent=2, max_depth=np.inf, min_score=-1, n_features=None):
		"""
		Constructor for TreeRegressor (decision tree regression model).

		Parameters
		----------
		X : numpy array 
			N x M numpy array which contains N data points with M features.
		Y : numpy array 
			1 x N numpy array that contains values that relate to the data
			points in X.
		min_parent : int 
			Minimum number of data required to split a node. 
		max_depth : int 
			Maximum depth of the decision tree. 
		min_score : float 
			Minimum score improvement required to split a node.
		n_features : int 
			Number of available features for splitting at each node.
		"""
		self.L = arr([0])			# indices of left children
		self.R = arr([0])			# indices of right children
		self.F = arr([0])			# feature to split on (-1 = leaf = predict)
		self.T = arr([0])			# threshold to split on (prediction value if leaf)
	
		if type(X) is np.ndarray and type(Y) is np.ndarray:					
			self.train(X, Y, min_parent, max_depth, min_score, n_features)	# train if data is provided
def drawing_housing_units_nogqs(db, frequencies, weights, index_matrix, sp_matrix, pumano = 0):

    dbc = db.cursor()
    dbc.execute('select hhlduniqueid from hhld_sample group by hhlduniqueid')
    hhld_colno = dbc.rowcount

    hh_colno = hhld_colno
    synthetic_population=[]
    j = 0
    for i in index_matrix[:hh_colno,:]:
        if i[1] == i[2] and frequencies[j]>0:
            synthetic_population.append([sp_matrix[i[1]-1, 2] , frequencies[j], i[0]])
        else:
            cumulative_weights = weights[sp_matrix[i[1]-1:i[2], 2]].cumsum()
            probability_distribution = cumulative_weights / cumulative_weights[-1]
            probability_lower_limit = probability_distribution.tolist()
            probability_lower_limit.insert(0,0)
            probability_lower_limit = arr(probability_lower_limit)
            random_numbers = random.rand(frequencies[j])
            freq, probability_lower_limit = histogram(random_numbers, probability_lower_limit)
            hhldid_by_type = sp_matrix[i[1]-1:i[2],2]

            for k in range(len(freq)):
                if freq[k] != 0:
                    #hhid = hhidRowDict[hhldid_by_type[k]]
                    # storing the matrix row no, freq, type
                    synthetic_population.append([hhldid_by_type[k], freq[k], i[0]])
        j = j + 1

    dbc.close()
    db.commit()
    return arr(synthetic_population, int)
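# A minimal standalone sketch (not from the original source) of the weighted-draw
# core used above: cumulative weights become probability bin edges, and a
# histogram of uniform random draws gives the per-household frequencies.
# numpy is used directly instead of the module's arr/random/histogram aliases;
# the weights and draw count below are made up.
import numpy as np

weights = np.array([0.5, 2.0, 1.0, 0.25])               # hypothetical sample weights
frequency = 6                                           # number of draws wanted

cumulative = weights.cumsum()
bins = np.insert(cumulative / cumulative[-1], 0, 0.0)   # probability bin edges in [0, 1]
draws = np.random.rand(frequency)
freq, _ = np.histogram(draws, bins)                     # draws landing in each bin
print(freq, freq.sum())                                 # freq.sum() == frequency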
	def __min_weighted_var(self, tsorted, can_split, n):
		"""
		This is a helper method that finds the minimum weighted variance
		among all split points. Used in:
			__dectree_train
		"""
		# compute mean up to and past position j (for j = 0..n)
		y_cum_to = np.cumsum(tsorted, axis=0)
		y_cum_pa = y_cum_to[-1] - y_cum_to
		mean_to = y_cum_to / arr(range(1, n + 1))		
		mean_pa = y_cum_pa / arr(list(range(n - 1, 0, -1)) + [1])

		# compute variance up to, and past position j (for j = 0..n)
		y2_cum_to = np.cumsum(np.power(tsorted, 2), axis=0)
		y2_cum_pa = y2_cum_to[-1] - y2_cum_to
		var_to = (y2_cum_to - 2 * mean_to * y_cum_to + list(range(1, n + 1)) * np.power(mean_to, 2)) / list(range(1, n + 1))
		var_pa = (y2_cum_pa - 2 * mean_pa * y_cum_pa + list(range(n - 1, -1, -1)) * np.power(mean_pa, 2)) / arr(list(range(n - 1, 0, -1)) + [1])
		var_pa[-1] = np.inf

		# find minimum weighted variance among all split points
		weighted_variance = arr(range(1, n + 1)) / n * var_to + arr(range(n - 1, -1, -1)) / n * var_pa
		val = np.nanmin((weighted_variance + 1) / (can_split + 1e-100))			# nan versions of min functions must be used to ignore nans
		idx = np.nanargmin((weighted_variance + 1) / (can_split + 1e-100))		# find only splittable points

		return (val,idx)
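# An unvectorised sketch (not part of the original class) of the split criterion
# computed above: for targets already sorted by one feature, take the variance of
# the left and right partitions at each split position, weight by partition size,
# and keep the minimum among splittable positions.  The masking trick with
# (weighted_variance + 1) / (can_split + 1e-100) is replaced by a plain check;
# the data is made up.
import numpy as np

tsorted = np.array([1.0, 1.5, 2.0, 8.0, 9.0])        # targets sorted by feature value
can_split = np.array([1, 1, 1, 1, 0])                # last position cannot be split
n = len(tsorted)

best_val, best_idx = np.inf, -1
for j in range(n - 1):                               # split after position j
    left, right = tsorted[:j + 1], tsorted[j + 1:]
    wvar = (len(left) * left.var() + len(right) * right.var()) / n
    if can_split[j] and wvar < best_val:
        best_val, best_idx = wvar, j

print(best_idx, best_val)                            # best split after index 2 (between 2.0 and 8.0)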
Example #4
    def predict(self, X):
        """
        This method makes a nearest neighbor prediction on test data X.
    
        Parameters
        ----------
        X : numpy array 
            N x M numpy array that contains N data points with M features. 
        """
        n_tr,m_tr = arr(self.X_train).shape            # get size of training data
        n_te,m_te = arr(X).shape                       # get size of test data

        if m_tr != m_te:
            raise ValueError('knnRegress.predict: training and prediction data must have the same number of features')

        Y_te = np.tile(self.Y_train[0], (n_te, 1))     # make Y_te the same data type as Y_train
        K = min(self.K, n_tr)                          # can't have more than n_tr neighbors

        for i in range(n_te):
            dist = np.sum(np.power((self.X_train - X[i]), 2), axis=1)  # compute sum of squared differences
            sorted_dist = np.sort(dist, axis=0)[:K]           # find nearest neighbors over X_train and...
            sorted_idx = np.argsort(dist, axis=0)[:K]         # ...keep nearest K data points
            wts = np.exp(-self.alpha * sorted_dist)
            Y_te[i] = arr(wts).dot(arr(self.Y_train[sorted_idx]).T) / np.sum(wts)  # weighted average

        return Y_te
    def __init__(self, *args, **kwargs):
        """
        Constructor for treeRegress (decision tree regression model)

        Parameters: see "train" function; calls "train" if arguments passed

        Properties (internal use only)
           L,R : indices of left & right child nodes in the tree
           F,T : feature index & threshold for decision (left/right) at this node
                 for leaf nodes, T[n] holds the prediction for leaf node n
        """
        self.L = arr([0])           # indices of left children
        self.R = arr([0])           # indices of right children
        self.F = arr([0])           # feature to split on (-1 = leaf = predict)
        self.T = arr([0])           # threshold to split on (prediction value if leaf)
        
        
        self.information_gain = dict()
        self.nX = dict() #keeps track of remaining data on that branch
        self.nY = dict() #left branch and right branch
#        self.bestval = dict()
        self.div = defaultdict(list) #        [best_feat,best_thresh]
        self.gain = defaultdict(int) #        best_val

         
        if len(args) or len(kwargs):     # if we were given optional arguments,
            self.train(*args, **kwargs)    #  just pass them through to "train"
Example #6
    def predictSoft(self, X):
        """
        This method makes a "soft" nearest-neighbor prediction on test data.

        Parameters
        ----------
        X : M x N numpy array 
            M = number of testing instances; N = number of features.  
        """
        mtr,ntr = arr(self.X_train).shape      # get size of training data
        mte,nte = arr(X).shape                 # get size of test data
        if nte != ntr:
            raise ValueError('Training and prediction data must have same number of features')
        
        num_classes = len(self.classes)
        prob = np.zeros((mte,num_classes))     # allocate memory for class probabilities
        K = min(self.K, mtr)                   # (can't use more neighbors than training data points)
        for i in range(mte):                   # for each test example...
            # ...compute sum of squared differences...
            dist = np.sum(np.power(self.X_train - arr(X)[i,:], 2), axis=1)
            # ...find nearest neighbors over training data and keep nearest K data points
            sorted_dist = np.sort(dist, axis=0)[0:K]                
            indices = np.argsort(dist, axis=0)[0:K]             
            wts = np.exp(-self.alpha * sorted_dist)
            count = []
            for c in range(len(self.classes)):
                # total weight of instances of that classes
                count.append(np.sum(wts[self.Y_train[indices] == self.classes[c]]))
            count = np.asarray(count)
            prob[i,:] = np.divide(count, np.sum(count))       # save (soft) results
        return prob
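# Hedged standalone sketch of the soft weighted-kNN prediction above, without the
# surrounding classifier class.  X_train, Y_train, K and alpha are made-up
# stand-ins for the corresponding attributes.
import numpy as np

X_train = np.array([[0.0, 0.0], [0.1, 0.2], [3.0, 3.0], [3.1, 2.9]])
Y_train = np.array([0, 0, 1, 1])
classes = np.unique(Y_train)
K, alpha = 3, 1.0

x_test = np.array([0.2, 0.1])
dist = np.sum((X_train - x_test) ** 2, axis=1)        # squared distances to all training points
idx = np.argsort(dist)[:K]                            # K nearest neighbours
wts = np.exp(-alpha * dist[idx])                      # distance-decaying weights
counts = np.array([wts[Y_train[idx] == c].sum() for c in classes])
print(counts / counts.sum())                          # soft class probabilities, ~[1, 0] here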
Example #7
	def test_bothModels(self):
		fun1 = functions.DistanceToCircle(arr([ 10,  10]), .5)
		fun2 = functions.DistanceToCircle(arr([-10, -10]), 5)
		set = dfo_model.MultiFunctionModel([fun1, fun2], self.b, self.center, self.radius)
		set.improve(None)
		center = arr([3,4])

		for i in range(50):
			print("testing " + str(i) + " of " + str(50))
			rFactor = self.getRFactor()
			newRadius = set.modelRadius * rFactor
			center = center + set.modelRadius / newRadius
			set.testNewModelCenter(center)
			set.setNewModelCenter(center)
			set.multiplyRadius(rFactor)
			set.improve('images/test_both_%04d_improve.png' % i)

			quadmod1 = set.getQuadraticModels(arr([0, 1], int))
			quadmod2 = set.getQuadraticModels2(arr([0, 1], int))
			for j in range(10):
				x = center + 10 * (2 * random.random(2) - 1)
				y1 = quadmod1.evaluate(x)
				y2 = quadmod2.evaluate(x)

				self.assertTrue(norm(y1 - y2) < self.tolerance)

				y1 = quadmod1.jacobian(x)
				y2 = quadmod2.jacobian(x)

				self.assertTrue(norm(y1 - y2) < self.tolerance)
Example #8
def plot_lum():
    clf()
    j_3min = [8052.06, 3050.04, 324.251, 20082.0, 1443.05, 1070.26, 1879.54, 3210.33, 312.932, 233.877, 714.423, 112.846, 126.616]
    j_3min2 = [8052.06, 3050.04, 324.251, 1443.05, 1070.26, 1879.54, 3210.33, 312.932, 233.877, 714.423, 112.846, 126.616]
    j_3min3 = [3050.04, 324.251, 1443.05, 1070.26, 1879.54, 3210.33, 312.932, 233.877, 714.423, 112.846, 126.616]

    j_3min = [8052.06, 3050.04, 324.251, 20082.0, 1443.05, 1070.26, 1879.54, 3210.33, 312.932, 233.877, 714.423, 188.211, 1594, 57.29, 833466.82317]

    #convert to cgs from microjansky:
    j_3min = arr(j_3min)*10**(-29)

    #convert to AB magnitude:
    j_3min = -2.5*numpy.log10(j_3min) - 48.60
    
    hist(j_3min,13)
    xlabel('$m_j$', fontsize=28)
    ylabel('Number', fontsize=28)
    yticks(arr([0, 1., 2., 3., 4.]))
    ax = matplotlib.pyplot.gca()
    ax.set_xlim(ax.get_xlim()[::-1]) # reversing the xlimits
    savefig('Lum_dist.eps')

    clf()
 #   hist(j_3min,20,cumulative=True, histtype='step')
  #  hist(j_3min2,20,cumulative=True, histtype='step')
   # hist(j_3min3,20,cumulative=True, histtype='step')
    #ylim(0,14)
    #xlim(-1000,22000)
    #xlabel('J Flux at 3 Minutes (Micro Jansky)')
  #  savefig('lum_dist.eps')
    return j_3min
Example #9
def drawing_housing_units(db, frequencies, weights, index_matrix, sp_matrix, pumano=0):

    dbc = db.cursor()
    dbc.execute("select hhlduniqueid from hhld_pums group by hhlduniqueid")
    hhld_colno = dbc.rowcount
    dbc.execute("select gquniqueid from gq_pums group by gquniqueid")
    gq_colno = dbc.rowcount

    hh_colno = hhld_colno + gq_colno
    synthetic_population = []
    j = 0
    for i in index_matrix[:hh_colno, :]:
        if i[1] == i[2] and frequencies[j] > 0:
            synthetic_population.append([sp_matrix[i[1] - 1, 2] + 1, frequencies[j], i[0]])
            print "hhid single", sp_matrix[i[1] - 1, 2]
        else:
            cumulative_weights = weights[sp_matrix[i[1] - 1 : i[2], 2]].cumsum()
            probability_distribution = cumulative_weights / cumulative_weights[-1]
            probability_lower_limit = probability_distribution[:-1].tolist()
            probability_lower_limit.insert(0, 0)
            probability_lower_limit = arr(probability_lower_limit)
            random_numbers = random.rand(frequencies[j])
            freq, probability_lower_limit = histogram(random_numbers, probability_lower_limit)
            hhldid_by_type = sp_matrix[i[1] - 1 : i[2], 2]

            for k in range(len(freq)):
                if freq[k] != 0:
                    synthetic_population.append([hhldid_by_type[k] + 1, freq[k], i[0]])
        j = j + 1

    dbc.close()
    db.commit()
    return arr(synthetic_population)
Example #10
 def addVars(self):
     bus,branch,_,_, n,nl,_,_,_,_,gens = self.data + self.aux
     if self.verbose: print 'defining variables'
     INF = 1e100
     if self.solver == 'cplex':
         p = ['p_%d'%i for i in gens]
         a = ['a_%d'%i for i in gens]
         D = ['D_%d'%i for i in bus]
         t = ['t_%d'%i for i in bus]
         m = ['m{}'.format(i['id']) for i in branch] 
         s = ['s{}'.format(i['id']) for i in branch]
         self.M.variables.add(names = p + a)
         self.M.variables.add(names = D + t, lb = [-INF]*2*n)
         #self.M.variables.add(names = m, lb = [-INF]*nl)
         #self.M.variables.add(names = s)
         self.M.variables.add(names = m + s, lb = [-INF]*2*nl)
         D, t = arr(D), arr(t)
         self.var = (p, a, D, t, m, s)
     else:
         p = {i: self.M.addVar(name='pbar_%d'%i) for i in gens}
         a = {i: self.M.addVar(name='alpha_%d'%i) for i in gens}
         D = {i: self.M.addVar(lb=-INF, name='delta_%d'%i) for i in bus}
         t = {i: self.M.addVar(lb=-INF, name='theta_%d'%i) for i in bus}
         m = {i['id']: self.M.addVar(lb=-INF, name='fbar{}'.format(i['id'])) for 
                 i in branch}
         s = {i['id']: self.M.addVar(lb=-INF, name='std{}'.format(i['id'])) for 
                 i in branch}
         self.var = (p, a, D, t, m, s)
         self.M.update()
	def init_weights(self, sizes, init='zeros', X=None, Y=None):
		"""
		This method initializes the weights of the neural network.
		Set layer sizes to S = [Ninput, N1, N2, ... Noutput] and set
		using 'fast' method ('none', 'random', 'zeros'). Refer to
		constructor doc string for argument descriptions.

		TODO:
			implement autoenc
			implement regress
		"""
		init = init.lower()

		if init == 'none':
			pass				# no init: do nothing

		elif init == 'zeros':
			self.wts = arr([np.zeros((sizes[i + 1], sizes[i] + 1)) for i in range(len(sizes) - 1)], dtype=object)

		elif init == 'random':
			self.wts = arr([.25 * np.random.randn(sizes[i + 1], sizes[i] + 1) for i in range(len(sizes) - 1)], dtype=object)

		elif init == 'autoenc':
			pass

		elif init == 'regress':
			pass

		else:
			raise ValueError('NNetRegress.init_weights: \'' + init + '\' is not a valid argument for init')
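# Standalone sketch of what the 'zeros' branch above builds for sizes = [2, 3, 1]:
# one weight matrix per layer, each with an extra bias column (kept as a plain
# list here instead of the object-dtype array used in the class).
import numpy as np

sizes = [2, 3, 1]
wts = [np.zeros((sizes[i + 1], sizes[i] + 1)) for i in range(len(sizes) - 1)]
print([w.shape for w in wts])    # [(3, 3), (1, 4)]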
Example #12
def MixedN(ls):
    """
    ls: a list of either lists or dictionaries.
    """
    
    if (len(ls)==1):
        if type(ls[0])==list:
            return [item/float(sum(ls[0])) for item in ls[0]]
        elif type(ls[0])==dict:
            return {key:value/float(sum(ls[0].values())) for key, value in ls[0].items()}

    lamb = 1.0/len(ls)
    if (sum([type(it)==list for it in ls])==len(ls)):
        total=arr([0]*len(ls[0]));
        for it in ls:
            total= total + arr([n/float(sum(it)) for n in it])
        mix = total*lamb
        return mix

    elif (sum([type(it)==dict for it in ls])==len(ls)):
        keys=set([])
        for it in ls:
            keys.update(set(it.keys()))
        mix={key:sum([(float(1)/sum(it.values()))*it.get(key, 0)*lamb for it in ls]) for key in keys}
        return mix
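# Hypothetical usage of MixedN (assuming the module's arr = numpy.array import):
# each input is normalised to a distribution and the results are averaged with
# equal weight 1/len(ls).
counts_a = [2, 2, 4]                      # normalises to [0.25, 0.25, 0.5]
counts_b = [1, 1, 2]                      # normalises to [0.25, 0.25, 0.5]
print(MixedN([counts_a, counts_b]))       # ~ [0.25, 0.25, 0.5]

hist_a = {'x': 1, 'y': 3}                 # normalises to {'x': 0.25, 'y': 0.75}
hist_b = {'y': 1, 'z': 1}                 # normalises to {'y': 0.5, 'z': 0.5}
print(MixedN([hist_a, hist_b]))           # ~ {'x': 0.125, 'y': 0.625, 'z': 0.25}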
def create_adjusted_frequencies(db, synthesis_type, control_variables, pumano, tract= 0, bg= 0):
    dbc = db.cursor()
    dummy_order_string = create_aggregation_string(control_variables)
    puma_table = ('%s_%s_joint_dist'%(synthesis_type, pumano))
    pums_table = ('%s_%s_joint_dist'%(synthesis_type, 0))

    dbc.execute('select * from %s where tract = %s and bg = %s order by %s' %(puma_table, tract, bg, dummy_order_string))
    puma_joint = arr(dbc.fetchall(), float)
    puma_prob = puma_joint[:,-2] / sum(puma_joint[:,-2])
    upper_prob_bound = 0.5 / sum(puma_joint[:,-2])

    dbc.execute('select * from %s order by %s' %(pums_table, dummy_order_string))
    pums_joint = arr(dbc.fetchall(), float)
    pums_prob = pums_joint[:,-2] / sum(pums_joint[:,-2])


    puma_adjustment = (pums_prob <= upper_prob_bound) * pums_prob + (pums_prob > upper_prob_bound) * upper_prob_bound
    correction = 1 - sum((puma_prob == 0) * puma_adjustment)
    puma_prob = ((puma_prob != 0) * correction * puma_prob +
                 (puma_prob == 0) * puma_adjustment)
    puma_joint[:,-2] = sum(puma_joint[:,-2]) * puma_prob

    dbc.execute('delete from %s where tract = %s and bg = %s'%(puma_table, tract, bg))
    puma_joint_dummy = str([tuple(i) for i in puma_joint])
    dbc.execute('insert into %s values %s' %(puma_table, puma_joint_dummy[1:-1]))
    dbc.close()
    db.commit()
Example #14
    def __dectree_train(self, X, Y, L, R, F, T, next, depth, minParent, maxDepth, minScore, nFeatures):
        """
        This is a recursive helper method that trains the decision tree. Used in:
            train

        TODO:
            compare for numerical tolerance
        """
        n,d = mat(X).shape

        # check leaf conditions...
        if n < minParent or depth >= maxDepth or np.var(Y) < minScore:
            assert n != 0, ('TreeRegress.__dectree_train: tried to create size zero node')
            return self.__output_leaf(Y, n, L, R, F, T, next)

        best_val = np.inf
        best_feat = -1
        try_feat = np.random.permutation(d)

        # ...otherwise, search over (allowed) features
        for i_feat in try_feat[0:nFeatures]:
            dsorted = arr(np.sort(X[:,i_feat].T)).ravel()                       # sort data...
            pi = np.argsort(X[:,i_feat].T)                                      # ...get sorted indices...
            tsorted = Y[pi].ravel()                                             # ...and sort targets by feature ID
            can_split = np.append(arr(dsorted[:-1] != dsorted[1:]), 0)          # which indices are valid split points?

            if not np.any(can_split):          # no way to split on this feature?
                continue

            # find min weighted variance among split points
            val,idx = self.__min_weighted_var(tsorted, can_split, n)

            # save best feature and split point found so far
            if val < best_val:
                best_val = val
                best_feat = i_feat
                best_thresh = (dsorted[idx] + dsorted[idx + 1]) / 2

        # if no split possible, output leaf (prediction) node
        if best_feat == -1:         
            return self.__output_leaf(Y, n, L, R, F, T, next)

        # split data on feature best_feat at threshold best_thresh
        F[next] = best_feat
        T[next] = best_thresh
        go_left = X[:,F[next]] < T[next]
        my_idx = next
        next += 1

        # recur left
        L[my_idx] = next    
        L,R,F,T,next = self.__dectree_train(X[go_left,:], Y[go_left], L, R, F, T, 
            next, depth + 1, minParent, maxDepth, minScore, nFeatures)

        # recur right
        R[my_idx] = next    
        L,R,F,T,next = self.__dectree_train(X[np.logical_not(go_left),:], Y[np.logical_not(go_left)], L, R, F, T, 
            next, depth + 1, minParent, maxDepth, minScore, nFeatures)

        return (L,R,F,T,next)
Example #15
def load_data_from_csv(csv_path, label_index, trans_func=lambda x: x):
	"""
	Function that loads from a CSV into main memory.

	Parameters
	----------
	csv_path : str
		Path to CSV file that contains data.
	label_index : int
		The index in the CSV rows that contains the label
		for each data point.
	trans_func : function object
		Function that transforms values in the CSV, e.g. str -> int.

	Returns
	-------
	data,labels : (list)
		Tuple that contains a list of data points (index 0) and
		a list of labels corresponding to those data points (index 1).
	"""
	data = []
	labels = []

	with open(csv_path) as f:
		csv_data = reader(f)
	
		for row in csv_data:
			row = list(map(trans_func, row))

			labels.append(row.pop(label_index))
			data.append(row)

	return arr(data),arr(labels)
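# Hypothetical usage of load_data_from_csv: the file name and label column are
# made up; trans_func=float converts every CSV field to a number before the
# label column is popped out.
X, Y = load_data_from_csv('iris.csv', label_index=-1, trans_func=float)
print(X.shape, Y.shape)    # (N, M) feature array and length-N label array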
def prepare_control_marginals(db, synthesis_type, control_variables, varCorrDict, controlAdjDict,
                              state, county, tract, bg, hhldsizeMargsMod=False):

    dbc = db.cursor()
    marginals = database(db, '%s_marginals'%synthesis_type)
    variable_names = marginals.variables()
    control_marginals = []
    #control_marginals_sum = []
    for dummy in control_variables:
        dbc.execute('select %s from %s_sample group by %s' %(dummy, synthesis_type, dummy))
        cats = arr(dbc.fetchall(), float)
        #print dummy, cats

        selVar = dummy
        selGeography = "%s,%s,%s,%s" %(state, county, tract, bg)
        
        variable_marginals1=[]
        try:
            #print hhldsizeMargsMod
            if (not hhldsizeMargsMod and synthesis_type == 'hhld') or synthesis_type != 'hhld':
                #print 'household not modified in correspondence'
                variable_marginals_adj = controlAdjDict[selGeography][selVar]
            #print 'adjustment', variable_marginals_adj[0], variable_marginals_adj[1]
                for i in variable_marginals_adj[1]:
                    if i>0:
                        variable_marginals1.append(i)
                    else:
                        variable_marginals1.append(0.1)
            #check_marginal_sum = sum(variable_marginals1)
            else:
                raise Exception('Household marginal distributions modified to account for person total inconsistency')
        except Exception as e:
            #print 'Exception: %s' %e

            #check_marginal_sum = 0
            for i in cats:
                corrVar = varCorrDict['%s%s' %(dummy, int(i[0]))]
                dbc.execute('select %s from %s_marginals where county = %s and tract = %s and bg = %s' %(corrVar, synthesis_type, county, tract, bg))
                result = arr(dbc.fetchall(), float)
                #check_marginal_sum = result[0][0] + check_marginal_sum

                if result[0][0] > 0:
                    variable_marginals1.append(result[0][0])
                else:
                    variable_marginals1.append(0.1)

        #exceptionStatus = False

        #if check_marginal_sum == 0 and (synthesis_type == 'hhld'):
        #    exceptionStatus = True
        #if check_marginal_sum == 0 and (synthesis_type == 'person'):
        #    exceptionStatus = True

            

        #if check_marginal_sum == 0 and (synthesis_type == 'hhld' or synthesis_type == 'person'):
        #    print 'Exception: The given marginal distribution for a control variable sums to zero.'
            #raise Exception, 'The given marginal distribution for a control variable sums to zero.'
        control_marginals.append(variable_marginals1)
Example #17
def populate_master_matrix(db, pumano, hhld_units, gq_units, hhld_dimensions, gq_dimensions):
# First we create an empty matrix based on the dimensions of the hhhld, gq control variables
    hhld_types = arr(hhld_dimensions).prod()
    gq_types = arr(gq_dimensions).prod()

# We add 2 more columns to also store the puma id, and housing pums id. Also note that the matrix indices start from 0
# Layout of the master matrix is as follows - puma id (0 th column), housing pums id, hhld types frequency,
# gq types frequency
    total_cols = 4 + hhld_types + gq_types
    total_rows = hhld_units + gq_units
    matrix = sparse.lil_matrix((total_rows, total_cols))

# In this part we populate the matrix
    dbc = db.cursor()
    rowHhidDict = {}
    row = 0
    for control_type in ['hhld', 'gq']:
# Here we determine the starting column in the master matrix for the hhld types, gq types frequency within each home
        if control_type == 'hhld':
            start = 3
        elif control_type == 'gq':
            start = 3 + arr(hhld_dimensions).prod()

# Read the pums data from the mysql files to
        if pumano == 0 or pumano == 99999:
            dbc.execute('Select state, pumano, hhid, serialno, %suniqueid from %s_sample order by hhid'
                        %(control_type, control_type))
        else:
            dbc.execute('Select state, pumano, hhid, serialno, %suniqueid from %s_sample where pumano = %s order by hhid'
                        %(control_type, control_type, pumano))

        result = arr(dbc.fetchall(), int64)


# Master Matrix is populated here

        if control_type == 'hhld':

            for i in result[:,2]:
# Storing the pumano, housing puma id for all housing units
                rowHhidDict[i] = row
                matrix[row,:4] = result[row,:4]
                row = row + 1

        if control_type == 'gq':
            for i in result[:,2]:
                rowHhidDict[i] = row
                matrix[row,:4] = result[(row - hhld_units), :4]
                row = row + 1

# Populating the household type, gq type
        for i in range(dbc.rowcount):
            matRow = rowHhidDict[result[i, 2]]
            matrix[matRow, start+result[i, -1]] = matrix[matRow, start+result[i, -1]] + 1

    dbc.close()
    db.commit()
    return matrix
def create_joint_dist(db, synthesis_type, control_variables, dimensions, pumano = 0, tract = 0, bg = 0):

    dbc = db.cursor()
    pums = database(db, '%s_pums'%synthesis_type)
    dummy = create_aggregation_string(control_variables)

    table_rows = dimensions.cumprod()[-1]
    table_cols = len(dimensions) + 4
    dummy_table = zeros((table_rows, table_cols), dtype =int)
    index_array = num_breakdown(dimensions)


    try:
        dbc.execute('create table %s_%s_joint_dist select %s from %s_pums where 0 '%(synthesis_type, pumano, dummy, synthesis_type))
        dbc.execute('alter table %s_%s_joint_dist add pumano int first'%(synthesis_type, pumano))
        dbc.execute('alter table %s_%s_joint_dist add tract int after pumano'%(synthesis_type, pumano))
        dbc.execute('alter table %s_%s_joint_dist add bg int after tract'%(synthesis_type, pumano))
        dbc.execute('alter table %s_%s_joint_dist add frequency float(27)'%(synthesis_type, pumano))
        dbc.execute('alter table %s_%s_joint_dist add index(tract, bg)'%(synthesis_type, pumano))
    except:
#        print 'Table %s_%s_joint_dist present' %(synthesis_type, pumano)
        pass

    variable_list = 'pumano, tract, bg, '
    for i in control_variables:
        variable_list = variable_list + i + ', '
    variable_list = variable_list + 'frequency'

    if pumano ==0:
        dbc.execute('select %s, count(*), %suniqueid from %s_pums group by %s '%(dummy, synthesis_type, synthesis_type, dummy))
        #print ('select %s, count(*), %suniqueid from %s_pums group by %s '%(dummy, synthesis_type, synthesis_type, dummy))
        result = arr(dbc.fetchall())
        dummy_table[:,:3] = [pumano, tract, bg]
        dummy_table[:,3:-1] = index_array
        dummy_table[result[:,-1]-1,-1] = result[:,-2]
    else:
        dbc.execute('select %s, count(*), %suniqueid from %s_pums where pumano = %s group by %s '%(dummy, synthesis_type, synthesis_type, pumano, dummy))
        result = arr(dbc.fetchall())
        dummy_table[:,:3] = [pumano, tract, bg]
        dummy_table[:,3:-1] = index_array
        dummy_table[result[:,-1]-1,-1] = result[:,-2]


    dbc.execute('delete from %s_%s_joint_dist where tract = %s and bg = %s' %(synthesis_type, pumano, tract, bg))
    dummy_table = str([tuple(i) for i in dummy_table])

    #try:
    #    dbc.execute('alter table %s_%s_joint_dist drop column %suniqueid' %(synthesis_type, pumano, synthesis_type))
    #except:
    #    pass

    dbc.execute('insert into %s_%s_joint_dist (%s) values %s' %(synthesis_type, pumano, variable_list, dummy_table[1:-1]))
    dbc.close()

    update_string = create_update_string(db, control_variables, dimensions)
    add_unique_id(db, '%s_%s_joint_dist' %(synthesis_type, pumano), synthesis_type, update_string)

    db.commit()
def create_whole_frequencies(db, synthesis_type, order_string, pumano = 0, tract = 0, bg = 0):
    dbc = db.cursor()
    table_name = ('%s_%s_ipf'%(synthesis_type, pumano))



    try:
        dbc.execute('create table %s select pumano, tract, bg, frequency from hhld_%s_joint_dist where 0;' %(table_name, pumano))
        dbc.execute('alter table %s change frequency marginal float(27)'%(table_name))
        dbc.execute('alter table %s add prior int default 0' %(table_name))
        dbc.execute('alter table %s add r_marginal int default 0'%(table_name))
        dbc.execute('alter table %s add diff_marginals float(27) default 0'%(table_name))
        dbc.execute('alter table %s add %suniqueid int'%(table_name, synthesis_type))
        dbc.execute('alter table %s add index(tract, bg)'%(table_name))
    except:
        pass
    dbc.execute('select frequency from %s_%s_joint_dist where tract = %s and bg = %s order by %s;' %(synthesis_type, pumano, tract, bg, order_string))
    frequency = arr(dbc.fetchall())

    dbc.execute('select frequency from %s_0_joint_dist order by %s' %(synthesis_type, order_string))
    prior = arr(dbc.fetchall())

    rowcount = dbc.rowcount
    dummy_table = zeros((rowcount, 6))
    dummy_table[:,:-3] = [pumano, tract, bg]
    dummy_table[:,-3] = frequency[:,0]
    dummy_table[:,-2] = prior[:,0]
    dummy_table[:,-1] = (arange(rowcount)+1)

    dbc.execute('delete from %s where tract = %s and bg = %s' %(table_name, tract, bg))
    dummy_table = str([tuple(i) for i in dummy_table])
    dbc.execute('insert into %s (pumano, tract, bg, marginal, prior, %suniqueid) values %s;' %(table_name, synthesis_type, dummy_table[1:-1]))
    dbc.execute('update %s set r_marginal = marginal where tract = %s and bg = %s'%(table_name, tract, bg))
    dbc.execute('update %s set diff_marginals = (marginal - r_marginal) * marginal where tract = %s and bg = %s'%(table_name, tract, bg))
    dbc.execute('select sum(marginal) - sum(r_marginal) from %s where tract = %s and bg = %s'%(table_name, tract, bg))
    result = dbc.fetchall()
    diff_total = round(result[0][0])


    if diff_total < 0:
        dbc.execute('select %suniqueid from %s where r_marginal <>0 and tract = %s and bg = %s order by diff_marginals '%(synthesis_type, table_name, tract, bg))
    else:
        dbc.execute('select %suniqueid from %s where marginal <>0 and tract = %s and bg = %s order by diff_marginals desc'%(synthesis_type, table_name, tract, bg))
    result = dbc.fetchall()

#    print 'The marginals corresponding to the following hhldtypes were changed by the given amount'

    for i in range(int(abs(diff_total))):
#        print 'record - %s changed by %s' %(result[i][0], diff_total / abs(diff_total))
        dbc.execute('update %s set r_marginal = r_marginal + %s where %suniqueid = %s and tract = %s and bg = %s' %(table_name, diff_total / abs(diff_total), synthesis_type, result[i][0], tract, bg))

    dbc.execute('select r_marginal from %s where prior <> 0 and tract = %s and bg = %s order by %suniqueid'%(table_name, tract, bg, synthesis_type))
    marginals = arr(dbc.fetchall())
    dbc.close()
    db.commit()
    return marginals
def tolerance (adjustment_all, adjustment_old, iteration, parameters):
    adjustment_all = arr(adjustment_all)
    adjustment_old = arr(adjustment_old)
    adjustment_difference = abs(adjustment_all - adjustment_old)
    adjustment_convergence_characteristic = adjustment_difference.cumsum()[-1]
    if adjustment_convergence_characteristic > parameters.ipfTol:
        return 1
    else:
#        print "Convergence Criterion - %s" %adjustment_convergence_characteristic
        return 0
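# Hedged usage sketch for tolerance(): it returns 1 while the summed absolute
# change in the IPF adjustment factors still exceeds parameters.ipfTol, and 0
# once the adjustments have converged.  Params here is a made-up stand-in for
# the real parameters object.
class Params(object):
    ipfTol = 1e-3

print(tolerance([1.02, 0.99], [1.10, 0.95], 2, Params()))            # 1: keep iterating
print(tolerance([1.001, 0.999], [1.0009, 0.9991], 3, Params()))      # 0: converged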
Example #21
 def constaint_weights(self):
     """Rescales the weights so that the maximum element of the matrix
        is 1.
     """
     max_values = self.weight_constaint_function()
     for i in range(len(self.weights)):
         if arr(max_values[i]) > 1.0:
             print "Constaining weights No " + str(i) + ": Divide by " + str(arr(max_values[i]))
             self.weights[i].set_value(np.float32(self.weights[i].get_value() / arr(max_values[i])))
             print "New max value: " + str(arr(self.weight_constaint_function()[i]))
Example #22
    def __init__(
        self,
        result_dir,
        data_X,
        data_y,
        data_t,
        valid_X,
        valid_y,
        valid_t,
        test_X,
        layers,
        weight_init_scheme,
        cost_function,
        hyperparams,
        training_sequence,
    ):
        """Class that takes layers and combines them into a neural network. Theano functions
            are created and the given data is then trained with the GPU on the constructed 
            neural network through the train method.
        """

        self.cost_function = cost_function

        self.H = hyperparams
        if training_sequence:
            self.training_sequence = training_sequence
        else:
            self.training_sequence = self.default_training_sequence
        self.layers = layers
        self.rng = np.random.RandomState(1234)

        self.time1 = time.time()

        self.ensemble = None
        self.ensemble_MNIST = None
        self.ensemble_softmax = None

        self.hook_functions = None

        self.weight_init_scheme = weight_init_scheme
        self.init_weight_and_data_variables(data_X, data_y, data_t, valid_X, valid_y, valid_t, test_X)

        self.init_theano_variables()

        self.train_history = np.float32(arr(range(self.H.L.epochs)))
        self.valid_history = np.float32(arr(range(self.H.L.epochs)))

        self.result_dir = result_dir

        self.hook_functions_batch = None
        self.hook_functions_crossvalid = None
        self.hook_functions_crossvalid_epoch = None
        pass
def drawing_with_replacement(db, frequencies, weights, index_matrix, sp_matrix, pumano = 0, seed=0, iteration=0):
    if seed == 0:
        seed = int(frequencies.sum())
    random.seed(seed+iteration)

    dbc = db.cursor()

    dbc.execute('select hhlduniqueid from hhld_sample group by hhlduniqueid')
    hhld_colno = dbc.rowcount
    dbc.execute('select gquniqueid from gq_sample group by gquniqueid')
    gq_colno = dbc.rowcount

    hh_colno = hhld_colno + gq_colno
    synthetic_population=[]
    j = 0
    for i in index_matrix[:hh_colno,:]:
        if i[1] == i[2] and frequencies[j]>0:
            synthetic_population.append([sp_matrix[i[1]-1, 2] , frequencies[j], i[0]])
        else:
            cumulative_weights = weights[sp_matrix[i[1]-1:i[2], 2]].cumsum()
            probability_distribution = cumulative_weights / cumulative_weights[-1]
            ti = time.time()
            #print probability_distribution, type(probability_distribution)
            probability_lower_limit = probability_distribution.tolist()
            probability_lower_limit.insert(0,0)
            probability_lower_limit = arr(probability_lower_limit)
            #print 'after insertion and conversion - ', probability_lower_limit, type(probability_lower_limit)
            #print 'time taken - %.4f' %(time.time()-ti)

            ti = time.time()
            random_numbers = random.rand(frequencies[j])
            freq, probability_lower_limit = histogram(random_numbers, probability_lower_limit)
            #print 'time taken for random number generation and histogram - %.4f' %(time.time()-ti)

            ti = time.time()
            hhldid_by_type = sp_matrix[i[1]-1:i[2],2]

            freqValid = freq[freq != 0]
            hhldid_by_typeValid = hhldid_by_type[freq != 0]

            ti = time.time()
            for k in range(len(freqValid)):
                synthetic_population.append([hhldid_by_typeValid[k], freqValid[k], i[0]])
            #print 'Old implementation - %.4f' %(time.time()-ti)


        j = j + 1

    dbc.close()
    db.commit()

    return arr(synthetic_population, int)
def adjust_weights(db, synthesis_type, control_variables, varCorrDict, controlAdjDict,
                    state, county, pumano=0, tract=0, bg=0, parameters=0, hhldsizeMargsMod=False):
    dbc = db.cursor()

    control_marginals = prepare_control_marginals (db, synthesis_type, control_variables, varCorrDict, 
                                                   controlAdjDict, state, county, tract, bg, hhldsizeMargsMod)

    tol = 1
    iteration = 0
    adjustment_old = []
    target_adjustment = []

    while (tol):
        iteration = iteration +1
        adjustment_all = []
        for i in range(len(control_variables)):
            adjusted_marginals = marginals(db, synthesis_type, control_variables[i], pumano, tract, bg)
            for j in range(len(adjusted_marginals)):
                if adjusted_marginals[j] == 0:
                    adjusted_marginals[j] = 1

            adjustment = arr(control_marginals[i]) / arr(adjusted_marginals)

            update_weights(db, synthesis_type, control_variables, control_variables[i], pumano, tract, bg, adjustment)

            for k in adjustment:
                adjustment_all.append(k)
                if iteration == 1:
                    if k == 0:
                        adjustment_old.append(0)
                    else:
                        adjustment_old.append(k/k)
                    target_adjustment = [adjustment_old]

        tol = tolerance(adjustment_all, adjustment_old, iteration, parameters)
        adjustment_old = adjustment_all
        adjustment_characteristic = abs(arr(adjustment_all) - arr(target_adjustment)).sum() / len(adjustment_all)
        if not tol:
            print control_variables[i], control_marginals[i], adjusted_marginals
    if (iteration>=parameters.ipfIter):
        pass
#        print "Maximum iterations reached\n"
    else:
#        print "Convergence Achieved in iterations - %s\n" %iteration
        pass

#    print "Marginals off by - %s" %adjustment_characteristic

    dbc.close()
    db.commit()
Example #25
  def err(self, X, Y):
    """
    This method computes the error rate on test data.  

    Parameters
    ---------
    X : M x N numpy array 
      M = number of data points; N = number of features. 
    Y : M x 1 numpy array    
      Array of classes (targets) corresponding to the data points in X.
    """
    Y    = arr( Y )
    Yhat = arr( self.predict(X) )
    return np.mean(Yhat.reshape(Y.shape) != Y)
    def __dectree_train(self, X, Y, L, R, F, T, next, minParent, minScore, nFeatures):
        """
        Zach, Sharon, and Janice's decision tree training function: based on handling complexity through
        the maximum number of leaves.

        TODO:
            1) Create a structure that holds the [decision and information gain (from that decision)]
                for each possible node
            2) Iterate through and create tree: 
                // within a while loop (while leaves != maxLeaves)

                ROOT: (when leaves == 0). choose the one with most(???) entropy from all possible
                    take ROOT out of 

                a. At the creation of each new tree node (or leaf), calculate the new [decision and info gain]
                    pairs that become available
                b. construct tree
        """
        n, d = mat(X).shape
        if n < minParent or np.var(Y) < minScore:
            assert n != 0, ('TreeRegress.__dectree_train: tried to create size zero node')
            # TODO: return something. maybe get rid of this whole conditional since it seems to be only used
            #           for recursion halting.

        best_val = np.inf
        best_feat = -1
        try_feat = np.random.permutation(d)

        # ...otherwise, search over (allowed) features
        for i_feat in try_feat[0:nFeatures-1]:
            dsorted = arr(np.sort(X[:,i_feat].T)).ravel()                       # sort data...
            pi = np.argsort(X[:,i_feat].T)                                      # ...get sorted indices...
            tsorted = Y[pi].ravel()                                             # ...and sort targets by feature ID
            can_split = np.append(arr(dsorted[:-1] != dsorted[1:]), 0)          # which indices are valid split points?

            if not np.any(can_split):          # no way to split on this feature?
                continue

            # find min weighted variance among split points
            val,idx = self.__min_weighted_var(tsorted, can_split, n)

            # save best feature and split point found so far
            if val < best_val:
                best_val = val
                best_feat = i_feat
                best_thresh = (dsorted[idx] + dsorted[idx + 1]) / 2
        
        return best_feat, best_thresh, best_val
Example #27
def plot_lum_rest():
    '''f_{rest,V} = f_{rest_corr}*[nu_V/ ((1+z)nu_J)]^beta for  flux \propto nu^beta and beta negative values'''
    clf()
    f_rest_corr = [2252.14, 1626.48, 403.717, 11783.2, 913.329, 549.616, 286.863, 990.110, 14.7689, 174.540, 1419.79, 149309.80115] 
    beta = [-1.35, -0.8, -0.96, -0.22, -1.73, -0.84, -3.48, -0.42, -3.81, -0.3, -1.7, -0.47]
    z_list_limits = [1.1588, 2.4274, 1.51, 0.54, 1.95, 1.6, 3.036, 2.346, 3.5, 1.165, 4.8, 0.9382]
    arrf = arr(f_rest_corr)
    arrb = arr(beta)
    arrz = arr(z_list_limits)
    nu_V = 5.444646098003629764065335753176043557e+14 
    nu_J = 2.398339664e+14
    f_rest_V = arrf * (nu_V/ ((1+arrz)*nu_J))**(arrb)
    print 'f_rest_V in microjansky:'
    print f_rest_V

    #convert to cgs from microjansky:
    f_rest_V = f_rest_V*10**(-29)
    print 'f_rest_V in cgs:'
    print f_rest_V

    #get luminosity distance from cosmocalc (lambdaCDM: omega_M = 0.27 and omega_lambda=0.73)
    dist = []
    for redshift in z_list_limits:
        dist += [cosmocalc.cosmocalc(z=redshift)['DL_cm']]
    
    arrd = arr(dist)
    print 'dist:'
    print arrd

    L_rest_V = f_rest_V*4*numpy.pi*arrd**2./(1.+arrz)
    print 'L_rest_V:'
    print L_rest_V
    #convert to ABSOLUTE AB magnitude:
    parsec = 3.085677581e18 # cm
    F_10pc = L_rest_V/(4 * numpy.pi * (10*parsec)**2)    #flux density at 10 parsecs     
    Absol_Mag = -2.5*numpy.log10(F_10pc) - 48.60    #Absolute mag in AB mag
    
    hist(Absol_Mag,6)
    xlabel('$M_v$', fontsize=27)
    ylabel('Number', fontsize=28)
    yticks(arr([0, 1., 2., 3., 4.]))
    ax = matplotlib.pyplot.gca()
    ax.set_xlim(ax.get_xlim()[::-1]) # reversing the xlimits
    savefig('Lum_dist_rest.eps')

    print 'Done'
    
    return Absol_Mag
Example #28
    def train(self, X, Y, minParent=2, maxDepth=np.inf, nFeatures=None):
        """
        Trains a random forest classification tree.

        Parameters
        ----------
        X : M x N numpy array of M data points with N features each.
        Y : M x 1 numpy array containing class labels for each data point in X. 
        minParent : (int) The minimum number of data required to split a node. 
        maxDepth  : (int) The maximum depth of the decision tree. 
        nFeatures : (int) The number of available features for splitting at each node.
        """
        n,d = arr(X).shape
        nFeatures = d if nFeatures is None else min(nFeatures,d)
        minScore = -1

        self.classes = list(np.unique(Y)) if len(self.classes) == 0 else self.classes
        Y = toIndex(Y)

        sz = min(2 * n, 2**(maxDepth + 1))   # pre-allocate storage for tree:
        L, R, F, T = np.zeros((sz,)), np.zeros((sz,)), np.zeros((sz,)), np.zeros((sz,))

        L, R, F, T, last = self.__dectree_train(X, Y, L, R, F, T, 0, 0, minParent, maxDepth, minScore, nFeatures)

        self.L = L[0:last]
        self.R = R[0:last]
        self.F = F[0:last]
        self.T = T[0:last]
Example #29
def shuffle_data(X, Y):
	"""
	Shuffle data in X and Y.

	Parameters
	----------
	X : numpy array
		N x M array of data to shuffle.
	Y : numpy array
		1 x N array of labels that correspond to data in X.

	Returns
	-------
	X or (X,Y) : numpy array or tuple of arrays
		Shuffled data (only returns X and Y if Y contains data).
	
	TODO: test more
	"""
	nx,dx = twod(X).shape
	Y = arr(Y).flatten()
	ny = len(Y)

	pi = np.random.permutation(nx)
	X = X[pi,:]

	if ny > 0:
		assert ny == nx, 'shuffle_data: X and Y must have the same length'
		Y = Y[pi]
		return X,Y

	return X
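# Usage sketch for shuffle_data (assuming the module's numpy-based twod/arr
# helpers, as in the functions above); the toy data is made up.
import numpy as np

X = np.arange(10).reshape(5, 2)
Y = np.array([0, 1, 0, 1, 1])
Xs, Ys = shuffle_data(X, Y)      # rows of X and entries of Y are permuted together
print(Xs.shape, Ys.shape)        # (5, 2) (5,)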
	def __init__(self, X=None, Y=None, stepsize=.01, tolerance=1e-4, max_steps=5000, init='zeros'):
		"""
		Constructor for LogisticMSEClassifier (logistic classifier with MSE loss function.).

		Parameters
		----------
		X : N x M numpy array 
			N = number of data points; M = number of features.
		Y : 1 x N numpy array 
			Class labels that relate to the data points in X.
		stepsize : scalar
			Step size for gradient descent (decreases as 1/iter).
		tolerance : scalar
			Tolerance for stopping criterion.
		max_steps : int
			Max number of steps to take before training stops.
		init : str
			Initialization method; one of the following strings:
			'keep' (to keep current value), 'zeros' (init to all-zeros), 'randn' (init at random),
			and 'linreg' (init w/ small linear regression).
		"""
		self.wts = []								# linear weights on features (1st is constant)
		self.classes = arr([-1, 1])					# list of class values used in input

		if type(X) is np.ndarray and type(Y) is np.ndarray:
			self.train(X, Y, stepsize, tolerance, max_steps, init.lower())
Example #31
    def createBasePlotAt(self, centerX, r, title='Current Step', mf=None):
        fig = plt.figure()
        fig.set_size_inches(sys_utils.get_plot_size(),
                            sys_utils.get_plot_size())
        ax1 = fig.add_subplot(111)

        matplotlib.rcParams['xtick.direction'] = 'out'
        matplotlib.rcParams['ytick.direction'] = 'out'

        x = linspace(centerX[0] - r, centerX[0] + r, num=100)
        y = linspace(centerX[1] - r, centerX[1] + r, num=100)
        X, Y = meshgrid(x, y)

        Z = empty((len(y), len(x)))

        plt.title(title)

        for i in range(0, len(x)):
            for j in range(0, len(y)):
                Z[j, i] = self.objective(arr([x[i], y[j]]))
        CS = plt.contour(X, Y, Z, 6, colors='k')
        plt.clabel(CS, fontsize=9, inline=1)

        if mf is not None:
            for i in range(0, len(x)):
                for j in range(0, len(y)):
                    Z[j, i] = mf(arr([x[i], y[j]]))
            CS = plt.contour(X, Y, Z, 6, colors='y')
            plt.clabel(CS, fontsize=9, inline=1)

        for idx in range(0, self.getNumEqualityConstraints()):
            for i in range(0, len(x)):
                for j in range(0, len(y)):
                    Z[j, i] = self.equalityConstraints(arr([x[i], y[j]]))[idx]
            CS = plt.contour(X, Y, Z, 6, colors='r')
            plt.clabel(CS, fontsize=9, inline=1)

        for idx in range(0, self.getNumInequalityConstraints()):
            for i in range(0, len(x)):
                for j in range(0, len(y)):
                    Z[j, i] = self.inequalityConstraints(arr([x[i],
                                                              y[j]]))[idx]
            CS = plt.contour(X, Y, Z, 6, colors='b')
            plt.clabel(CS, fontsize=9, inline=1)
        return ax1
Example #32
    def load_dataset(self):
        cfg = self.cfg
        file_name = os.path.join(self.cfg.project_path, cfg.dataset)
        # Load Matlab file dataset annotation
        mlab = sio.loadmat(file_name)
        self.raw_data = mlab
        mlab = mlab["dataset"]

        num_images = mlab.shape[1]
        #        print('Dataset has {} images'.format(num_images))
        data = []
        has_gt = True

        for i in range(num_images):
            sample = mlab[0, i]

            item = DataItem()
            item.image_id = i
            base = str(self.cfg["project_path"])
            im_path = os.path.join(base, sample[0][0])
            item.im_path = im_path
            item.im_size = sample[1][0]
            if len(sample) >= 3:
                joints = sample[2][0][0]
                #                print(sample)
                joint_id = joints[:, 0]
                # make sure joint ids are 0-indexed
                if joint_id.size != 0:
                    assert (joint_id < cfg.num_joints).any()
                joints[:, 0] = joint_id
                coords = [joint[1:] for joint in joints]
                coords = arr(coords)
                item.coords = coords
                item.joints = [joints]
                item.joint_id = [arr(joint_id)]
                # print(item.joints)
            else:
                has_gt = False
            # if cfg.crop:
            #    crop = sample[3][0] - 1
            #    item.crop = extend_crop(crop, cfg.crop_pad, item.im_size)
            data.append(item)

        self.has_gt = has_gt
        return data
Example #33
def read_single_2d_data(data: pd.DataFrame):
    length = len(data.index)
    index = arr(data.index)

    bp_interested = get_bp_interested(data)
    #bp_interested=['snout', 'leftear', 'rightear', 'tailbase']

    coords = np.zeros((length, len(bp_interested), 2))
    scores = np.zeros((length, len(bp_interested)))

    for bp_idx, bp in enumerate(bp_interested):
        bp_coords = arr(data[bp])
        coords[index, bp_idx, :] = bp_coords[:, :2]
        scores[index, bp_idx] = bp_coords[:, 2]

    return {'length': length,
            'coords': coords,
            'scores': scores}
Example #34
    def __call__(self, W, X):
        for i, x in zip(self.inputs, X):
            i.val = x

        for layer in self.layers:
            for node in layer:
                node(W)

        return arr([out(W) for out in self.outputs])
Example #35
    def make_batch(self, data_item, scale, mirror):
        im_file = data_item.im_path
        logging.debug('image %s', im_file)
        logging.debug('mirror %r', mirror)
        image = imread(im_file, mode='RGB')

        if self.has_gt:
            joints = np.copy(data_item.joints)

        if self.cfg.crop:
            crop = data_item.crop
            image = image[crop[1]:crop[3] + 1, crop[0]:crop[2] + 1, :]
            if self.has_gt:
                joints[:, 1:3] -= crop[0:2].astype(joints.dtype)

        img = imresize(image, scale) if scale != 1 else image
        scaled_img_size = arr(img.shape[0:2])

        if mirror:
            img = np.fliplr(img)

        batch = {Batch.inputs: img}

        if self.has_gt:
            stride = self.cfg.stride

            if mirror:
                joints = [
                    self.mirror_joints(person_joints, self.symmetric_joints,
                                       image.shape[1])
                    for person_joints in joints
                ]

            sm_size = np.ceil(scaled_img_size / (stride * 2)).astype(int) * 2

            scaled_joints = [
                person_joints[:, 1:3] * scale for person_joints in joints
            ]

            joint_id = [
                person_joints[:, 0].astype(int) for person_joints in joints
            ]
            part_score_targets, part_score_weights, locref_targets, locref_mask = self.compute_target_part_scoremap(
                joint_id, scaled_joints, data_item, sm_size, scale)

            batch.update({
                Batch.part_score_targets: part_score_targets,
                Batch.part_score_weights: part_score_weights,
                Batch.locref_targets: locref_targets,
                Batch.locref_mask: locref_mask
            })

        batch = {key: data_to_input(data) for (key, data) in batch.items()}

        batch[Batch.data_item] = data_item

        return batch
Example #36
    def __init__(self, *args, **kwargs):
        """
        Constructor for treeRegress (decision tree regression model)

        Parameters: see "train" function; calls "train" if arguments passed

        Properties (internal use only)
           L,R : indices of left & right child nodes in the tree
           F,T : feature index & threshold for decision (left/right) at this node
                 for leaf nodes, T[n] holds the prediction for leaf node n
        """
        self.L = arr([0])  # indices of left children
        self.R = arr([0])  # indices of right children
        self.F = arr([0])  # feature to split on (-1 = leaf = predict)
        self.T = arr([0])  # threshold to split on (prediction value if leaf)

        if len(args) or len(kwargs):  # if we were given optional arguments,
            self.train(*args, **kwargs)  #  just pass them through to "train"
Example #37
def data_gauss(N0,
               N1=None,
               mu0=arr([0, 0]),
               mu1=arr([1, 1]),
               sig0=np.eye(2),
               sig1=np.eye(2)):
    """Sample data from a two-component Gaussian mixture model.  	

	Args:
	    N0 (int): Number of data to sample for class -1.
	    N1 (int): Number of data to sample for class 1.
	    mu0 (arr): numpy array
	    mu1 (arr): numpy array
	    sig0 (arr): numpy array
	    sig1 (arr): numpy array

	Returns:
	    X (array): Array of sampled data
	    Y (array): Array of class values that correspond to the data points in X.

	TODO: test more
	"""
    # ALT:  return data_GMM_new(N0, ((1.,[0,0],[1.]))
    #       return data_GMM_new(N0+N1, ((.5,[0,0],[1.]),(.5,[1,1],[1.])))
    if not N1:
        N1 = N0

    d1, d2 = twod(mu0).shape[1], twod(mu1).shape[1]
    if d1 != d2 or np.any(twod(sig0).shape != arr([d1, d1])) or np.any(
            twod(sig1).shape != arr([d1, d1])):
        raise ValueError('data_gauss: dimensions should agree')

    X0 = np.dot(np.random.randn(N0, d1), sqrtm(sig0))
    X0 += np.ones((N0, 1)) * mu0
    Y0 = -np.ones(N0)

    X1 = np.dot(np.random.randn(N1, d1), sqrtm(sig1))
    X1 += np.ones((N1, 1)) * mu1
    Y1 = np.ones(N1)

    X = np.row_stack((X0, X1))
    Y = np.concatenate((Y0, Y1))

    return X, Y
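# Usage sketch for data_gauss: draw a small two-class Gaussian mixture sample
# with the default means/covariances and check the shapes and labels.
X, Y = data_gauss(100)
print(X.shape, np.unique(Y))     # (200, 2) and class labels [-1.  1.]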
Example #38
    def compute_target_part_scoremap_numpy(
        self, joint_id, coords, data_item, size, scale
    ):
        dist_thresh = float(self.cfg.pos_dist_thresh * scale)
        dist_thresh_sq = dist_thresh ** 2
        num_joints = self.cfg.num_joints

        scmap = np.zeros(cat([size, arr([num_joints])]))
        locref_size = cat([size, arr([num_joints * 2])])
        locref_mask = np.zeros(locref_size)
        locref_map = np.zeros(locref_size)

        width = size[1]
        height = size[0]
        grid = np.mgrid[:height, :width].transpose((1, 2, 0))

        for person_id in range(len(coords)):
            for k, j_id in enumerate(joint_id[person_id]):
                joint_pt = coords[person_id][k, :]
                j_x = np.asscalar(joint_pt[0])
                j_x_sm = round((j_x - self.half_stride) / self.stride)
                j_y = np.asscalar(joint_pt[1])
                j_y_sm = round((j_y - self.half_stride) / self.stride)
                min_x = round(max(j_x_sm - dist_thresh - 1, 0))
                max_x = round(min(j_x_sm + dist_thresh + 1, width - 1))
                min_y = round(max(j_y_sm - dist_thresh - 1, 0))
                max_y = round(min(j_y_sm + dist_thresh + 1, height - 1))
                x = grid.copy()[:, :, 1]
                y = grid.copy()[:, :, 0]
                dx = j_x - x * self.stride - self.half_stride
                dy = j_y - y * self.stride - self.half_stride
                dist = dx ** 2 + dy ** 2
                mask1 = dist <= dist_thresh_sq
                mask2 = (x >= min_x) & (x <= max_x)
                mask3 = (y >= min_y) & (y <= max_y)
                mask = mask1 & mask2 & mask3
                scmap[mask, j_id] = 1
                locref_mask[mask, j_id * 2 + 0] = 1
                locref_mask[mask, j_id * 2 + 1] = 1
                locref_map[mask, j_id * 2 + 0] = (dx * self.locref_scale)[mask]
                locref_map[mask, j_id * 2 + 1] = (dy * self.locref_scale)[mask]

        weights = self.compute_scmap_weights(scmap.shape, joint_id, data_item)
        return scmap, weights, locref_map, locref_mask
Example #39
def drawing_housing_units(db,
                          frequencies,
                          weights,
                          index_matrix,
                          sp_matrix,
                          pumano=0):

    dbc = db.cursor()
    dbc.execute('select hhlduniqueid from hhld_sample group by hhlduniqueid')
    hhld_colno = dbc.rowcount
    dbc.execute('select gquniqueid from gq_sample group by gquniqueid')
    gq_colno = dbc.rowcount

    hh_colno = hhld_colno + gq_colno
    synthetic_population = []
    j = 0

    for i in index_matrix[:hh_colno, :]:
        if i[1] == i[2] and frequencies[j] > 0:
            synthetic_population.append(
                [sp_matrix[i[1] - 1, 2], frequencies[j], i[0]])
        else:
            cumulative_weights = weights[sp_matrix[i[1] - 1:i[2], 2]].cumsum()
            probability_distribution = cumulative_weights / cumulative_weights[
                -1]
            probability_lower_limit = probability_distribution.tolist()
            probability_lower_limit.insert(0, 0)
            probability_lower_limit = arr(probability_lower_limit)
            random_numbers = random.rand(frequencies[j])
            freq, probability_lower_limit = histogram(random_numbers,
                                                      probability_lower_limit)
            hhldid_by_type = sp_matrix[i[1] - 1:i[2], 2]

            for k in range(len(freq)):
                if freq[k] != 0:
                    #hhid = hhidRowDict[hhldid_by_type[k]]
                    # storing the matrix row no, freq, type
                    synthetic_population.append(
                        [hhldid_by_type[k], freq[k], i[0]])
        j = j + 1

    dbc.close()
    db.commit()
    return arr(synthetic_population, int)
Example #40
	def train(self, X, Y, init='zeros', stepsize=.01, tolerance=1e-4, max_steps=5000):
		"""
		This method trains the neural network. Refer to constructor
		doc string for descriptions of arguments.
		"""
		if self.wts[0].shape[1] - 1 != len(X[0]):
			raise ValueError('NNetClassify.train: sizes[0] must equal the number of features in X')

		if len(np.unique(Y)) != self.wts[-1].shape[0]:
			raise ValueError('NNetClassify.train: sizes[-1] must equal the number of classes in Y')

		self.classes = self.classes if self.classes else np.unique(Y)

		# convert Y to 1-of-K format
		Y_tr_k = to_1_of_k(Y)

		n,d = mat(X).shape													# d = dim of data, n = number of data points
		nc = len(self.classes)												# number of classes
		L = len(self.wts) 													# get number of layers

		# define desired activation function and its derivative (for training)
		sig, d_sig = self.sig, self.d_sig
		sig_0, d_sig_0 = self.sig_0, self.d_sig_0

		# outer loop of stochastic gradient descent
		iter = 1															# iteration number
		done = 0															# end of loop flag

		surr = np.zeros((1, max_steps + 1)).ravel()							# surrogate loss values
		err = np.zeros((1, max_steps + 1)).ravel()							# misclassification rate values

		while not done:
			step_i = stepsize / iter										# step size evolution; classic 1/t decrease
			
			# stochastic gradient update (one pass)
			for i in range(n):
				A,Z = self.__responses(self.wts, X[i,:], sig, sig_0)		# compute all layers' responses, then backprop
				delta = (Z[L] - Y_tr_k[i,:]) * arr(d_sig_0(Z[L]))			# take derivative of output layer

				for l in range(L - 1, -1, -1):
					grad = mat(delta).T * mat(Z[l])							# compute gradient on current layer wts
					delta = np.multiply(delta.dot(self.wts[l]), d_sig(Z[l]))# propagate gradient downwards
					delta = delta[:,1:]										# discard constant feature
					self.wts[l] = self.wts[l] - step_i * grad				# take gradient step on current layer wts

			err[iter] = self.err_k(X, Y_tr_k)								# error rate (classification)
			surr[iter] = self.mse_k(X, Y_tr_k)								# surrogate (mse on output)

			print('iter {}: surrogate loss {}'.format(iter, surr[iter]))

			# check if finished
			done = ((iter > 1) and np.abs(surr[iter] - surr[iter - 1]) < tolerance) or iter >= max_steps
			iter += 1
Example #41
def updateAcceleration(n, x, y, xmin, ymin, nxCell, dxCell, dyCell, linkHead,
                       linkNext, h, h_sqr, spiky_gradientFac,
                       viscosity_laplacianFac, mass, P, rho, eta, gravity, vx,
                       vy, ax, ay):

    for iP in prange(n):
        ax[iP] = 0.0
        ay[iP] = 0.0
        xi = x[iP]
        yi = y[iP]
        axi = ax[iP]
        ayi = ay[iP]
        vxi = vx[iP]
        vyi = vy[iP]
        Pi = P[iP]

        # Get the index of the node closest to particle[iP]
        Ix = int(np.round((xi - xmin) / dxCell))   # np.int alias removed in newer NumPy
        Iy = int(np.round((yi - ymin) / dyCell))

        # Loop through neighboring cells
        for Ineigh in [
                Ix + Iy * nxCell, Ix + 1 + Iy * nxCell, Ix + (Iy + 1) * nxCell,
                Ix + 1 + (Iy + 1) * nxCell
        ]:
            jP = linkHead[Ineigh]
            while (jP >= 0):  # Negative value = Null
                if jP == iP:
                    jP = linkNext[jP]
                    continue
                r_sqr = (xi - x[jP])**2 + (yi - y[jP])**2
                if r_sqr < h_sqr:
                    r = np.sqrt(r_sqr)
                    R = arr([(xi - x[jP]) / r, (yi - y[jP]) / r])

                    # Compute pressure force
                    gradW = spiky_gradientFac * (h - r)**2 * R
                    Fac = -mass[jP] * (Pi + P[jP]) / (2.0 * rho[jP])
                    axi += Fac * gradW[0]
                    ayi += Fac * gradW[1]

                    # Compute viscous force
                    laplacianW = viscosity_laplacianFac * (h - r)
                    Fac = eta * mass[jP] / rho[jP] * laplacianW
                    axi += Fac * (vx[jP] - vxi)
                    ayi += Fac * (vy[jP] - vyi)

                # end if dist
                jP = linkNext[jP]
            # end jP
        # end Ineigh
        axi += rho[iP] * gravity[0]
        ayi += rho[iP] * gravity[1]

        ax[iP] = axi / rho[iP]
        ay[iP] = ayi / rho[iP]
Example #42
def multi_forward_pass_epoch():
    '''Use validation data to predict the output, averaging predictions over repeated forward passes with dropout active.
    '''
    np.set_printoptions(suppress=True)
    classifications = np.zeros((4200,10))
    for i in range(50):
        classifications = np.add(classifications,(np.float64(arr(nn.feedforward_valid_drop_function()))))    
        
    #print 'Mean ' + str(np.mean(classifications,axis=0))
    return nn.cross_validation_function_dropout(np.float32(classifications/15.))[0]
Example #43
def multi_forward_pass(batch):
    '''Use training data to predict output by averaging predictions with active dropout.
    '''
    np.set_printoptions(suppress=True)
    classifications = np.zeros((150,10))
    for i in range(15):
        classifications = np.add(classifications,(np.float64(arr(nn.feedforward_function(batch)))))    
        
    #print 'Mean ' + str(np.mean(classifications,axis=0))
    return nn.train_error_function_dropout(batch,np.float32(classifications/15.))[0]
Example #44
def fromIndex(Y, values):
    """
    Convert index-valued Y into discrete representation specified by values
    in values.

    Parameters
    ----------
    Y : numpy array
        1 x N (or N x 1) numpy array of indices.
    values : numpy array
        1 x max(Y) array of values for conversion.

    Returns
    -------
    discrete_Y : numpy array
        1 x N (or N x 1) numpy array of discrete values.
    """
    discrete_Y = arr(values)[arr(Y)]
    return discrete_Y
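A quick usage sketch, assuming fromIndex and the module-level alias arr = numpy.array are available:

import numpy as np
arr = np.array                      # as in the other examples

Y = arr([0, 2, 1, 2])               # index-valued labels
values = arr([-1.0, 0.5, 3.0])      # value associated with each index
print(fromIndex(Y, values))         # -> [-1.   3.   0.5  3. ]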
Example #45
def one_basis_function_plot():
    fig = plt.figure()
    ax = fig.add_subplot(111)
    # ax = fig.add_axes([-.1,1.1, -.1,1.1])

    # Basis function phi_3
    x, y = arr([0, .4, .6, .8, 1.]), arr([0., 0., 1., 0., 0.])
    ax.plot(x, y, '-m', linewidth=2.0)
    ax.text(0.6, 1.02, r"$\phi_3$", fontsize=18)

    ax.set_ylim(-.1, 1.1)
    ax.set_xlim(-.1, 1.1)
    # plt.xticks(np.linspace(0.,1.,6),['$x_0$','$x_1$','$x_2$','$x_3$','$x_4$','$x_5$'],fontsize=18)
    plt.xticks(arr([.4, .6, .8]), ['$x_2$', '$x_3$', '$x_4$'], fontsize=18)
    # print Axis.get_majorticklabels(plt.axis)
    plt.savefig("one_basis_function.pdf")

    # plt.show()
    plt.clf()
Example #46
def updateCenters(k, xt, C):
    # Update the centers to the mean of their assigned points and return a fresh cluster dictionary (without points)
    means = []
    for kx in range(k):
        if len(C[kx]['points']) == 0:
            pass
        else:
            means.append(np.mean(C[kx]['points'], axis=0))
    C = createDict(k, arr(means))
    return C
Example #47
def exp2steps(results, colors):
    plt.clf()
    x = [17, 35, 43, 48, 55, 70, 124, 170, 323, 403]
    for alg, result in results.items():
        c = next(colors)
        if alg in 'GS':
            plt.plot(x, result.avg_steps, label=alg, c=c)
            plt.fill_between(x,
                             arr(result.avg_steps) - arr(result.std_steps),
                             arr(result.avg_steps) + arr(result.std_steps),
                             alpha=0.5,
                             color=c)
            plt.scatter(x, result.avg_steps, c=c)
    plt.xlabel("Wielkość instancji")
    plt.ylabel("Liczba kroków")
    plt.xlim(1, 410)
    plt.legend()
    plt.grid(True)
    plt.savefig("plots/exp2steps.pdf", format="pdf", bbox_inches='tight')
Example #48
def plot_lum():
    clf()
    j_3min = [
        8052.06, 3050.04, 324.251, 20082.0, 1443.05, 1070.26, 1879.54, 3210.33,
        312.932, 233.877, 714.423, 112.846, 126.616
    ]
    j_3min2 = [
        8052.06, 3050.04, 324.251, 1443.05, 1070.26, 1879.54, 3210.33, 312.932,
        233.877, 714.423, 112.846, 126.616
    ]
    j_3min3 = [
        3050.04, 324.251, 1443.05, 1070.26, 1879.54, 3210.33, 312.932, 233.877,
        714.423, 112.846, 126.616
    ]

    j_3min = [
        8052.06, 3050.04, 324.251, 20082.0, 1443.05, 1070.26, 1879.54, 3210.33,
        312.932, 233.877, 714.423, 188.211, 1594, 57.29, 833466.82317
    ]

    #convert to cgs from microjansky:
    j_3min = arr(j_3min) * 10**(-29)

    #convert to AB magnitude:
    j_3min = -2.5 * numpy.log10(j_3min) - 48.60

    hist(j_3min, 13)
    xlabel('$m_j$', fontsize=28)
    ylabel('Number', fontsize=28)
    yticks(arr([0, 1., 2., 3., 4.]))
    ax = matplotlib.pyplot.gca()
    ax.set_xlim(ax.get_xlim()[::-1])  # reversing the xlimits
    savefig('Lum_dist.eps')

    clf()
    #   hist(j_3min,20,cumulative=True, histtype='step')
    #  hist(j_3min2,20,cumulative=True, histtype='step')
    # hist(j_3min3,20,cumulative=True, histtype='step')
    #ylim(0,14)
    #xlim(-1000,22000)
    #xlabel('J Flux at 3 Minutes (Micro Jansky)')
    #  savefig('lum_dist.eps')
    return j_3min
Example #49
def update(y, n_states):
    from numpy.random import randn
    from numpy import arange
    from numpy import eye
    from numpy import zeros, ones  # needed by the 'zeros' mu_mode and the alpha prior below
    from numpy import array as arr
    from numpy import cov
    from numpy.random import multivariate_normal as mvn_rand
    from sklearn.cluster import KMeans

    data_dim, data_len = y.shape

    # --- setting
    gmm_setting = {
        # 'update_order': ['M', 'E'],
        'update_order': ['E', 'M'],
        # 'expt_init_mode': 'random',
        'expt_init_mode': 'kmeans',
    }
    gmm = Gmm(data_dim, n_states, **gmm_setting)

    # --- Mu
    mu_mode = 'mvn_rand'
    if mu_mode == 'zeros':
        mu = zeros((data_dim, n_states))
    elif mu_mode == 'randn':
        c = 2
        mu = randn(data_dim, n_states) * c
    elif mu_mode == 'arange':
        mu = arange(data_dim * n_states).reshape(data_dim, n_states)
    elif mu_mode == 'kmeans':
        km = KMeans(n_states)
        km.fit(y.T)
        mu = km.cluster_centers_.T
    elif mu_mode == 'mvn_rand':
        mu = mvn_rand(y.mean(1), cov(y), size=n_states).T
    else:
        raise Exception('Not supported: %s' % mu_mode)

    alpha = ones(n_states) * (data_len / n_states)
    W = arr([eye(data_dim) * 1e+3 for k in range(n_states)])
    W = W.transpose(1, 2, 0)
    # --- set params
    prms = {'MuR': {'mu': mu, 'W': W}, 'Pi': {'alpha': alpha}}
    gmm.set_params(prms)
    gmm.init_expt_s(data_len, y)
    # --- plotter
    s = gmm.expt_s.argmax(0)
    plotter(y, s, gmm, 'GMM prior', 2)
    # --- update
    gmm.update(y, 200)
    # --- plotter
    s = gmm.expt_s.argmax(0)
    plotter(y, s, gmm, 'posterior', 3)

    predict_y, predict_s, vb = gmm.predict(y)
Example #50
    def plot_settings(self):
        """ Plotting settings (colors, linewidths etc.), possibly depending on bus variable.
        """

        var = 'Vbase'  # base colors etc on Vbase
        var_lim = [380, 300,
                   0]  # different categories of Vbase, should be a list

        # bus settings
        self.sets_variable_lim = var_lim
        var = self.bus.loc[:, var]
        self.bus_set = arr([
            find(v >= arr(var_lim))[0] if v >= var_lim[-1] else -1 for v in var
        ])
        self.bus_color = ['r', (230. / 255, 152. / 255, 0), 'g']
        self.bus_name_color = ['k'] * 3
        self.bus_lw = [1.5, 1, 1]
        self.bus_name_fs = [0, 0, 0]

        # line settings
        var_line = var.loc[self.line.bus0]
        self.line_set = arr([
            find(v >= arr(var_lim))[0] if v >= var_lim[-1] else -1
            for v in var_line
        ])
        self.line_lw = [1, 1, 1]
        self.line_color = ['r', (230. / 255, 152. / 255, 0), 'g']

        # Link
        self.link_lw = 1
        self.link_color = 'b'

        # Interactive plot
        self.interactive = True  # interactive map mode
        self.picker_node = 7  # tolerance for interactive picking
        self.picker_arc = 3
        self.significant_figures = 3  # when info is displayed
        self.info_fc = [213. / 255, 230. / 255, 1]  # color for info box
        self.info_ec = 'k'  # color info-box edge
        self.info_lw = 1  # info-box edge width

        self.equal_aspect = False
Example #51
def get_tracklets_info(tracklets, all_tracklet_prop):
    tracklets_info = get_tracklets_temporal_info(tracklets)

    for i, ts in enumerate(tracklets):
        t_start = tracklets_info[i]['start']
        t_end = tracklets_info[i]['end']

        t_reid = arr([
            all_tracklet_prop[t][ind]['reid']
            for t in range(t_start, t_end + 1)
            for ind in range(len(all_tracklet_prop[t]))
            if all_tracklet_prop[t][ind]['id'] == ts[t]
        ])

        t_avg_reid = np.mean(t_reid, axis=0)
        tracklets_info[i]['avg_reid_score'] = t_avg_reid

        tracklets_info[i]['start_prop_reid'] = t_reid[0]
        tracklets_info[i]['end_prop_reid'] = t_reid[-1]

        seq = range(t_start, t_end + 1)
        tracklets_info[i]['avg_area'] = np.mean([
            all_tracklet_prop[t][ind]['area'] for t in seq
            for ind in range(len(all_tracklet_prop[t]))
            if all_tracklet_prop[t][ind]['id'] == ts[t]
        ])

        t_score = [
            all_tracklet_prop[t][ind]['score']
            for t in range(t_start, t_end + 1)
            for ind in range(len(all_tracklet_prop[t]))
            if all_tracklet_prop[t][ind]['id'] == ts[t]
        ]
        t_avg_score = np.mean(
            arr(t_score), axis=0) if len(t_score) != 0 else arr([
                all_tracklet_prop[t_start][ind]['score']
                for ind in range(len(all_tracklet_prop[t_start]))
                if all_tracklet_prop[t_start][ind]['id'] == ts[t_start]
            ])
        tracklets_info[i]['avg_score'] = t_avg_score

    return tracklets_info
Example #52
 def get_old_key(self):
     """
     :param file:
     :return:
     """
     keyNames = []
     keyValues = []
     foundOne = False
     for var in self.f['Master-Parameters']['Variables']:
         if not self.f['Master-Parameters']['Variables'][var].attrs['Constant']:
             foundOne = True
             keyNames.append(var)
             keyValues.append(arr(self.f['Master-Parameters']['Variables'][var]))
     if foundOne:
         if len(keyNames) > 1:
             return keyNames, arr(np.transpose(arr(keyValues)))
         else:
             return keyNames[0], arr(keyValues[0])
     else:
         return 'No-Variation', arr([1])
Пример #53
0
 def __init__(self, path: str, **params) -> None:
     self.path = path
     self.size = 0
     self.compname, self.comptype = None, None
     self.content = arr([])
     if access(path, F_OK):
         self.load()
     if params:
         self.setparams(**params)
         return
     raise FileNotFoundError('for new files you need to provide params!')
Пример #54
0
 def force(self, vertex1, vertex2):
     """ Calculates the inverse-r^2 force (given by the E field) between
         two charged points (vertices). Note the obvious difference with real
         physics: q1 + q2, not q1*q2. It just makes things look nicer.
     """
     graph = self._graph_dict
     dx = graph[vertex2]["loc"][0] - graph[vertex1]["loc"][0]
     dy = graph[vertex2]["loc"][1] - graph[vertex1]["loc"][1]
     q1 = graph[vertex1]["weight"]
     q2 = graph[vertex2]["weight"]
     return (q1 + q2) * arr([dx, dy]) / (dx**2 + dy**2)**1.5
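A hypothetical standalone version of the same force rule on a two-vertex toy graph; the dictionary layout mirrors the {'loc': ..., 'weight': ...} entries the method above expects:

import numpy as np
arr = np.array

graph = {
    'a': {'loc': (0.0, 0.0), 'weight': 1.0},
    'b': {'loc': (3.0, 4.0), 'weight': 2.0},
}
dx = graph['b']['loc'][0] - graph['a']['loc'][0]
dy = graph['b']['loc'][1] - graph['a']['loc'][1]
q1, q2 = graph['a']['weight'], graph['b']['weight']
force_on_a = (q1 + q2) * arr([dx, dy]) / (dx ** 2 + dy ** 2) ** 1.5
print(force_on_a)   # points from 'a' towards 'b', magnitude (q1 + q2) / r**2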
Example #55
    def __init__(self, *args, **kwargs):
        """Constructor for decision tree base class

        Args:
          *args, **kwargs (optional): passed to train function

        Properties (internal use only)
           L,R (arr): indices of left & right child nodes in the tree
           F,T (arr): feature index & threshold for decision (left/right) at this node
             P (arr): for leaf nodes, P[n] holds the prediction for leaf node n
        """
        self.L = arr([])  # indices of left children
        self.R = arr([])  # indices of right children
        self.F = arr([])  # feature to split on (-1 = leaf = predict)
        self.T = arr([])  # threshold to split on
        self.P = arr([])  # prediction value for node
        self.sz = 0  # size; also next node during construction

        if len(args) or len(kwargs):  # if we were given optional arguments,
            self.train(*args, **kwargs)  #  just pass them through to "train"
Example #56
	def predict(self, X):
		"""
		This method makes a prediction on X using learned linear coefficients.

		Parameters
		----------
		X : numpy array 
			N x M numpy array that contains N data points with M features.
		"""
		X_te = np.concatenate((np.ones((mat(X).shape[0],1)), X), axis=1)		# extend features by including a constant feature
		return arr(mat(X_te) * mat(self.theta).T)
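The same computation outside the class, with made-up coefficients, to show the shape handling (a constant column is prepended, then the extended data matrix is multiplied by theta transposed):

import numpy as np
arr, mat = np.array, np.asmatrix      # assumed aliases, matching the rest of these examples

theta = arr([[0.5, 2.0, -1.0]])       # assumed learned coefficients: bias + two feature weights
X = arr([[1.0, 3.0], [2.0, 0.0]])     # two data points, two features
X_te = np.concatenate((np.ones((mat(X).shape[0], 1)), X), axis=1)
print(arr(mat(X_te) * mat(theta).T))  # [[-0.5] [ 4.5]]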
Example #57
def logmatprod(ln_a, ln_b):
    '''
    Log-domain matrix product:
    ln_C[i, j] = log(sum_k(exp(ln_a[i, k] + ln_b[k, j])))

    parameters
    ln_a: np.array(size_I, size_K)
    ln_b: np.array(size_K, size_J)

    returns
    ln_C: np.array(size_I, size_J)
    '''
    from numpy import zeros
    ln_a = arr([ln_a]) if ln_a.ndim == 1 else ln_a
    ln_b = arr([ln_b]) if ln_b.ndim == 1 else ln_b
    I = ln_a.shape[0]
    J = ln_b.shape[1]
    ln_C = zeros((I, J))
    for i in range(I):
        for j in range(J):
            ln_C[i, j] = logsumexp(ln_a[i] + ln_b[:, j], -1)
    return ln_C
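A small sanity check of the identity exp(logmatprod(log A, log B)) == A @ B; it assumes logmatprod is importable together with its module-level arr = numpy.array and a logsumexp such as scipy.special.logsumexp:

import numpy as np

A = np.random.rand(3, 4) + 1e-3              # strictly positive so the logs are finite
B = np.random.rand(4, 2) + 1e-3
ln_C = logmatprod(np.log(A), np.log(B))
print(np.allclose(np.exp(ln_C), A @ B))      # expected: True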
Example #58
    def train(self,
              X,
              Y,
              init='zeros',
              stepsize=.01,
              tolerance=1e-4,
              max_steps=500):
        """
		This method trains the neural network. Refer to constructor
		doc string for descriptions of arguments.
		"""
        n, d = mat(
            X).shape  # d = number of features; n = number of training data
        L = len(self.wts) + 1  # number of layers

        # define desired activation function and its derivative for training
        sig, d_sig, sig_0, d_sig_0 = self.sig, self.d_sig, self.sig_0, self.d_sig_0

        # outer loop of gradient descent
        iter = 1  # iteration number
        done = 0  # end of loop flag
        surr = np.zeros((1, max_steps + 1)).ravel()  # surrogate loss values
        errs = np.zeros((1, max_steps + 1)).ravel()  # error rate values

        while not done:
            step_i = stepsize / iter

            # stochastic gradient update
            for i in range(n):
                A, Z = self.__responses(
                    self.wts, X[i, :], sig,
                    sig_0)  # compute all layers' responses, then backprop
                delta = (Z[L - 1] - Y[i]) * d_sig_0(
                    Z[L - 1])  # take derivative of output layer

                for l in range(L - 2, 0, -1):
                    grad = arr(
                        mat(delta).T *
                        mat(Z[l]))  # compute gradient on current layer weights
                    delta = np.dot(delta, self.wts[l]) * d_sig(
                        Z[l])  # propagate gradient downwards
                    delta = delta[1:]  # discard constant feature
                    self.wts[l] = self.wts[
                        l] - step_i * grad  # take gradient step on current layer weights

            # compute current error values
            errs[iter] = self.mse(X, Y)  # surrogate (mse on output)

            # check stopping conditions
            done = iter > 1 and (abs(errs[iter] - errs[iter - 1]) < tolerance
                                 or iter >= max_steps)

            iter += 1
            wts_old = self.wts
Example #59
    def predict(self, X):
        """
        This method makes predictions on the test data X.

        Parameters
        ----------
        X : M x N numpy array of M data points (N features each) at which to predict
        """
        Y_te = self.__dectree_test(X, self.L, self.R, self.F, self.T,
                                   0).T.ravel()
        return arr([[self.classes[int(i)]] for i in np.ravel(Y_te)])
Example #60
def marginals(db, synthesis_type, variable_name, pumano, tract, bg):
    # Returns the marginals wrt the entered dimension for calculating the adjustment in each iteration
    dbc = db.cursor()
    dbc.execute('select %s, sum(frequency) from %s_%s_joint_dist where tract = %s and bg = %s group by %s' %( variable_name, synthesis_type, pumano, tract, bg, variable_name))
    result = arr(dbc.fetchall(), float)
    marginal = []
    for i in result:
        marginal.append(float(i[1]))
    dbc.close()
    db.commit()
    return marginal