Exemplos de DatabaseReader em Python, exemplos de database_reader.DatabaseReader em Python

Exemplo n.º 1

0

Exibir arquivo

Arquivo: regression_util.py Projeto: afcarl/Global-Indicators

	def build(self, dateRange):
		"""Load the per-year values of ``self.attribute`` for ``self.country``.

		``dateRange`` is indexed at positions 0 and 1 to give the first and
		last year requested from the database. One entry per fetched row is
		appended to ``self.availYears`` (first year plus the row offset) and
		to ``self.attValues`` (the row's value for ``self.attribute``).
		"""
		reader = DatabaseReader()
		firstYear = dateRange[0]
		rows, _columns, attributeIndex = reader.fetchCountryData(
				self.country, (firstYear, dateRange[1]), asNumpyMatrix = False)

		# Row offset 0 corresponds to the first requested year.
		for offset, row in enumerate(rows):
			self.availYears.append(firstYear + offset)
			self.attValues.append(row[attributeIndex[self.attribute]])

Exemplo n.º 2

0

Exibir arquivo

def testPCRegression():
    """Fit a principal-component regression of US GDP and plot the result.

    Fetches the "United States" data, drops sparse attributes, smooths and
    z-score normalizes the columns, splits off "GDP (constant LCU)" as the
    target, regresses on the first 45 rows and plots target vs. prediction.
    """
    rowCount = 45
    targetName = "GDP (constant LCU)"

    reader = DatabaseReader()
    fetched = reader.fetchCountryData("United States")
    testData, testDic = fetched[:2]
    testData, testDic = clean.removeSparseAttributes(testData, testDic)

    # Both transforms are called for their effect on testData; the return
    # values are not used.
    for transform in (clean.smoothByAverage, clean.normalizeByZScore):
        clean.transformColumns(testData, transform)

    testData, testDic, vect = clean.splitOffAttr(testData, testDic, targetName)
    coeffs, testData, testDic = pcRegression(
        testData[:rowCount, :], testDic, vect[:rowCount, :], constant=1)

    prediction = testData[:rowCount, :] * np.asmatrix(coeffs).T
    series = [(vect[:rowCount, :], "GDP"), (prediction, "GDP Predicted")]
    plot.plotCountry(range(rowCount), "Time (Years)", series,
                     "GDP vs Time", decorations=['k', 'r'])

Exemplo n.º 3

0

Exibir arquivo

Arquivo: generate_regression_data.py Projeto: Brennan-M/Global-Indicators

	def cleanAttributes(self, attributes):
		"""Return the entries of ``attributes`` that exist for ``self.country``.

		The 1960-2014 data for the country is fetched only to obtain its
		attribute dictionary. The result follows the iteration order of that
		dictionary; an attribute listed several times in ``attributes`` is
		returned as many times.
		"""
		reader = DatabaseReader()
		_matrix, _columns, knownAttributes = reader.fetchCountryData(
		self.country, (1960, 2014), useCountryCode=False, asNumpyMatrix=False)

		return [
			candidate
			for name, _index in knownAttributes.items()
			for candidate in attributes
			if name == candidate
		]

Exemplo n.º 4

0

Exibir arquivo

Arquivo: generate_regression_data.py Projeto: afcarl/Global-Indicators

    def cleanAttributes(self, attributes):
        """Return the entries of ``attributes`` that exist for ``self.country``.

        The 1960-2014 data for the country is fetched only to obtain its
        attribute dictionary. The result follows the iteration order of that
        dictionary; an attribute listed several times in ``attributes`` is
        returned as many times.
        """
        reader = DatabaseReader()
        _matrix, _columns, knownAttributes = reader.fetchCountryData(
            self.country, (1960, 2014),
            useCountryCode=False,
            asNumpyMatrix=False)

        return [
            candidate
            for name, _index in knownAttributes.items()
            for candidate in attributes
            if name == candidate
        ]

Exemplo n.º 5

0

Exibir arquivo

class MinMax(object):
    """Builds a cleaned, optionally smoothed and normalized table for one attribute."""

    def __init__(self, path=DB_PATH):
        """Open a DatabaseReader on ``path`` and keep it as ``self.db``."""
        self.db = DatabaseReader(path)

    def generateData(self, attribute, normalization, smoothing):
        """Fetch ``attribute`` over time and return it as a dictionary.

        Args:
            attribute: attribute name passed to ``fetchAttributeOverTimeData``.
            normalization: "global-min-max", "min-max" or "z-score"; any other
                value skips normalization.
            smoothing: "replacement", "average" or "interpolation"; any other
                value skips smoothing.

        Returns:
            Whatever ``Clean.transformToDictionary`` builds from the processed
            data and the two (swapped) index dictionaries.
        """
        # Parenthesized single-argument form prints the same on Python 2 and 3.
        print(attribute)
        data, rowDic, colDic = self.db.fetchAttributeOverTimeData(attribute)
        data, rowDic = Clean.removeInvalidCountries(data, rowDic)
        data, colDic = Clean.findValidTimeRange(data, colDic)

        # Take the transpose; the row/column dictionaries swap roles with it.
        data = data.T
        rowDic, colDic = colDic, rowDic

        # Do some smoothing
        if smoothing == "replacement":
            data = Clean.transformColumns(data, Clean.smoothByReplacement(0))
        elif smoothing == "average":
            data = Clean.transformColumns(data, Clean.smoothByAverage)
        elif smoothing == "interpolation":
            data = Clean.transformColumns(data, Clean.smoothByInterpolation)

        # Do some normalization (applied on the un-transposed orientation)
        if normalization == "global-min-max":
            data = Clean.normalizeByGlobalMinMax(data.T).T
        elif normalization == "min-max":
            data = Clean.transformColumns(data.T, Clean.normalizeByMinMax).T
        elif normalization == "z-score":
            data = Clean.transformColumns(data.T, Clean.normalizeByZScore).T

        # NOTE(review): the return value is discarded; this only has an effect
        # if normalizeByMinMax works in place on the slice - confirm intent.
        Clean.normalizeByMinMax(data.T[:, 0])

        return Clean.transformToDictionary(data, rowDic, colDic)

Exemplo n.º 6

0

Exibir arquivo

    def getData(self):
        """Fetch ``self.attributes`` for ``self.year`` and index them by country.

        Stores the fetched matrix and its two index dictionaries on the
        instance, smooths the matrix columns by average, then adds one
        ``{attribute: value}`` dictionary per country to ``self.countryVals``.
        """
        reader = DatabaseReader()
        self.values, self.countries, self.col = reader.fetchAttributesData(
            self.attributes, self.year)

        # Called for its effect on self.values; the return value is unused.
        matrix_cleaning.transformColumns(self.values,
                                         matrix_cleaning.smoothByAverage)

        for row, ccode in self.countries.items():
            perAttribute = {
                attr: self.values[row, column]
                for column, attr in self.col.items()
            }
            self.countryVals.update({ccode: perAttribute})

Exemplo n.º 7

0

Exibir arquivo

Arquivo: generate_regression_data.py Projeto: afcarl/Global-Indicators

    def actual(self):
        """Return the recorded values of ``self.attribute`` keyed by year.

        Fetches the 1960-2014 data for ``self.country``, smooths the columns
        by average (falling back to replacement with 0 when averaging raises
        ``ValueError``) and pairs each year with the target attribute's value.

        Returns:
            A dict mapping year (int) to value with NaN entries left out, or
            ``0`` when ``self.attribute`` is not available for the country.

        Raises:
            IndexError: if fewer than 55 rows were fetched.
        """
        db = DatabaseReader()
        dataMatrix, colDictionary, attributeDict = db.fetchCountryData(
            self.country, (1960, 2014),
            useCountryCode=False,
            asNumpyMatrix=False)

        try:
            clean.transformColumns(dataMatrix, clean.smoothByAverage)
        except ValueError:
            clean.transformColumns(dataMatrix, clean.smoothByReplacement(0))

        # Callers receive 0 (not an empty dict) for an unknown attribute.
        if self.attribute not in attributeDict:
            return 0

        targetCol = attributeDict[self.attribute]
        yt_ = np.asarray([row[targetCol] for row in dataMatrix])

        # Build the filtered dict in one pass instead of deleting NaN entries
        # while iterating, which raises RuntimeError on Python 3.
        return {
            year: yt_[num]
            for num, year in enumerate(range(1960, 2015))
            if not math.isnan(yt_[num])
        }

Exemplo n.º 8

0

Exibir arquivo

Arquivo: generate_regression_data.py Projeto: Brennan-M/Global-Indicators

	def actual(self):
		"""Return the recorded values of ``self.attribute`` keyed by year.

		Fetches the 1960-2014 data for ``self.country``, smooths the columns
		by average (falling back to replacement with 0 when averaging raises
		``ValueError``) and pairs each year with the target attribute's value.

		Returns:
			A dict mapping year (int) to value with NaN entries left out, or
			``0`` when ``self.attribute`` is not available for the country.

		Raises:
			IndexError: if fewer than 55 rows were fetched.
		"""
		db = DatabaseReader()
		dataMatrix, colDictionary, attributeDict = db.fetchCountryData(
		self.country, (1960, 2014), useCountryCode=False, asNumpyMatrix=False)

		try:
			clean.transformColumns(dataMatrix, clean.smoothByAverage)
		except ValueError:
			clean.transformColumns(dataMatrix, clean.smoothByReplacement(0))

		# Callers receive 0 (not an empty dict) for an unknown attribute.
		if self.attribute not in attributeDict:
			return 0

		targetCol = attributeDict[self.attribute]
		yt_ = np.asarray([row[targetCol] for row in dataMatrix])

		# Build the filtered dict in one pass instead of deleting NaN entries
		# while iterating, which raises RuntimeError on Python 3.
		return {
			year: yt_[num]
			for num, year in enumerate(range(1960, 2015))
			if not math.isnan(yt_[num])
		}

Exemplo n.º 9

0

Exibir arquivo

def testPCA():
    """Run PCA on US indicator data, regress GDP on the components and plot.

    The first 45 rows are used for fitting (plotted against 1960-2004); the
    remaining rows are projected with the fitted PCA and plotted against
    2005-2013. Diagnostic values are printed along the way.
    """
    db = DatabaseReader()
    testData, testDic = db.fetchCountryData("United States")[:2]
    testData = testData[:-1, :]  # drop the last row
    testData, testDic = clean.removeSparseAttributes(testData, testDic)
    clean.transformColumns(testData, clean.smoothByAverage)
    clean.transformColumns(testData, clean.normalizeByZScore)
    testData, testDic, vect = clean.splitOffAttr(testData,
            testDic, "GDP (constant LCU)")

    pcaData, pcaObj = doPCA(testData[:45])
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("_____________ explained variance ratio_______________")
    print(pcaObj.explained_variance_ratio_)

    pcaData, testDic = clean.addColumn(pcaData, testDic,
            clean.CONSTANT_DIC_VALUE, 1)
    coeffs, residual = LA.lstsq(pcaData, vect[:45])[:2]
    print("_______________ residual _______________")
    print(residual)
    coeffs = np.asmatrix(coeffs)

    # Find top 10
    print('_______________Top 10 of 1st Component________________')
    contributions = []
    # Unit vector selecting the first of five components.
    e1 = np.matrix([[1, 0, 0, 0, 0]])
    for i in np.nditer(pcaObj.inverse_transform(e1)):
        contributions.append(i)
    getTopContributors(contributions, testDic)

    prediction = pcaData * coeffs
    plot.plotCountry(range(1960, 2005), "Year", [(vect[:45,:], "True GDP"),
            (prediction, "Predicted GDP")], "Normalized GDP vs Year", decorations=['k', 'r--'])

    # Project the held-out rows with the already-fitted PCA and coefficients.
    predictData = pcaObj.transform(testData[45:])
    predictData, testDic = clean.addColumn(predictData, testDic,
            clean.CONSTANT_DIC_VALUE, 1)
    prediction = predictData * coeffs

    plot.plotCountry(range(2005, 2014), "Year", [(vect[45:,:], "True GDP"),
            (prediction, "Predicted GDP")], "Normalized GDP vs Year", decorations=['k', 'r--'])

Exemplo n.º 10

0

Exibir arquivo

    def __init__(self):
        """Create the collaborators and default flags used by this object.

        Instantiates the book database reader, arm controller, navigation
        manager, torso and head, clears the three command-line flags, and
        reads the home and delivery poses from the book database.
        """
        self.book_data = DatabaseReader()
        self.arm_controller = ArmController()
        self.location_driver = NavigationManager()
        self.torso = fetch_api.Torso()
        self.head = fetch_api.Head()
        self.cmdline = False
        self.cmdline_grab_tray = False
        self.cmdline_grab_book = False

        # Home pose for the simulator, kept for reference (could be refactored):
        # returnPose = Pose()
        # returnPose.position.x = 0.3548
        # returnPose.position.y = 0.6489
        # returnPose.position.z = 0.0
        # returnPose.orientation.x = 0.0
        # returnPose.orientation.y = 0.0
        # returnPose.orientation.z = 0.14559
        # returnPose.orientation.w = .989

        # On the real robot the home and delivery poses are read from the
        # last and second-to-last entries of the book library.
        # self.home_pose = returnPose
        self.home_pose = self.book_data.library[-1].pose
        self.delivery_pose = self.book_data.library[-2].pose

Exemplo n.º 11

0

Exibir arquivo

    def __init__(self, path=DB_PATH):
        """Open a DatabaseReader on ``path`` and keep it as ``self.db``."""
        reader = DatabaseReader(path)
        self.db = reader

Exemplo n.º 12

0

Exibir arquivo

Arquivo: generate_regression_data.py Projeto: afcarl/Global-Indicators

    def polynomial(self, degree, attributes):
        """Extend ``self.attribute`` past 2000 using per-attribute polynomial fits.

        For every name in ``attributes`` a polynomial of the given ``degree``
        is fitted against the target attribute on the smoothed 1960-2000
        data. Rows 41-54 of the 1960-2014 data (presumably the years
        2001-2014) are then predicted as the mean of the fitted polynomials.

        Args:
            degree: polynomial degree passed to ``np.polyfit``.
            attributes: names of the predictor attributes.

        Returns:
            A dict mapping year (1960-2014) to value: observed values for the
            training rows followed by the predictions, NaN entries left out.
            Returns ``0`` when ``self.attribute`` is not available.
        """
        # Read in the limited range used for training.
        db = DatabaseReader()
        dataMatrix, colDictionary, attributeDict = db.fetchCountryData(
            self.country, (1960, 2000),
            useCountryCode=False,
            asNumpyMatrix=False)

        try:
            clean.transformColumns(dataMatrix, clean.smoothByAverage)
        except ValueError:
            clean.transformColumns(dataMatrix, clean.smoothByReplacement(0))

        # Callers receive 0 (not an empty dict) for an unknown attribute.
        if self.attribute not in attributeDict:
            return 0

        # Target training data.
        targetCol = attributeDict[self.attribute]
        yt_ = np.asarray([row[targetCol] for row in dataMatrix])

        # One fitted polynomial per predictor, in the order of `attributes`.
        eqList = []
        for att in attributes:
            attCol = attributeDict[att]
            xt_ = np.asarray([row[attCol] for row in dataMatrix])
            eqList.append(np.poly1d(np.polyfit(xt_, yt_, degree)))

        # Read in the full range for prediction. The same reader is reused:
        # the original created a second reader here but never called it.
        dataMatrix2, colDictionary2, attributeDict2 = db.fetchCountryData(
            self.country, (1960, 2014),
            useCountryCode=False,
            asNumpyMatrix=False)

        # np.append returns a new array, so yt_ itself is never modified.
        yp_ = yt_
        for year in range(41, 55):
            totsum = 0
            for num, att in enumerate(attributes):
                xtemp = dataMatrix2[year][attributeDict2[att]]
                totsum = totsum + eqList[num](xtemp)
            yp_ = np.append(yp_, totsum / (len(attributes)))

        x_ = np.asarray(list(range(1960, 2015)))

        # Build the filtered dict in one pass instead of deleting NaN entries
        # while iterating, which raises RuntimeError on Python 3.
        return {
            x_[num]: yp_[num]
            for num in range(len(x_))
            if not math.isnan(yp_[num])
        }

Exemplo n.º 13

0

Exibir arquivo

Arquivo: generate_regression_data.py Projeto: Brennan-M/Global-Indicators

	def log(self, attributes):
		"""Extend ``self.attribute`` past 2000 with a logistic-regression model.

		The model is trained on the smoothed 1960-2000 rows, using the columns
		named in ``attributes`` as features and the target attribute cast to
		int as the class label. Each smoothed 2001-2014 row is then predicted
		and appended to the observed series.

		Args:
			attributes: names of the predictor attributes.

		Returns:
			A dict mapping year (1960-2014) to value with NaN entries left
			out, or ``0`` when ``self.attribute`` is not available.
		"""
		db = DatabaseReader()
		dataMatrix, colDictionary, attributeDict = db.fetchCountryData(
		self.country, (1960, 2000), useCountryCode=False, asNumpyMatrix=False)

		try:
			clean.transformColumns(dataMatrix, clean.smoothByAverage)
		except ValueError:
			clean.transformColumns(dataMatrix, clean.smoothByReplacement(0))

		# Callers receive 0 (not an empty dict) for an unknown attribute.
		if self.attribute not in attributeDict:
			return 0

		# The same reader is reused: the original created a second reader
		# here but never called it.
		dataMatrix2, colDictionary2, attributeDict2 = db.fetchCountryData(
		self.country, (2001, 2014), useCountryCode=False, asNumpyMatrix = False)

		try:
			clean.transformColumns(dataMatrix2, clean.smoothByAverage)
		except ValueError:
			clean.transformColumns(dataMatrix2, clean.smoothByReplacement(0))

		# Target training data.
		targetCol = attributeDict[self.attribute]
		yt_ = np.asarray([row[targetCol] for row in dataMatrix])

		# Feature matrix: one row per training year, one column per attribute.
		xt_ = np.asarray(
			[[row[attributeDict[att]] for att in attributes] for row in dataMatrix])

		llf = linear_model.LogisticRegression(penalty='l2', dual=False, tol=0.0001, C=1.0, fit_intercept=True, intercept_scaling=1, class_weight=None, random_state=None, solver='newton-cg', max_iter=300, multi_class='multinomial', verbose=0, warm_start=False, n_jobs=1)
		llf.fit(xt_,yt_.astype(int))

		# Predict each later year and append it to the observed series.
		for year in range(0,len(dataMatrix2)):
			temparray = []
			for att in attributes:
				temparray = np.append(temparray,(dataMatrix2[year][attributeDict2[att]]))
			temparray2 = np.reshape(temparray, (1,-1))
			tempval = llf.predict(temparray2)
			yt_ = np.append(yt_, tempval)

		x_ = np.asarray(list(range(1960, 2015)))

		# Build the filtered dict in one pass instead of deleting NaN entries
		# while iterating, which raises RuntimeError on Python 3.
		return {
			x_[num]: yt_[num]
			for num in range(len(x_))
			if not math.isnan(yt_[num])
		}

Exemplo n.º 14

0

Exibir arquivo

Arquivo: find_correlation_data.py Projeto: afcarl/Global-Indicators

    def calculateCorrelations(self):
        """Correlate every attribute of ``self.country`` with ``self.attribute``.

        Fetches the 1961-2014 data, restricts it to the valid time range,
        smooths the columns by average and computes the Pearson coefficient
        of each attribute's series against the target attribute's series.

        The result is stored in ``self.correlationValues`` as a list of
        ``(attribute, coefficient)`` tuples, rounded to three decimals and
        sorted by ascending coefficient. The target itself and attributes
        whose coefficient is NaN are left out.

        Raises:
            KeyError: if ``self.attribute`` is not among the fetched attributes.
        """
        db = DatabaseReader()
        startYear = 1961
        endYear = 2014
        dataMatrix, colDictionary, attributeDict = db.fetchCountryData(
            self.country, (startYear, endYear),
            useCountryCode=False,
            asNumpyMatrix=False)

        # Row index -> year, needed by findValidTimeRange.
        rowDic = dict(enumerate(range(startYear, endYear + 1)))

        dataMatrix, rowDic = clean.findValidTimeRange(dataMatrix.T, rowDic)
        dataMatrix = dataMatrix.T

        dataMatrix = clean.transformColumns(dataMatrix, clean.smoothByAverage)

        # Row is Year
        # Col is Attribute
        attributeDictReverse = {}
        for key, value in attributeDict.items():
            attributeDictReverse[value] = key

        # Collect one value list per attribute: {attribute: [v_year0, ...]}.
        series = {}
        for key in attributeDict.keys():
            series[key] = []

        for year in range(0, len(dataMatrix)):
            for attr in range(0, len(dataMatrix[year])):
                series[attributeDictReverse[attr]].append(
                    dataMatrix[year][attr])

        # Capture the reference series once. The original overwrote
        # correlations[self.attribute] with its own coefficient mid-loop, so
        # every attribute visited afterwards was compared against a scalar.
        target = series[self.attribute]

        correlationValues = []
        for attr, array in series.items():
            if attr == self.attribute:
                continue
            coefficient = pearsonr(array, target)[0]
            # Skip NaN results here rather than deleting from a dict while
            # iterating over it (RuntimeError on Python 3).
            if math.isnan(coefficient):
                continue
            correlationValues.append((attr, round(coefficient, 3)))

        self.correlationValues = sorted(correlationValues,
                                        key=lambda x: x[1])

Exemplo n.º 15

0

Exibir arquivo

Arquivo: generate_regression_data.py Projeto: Brennan-M/Global-Indicators

	def polynomial(self, degree, attributes):
		"""Extend ``self.attribute`` past 2000 using per-attribute polynomial fits.

		For every name in ``attributes`` a polynomial of the given ``degree``
		is fitted against the target attribute on the smoothed 1960-2000
		data. Rows 41-54 of the 1960-2014 data (presumably the years
		2001-2014) are then predicted as the mean of the fitted polynomials.

		Args:
			degree: polynomial degree passed to ``np.polyfit``.
			attributes: names of the predictor attributes.

		Returns:
			A dict mapping year (1960-2014) to value: observed values for the
			training rows followed by the predictions, NaN entries left out.
			Returns ``0`` when ``self.attribute`` is not available.
		"""
		# Read in the limited range used for training.
		db = DatabaseReader()
		dataMatrix, colDictionary, attributeDict = db.fetchCountryData(
		self.country, (1960, 2000), useCountryCode=False, asNumpyMatrix=False)

		try:
			clean.transformColumns(dataMatrix, clean.smoothByAverage)
		except ValueError:
			clean.transformColumns(dataMatrix, clean.smoothByReplacement(0))

		# Callers receive 0 (not an empty dict) for an unknown attribute.
		if self.attribute not in attributeDict:
			return 0

		# Target training data.
		targetCol = attributeDict[self.attribute]
		yt_ = np.asarray([row[targetCol] for row in dataMatrix])

		# One fitted polynomial per predictor, in the order of `attributes`.
		eqList = []
		for att in attributes:
			attCol = attributeDict[att]
			xt_ = np.asarray([row[attCol] for row in dataMatrix])
			eqList.append(np.poly1d(np.polyfit(xt_, yt_, degree)))

		# Read in the full range for prediction. The same reader is reused:
		# the original created a second reader here but never called it.
		dataMatrix2, colDictionary2, attributeDict2 = db.fetchCountryData(
		self.country, (1960, 2014), useCountryCode=False, asNumpyMatrix = False)

		# np.append returns a new array, so yt_ itself is never modified.
		yp_ = yt_
		for year in range(41, 55):
			totsum = 0
			for num, att in enumerate(attributes):
				xtemp = dataMatrix2[year][attributeDict2[att]]
				totsum = totsum + eqList[num](xtemp)
			yp_ = np.append(yp_, totsum/(len(attributes)))

		x_ = np.asarray(list(range(1960, 2015)))

		# Build the filtered dict in one pass instead of deleting NaN entries
		# while iterating, which raises RuntimeError on Python 3.
		return {
			x_[num]: yp_[num]
			for num in range(len(x_))
			if not math.isnan(yp_[num])
		}

Exemplo n.º 16

0

Exibir arquivo

Arquivo: generate_regression_data.py Projeto: afcarl/Global-Indicators

    def ridge(self, attributes):
        """Extend ``self.attribute`` past 2000 with a ridge-regression model.

        The model is trained on the smoothed 1960-2000 rows, using the columns
        named in ``attributes`` as features and the target attribute cast to
        int as the response. Each smoothed 2001-2014 row is then predicted
        and appended to the observed series.

        Args:
            attributes: names of the predictor attributes.

        Returns:
            A dict mapping year (1960-2014) to value with NaN entries left
            out, or ``0`` when ``self.attribute`` is not available.
        """
        db = DatabaseReader()
        dataMatrix, colDictionary, attributeDict = db.fetchCountryData(
            self.country, (1960, 2000),
            useCountryCode=False,
            asNumpyMatrix=False)

        try:
            clean.transformColumns(dataMatrix, clean.smoothByAverage)
        except ValueError:
            clean.transformColumns(dataMatrix, clean.smoothByReplacement(0))

        # Callers receive 0 (not an empty dict) for an unknown attribute.
        if self.attribute not in attributeDict:
            return 0

        # The same reader is reused: the original created a second reader
        # here but never called it.
        dataMatrix2, colDictionary2, attributeDict2 = db.fetchCountryData(
            self.country, (2001, 2014),
            useCountryCode=False,
            asNumpyMatrix=False)

        try:
            clean.transformColumns(dataMatrix2, clean.smoothByAverage)
        except ValueError:
            clean.transformColumns(dataMatrix2, clean.smoothByReplacement(0))

        # Target training data.
        targetCol = attributeDict[self.attribute]
        yt_ = np.asarray([row[targetCol] for row in dataMatrix])

        # Feature matrix: one row per training year, one column per attribute.
        xt_ = np.asarray(
            [[row[attributeDict[att]] for att in attributes]
             for row in dataMatrix])

        clf = linear_model.Ridge(alpha=1e-06)
        clf.fit(xt_, yt_.astype(int))

        # Predict each later year and append it to the observed series.
        for year in range(0, len(dataMatrix2)):
            temparray = []
            for att in attributes:
                temparray = np.append(temparray,
                                      (dataMatrix2[year][attributeDict2[att]]))
            temparray2 = np.reshape(temparray, (1, -1))
            tempval = clf.predict(temparray2)
            yt_ = np.append(yt_, tempval)

        x_ = np.asarray(list(range(1960, 2015)))

        # Build the filtered dict in one pass instead of deleting NaN entries
        # while iterating, which raises RuntimeError on Python 3.
        return {
            x_[num]: yt_[num]
            for num in range(len(x_))
            if not math.isnan(yt_[num])
        }

Exemplo n.º 17

0

Exibir arquivo

Arquivo: piweather_server.py Projeto: theFork/pi-weather

#!/usr/bin/python3
"""Pi Weather Server
"""

from flask import Flask, jsonify, render_template, request
from flask import url_for  # pylint: disable=unused-import

from database_reader import DatabaseReader
from piweather_config import DATABASE_PATH, VERSION

# Flask application object that the route decorators below register on.
_APP = Flask(__name__)
# Shared database reader used by the request handlers.
# NOTE(review): the meaning of the second argument (1000) is not visible
# here - confirm against DatabaseReader.
_DB = DatabaseReader(DATABASE_PATH, 1000)


@_APP.route('/get_available_timeslot')
def get_available_timeslot():
    """Serve the timestamp range available in the database as JSON."""
    timeslot = _DB.get_available_timeslot()
    return jsonify(timeslot)


@_APP.route('/get_data')
def get_data():
    """Return a json object containing all data for the specified time slot

    GET-Parameters:
        start:      start timestamp
        end:        end timestamp
    """
    start = request.args.get('start')
    end = request.args.get('end')

Exemplo n.º 18

0

Exibir arquivo

Arquivo: generate_regression_data.py Projeto: afcarl/Global-Indicators

    def log(self, attributes):
        """Extend ``self.attribute`` past 2000 with a logistic-regression model.

        The model is trained on the smoothed 1960-2000 rows, using the columns
        named in ``attributes`` as features and the target attribute cast to
        int as the class label. Each smoothed 2001-2014 row is then predicted
        and appended to the observed series.

        Args:
            attributes: names of the predictor attributes.

        Returns:
            A dict mapping year (1960-2014) to value with NaN entries left
            out, or ``0`` when ``self.attribute`` is not available.
        """
        db = DatabaseReader()
        dataMatrix, colDictionary, attributeDict = db.fetchCountryData(
            self.country, (1960, 2000),
            useCountryCode=False,
            asNumpyMatrix=False)

        try:
            clean.transformColumns(dataMatrix, clean.smoothByAverage)
        except ValueError:
            clean.transformColumns(dataMatrix, clean.smoothByReplacement(0))

        # Callers receive 0 (not an empty dict) for an unknown attribute.
        if self.attribute not in attributeDict:
            return 0

        # The same reader is reused: the original created a second reader
        # here but never called it.
        dataMatrix2, colDictionary2, attributeDict2 = db.fetchCountryData(
            self.country, (2001, 2014),
            useCountryCode=False,
            asNumpyMatrix=False)

        try:
            clean.transformColumns(dataMatrix2, clean.smoothByAverage)
        except ValueError:
            clean.transformColumns(dataMatrix2, clean.smoothByReplacement(0))

        # Target training data.
        targetCol = attributeDict[self.attribute]
        yt_ = np.asarray([row[targetCol] for row in dataMatrix])

        # Feature matrix: one row per training year, one column per attribute.
        xt_ = np.asarray(
            [[row[attributeDict[att]] for att in attributes]
             for row in dataMatrix])

        llf = linear_model.LogisticRegression(penalty='l2',
                                              dual=False,
                                              tol=0.0001,
                                              C=1.0,
                                              fit_intercept=True,
                                              intercept_scaling=1,
                                              class_weight=None,
                                              random_state=None,
                                              solver='newton-cg',
                                              max_iter=300,
                                              multi_class='multinomial',
                                              verbose=0,
                                              warm_start=False,
                                              n_jobs=1)
        llf.fit(xt_, yt_.astype(int))

        # Predict each later year and append it to the observed series.
        for year in range(0, len(dataMatrix2)):
            temparray = []
            for att in attributes:
                temparray = np.append(temparray,
                                      (dataMatrix2[year][attributeDict2[att]]))
            temparray2 = np.reshape(temparray, (1, -1))
            tempval = llf.predict(temparray2)
            yt_ = np.append(yt_, tempval)

        x_ = np.asarray(list(range(1960, 2015)))

        # Build the filtered dict in one pass instead of deleting NaN entries
        # while iterating, which raises RuntimeError on Python 3.
        return {
            x_[num]: yt_[num]
            for num in range(len(x_))
            if not math.isnan(yt_[num])
        }

Exemplo n.º 19

0

Exibir arquivo

Arquivo: find_correlation_data.py Projeto: Brennan-M/Global-Indicators

	def calculateCorrelations(self):
		"""Correlate every attribute of ``self.country`` with ``self.attribute``.

		Fetches the 1961-2014 data, restricts it to the valid time range,
		smooths the columns by average and computes the Pearson coefficient
		of each attribute's series against the target attribute's series.

		The result is stored in ``self.correlationValues`` as a list of
		``(attribute, coefficient)`` tuples, rounded to three decimals and
		sorted by ascending coefficient. The target itself and attributes
		whose coefficient is NaN are left out.

		Raises:
			KeyError: if ``self.attribute`` is not among the fetched attributes.
		"""
		db = DatabaseReader()
		startYear = 1961
		endYear = 2014
		dataMatrix, colDictionary, attributeDict = db.fetchCountryData(
				self.country, (startYear, endYear), useCountryCode=False, asNumpyMatrix=False)

		# Row index -> year, needed by findValidTimeRange.
		rowDic = dict(enumerate(range(startYear, endYear + 1)))

		dataMatrix, rowDic = clean.findValidTimeRange(dataMatrix.T, rowDic)
		dataMatrix = dataMatrix.T

		dataMatrix = clean.transformColumns(dataMatrix, clean.smoothByAverage)

		# Row is Year
		# Col is Attribute
		attributeDictReverse = {}
		for key, value in attributeDict.items():
			attributeDictReverse[value] = key

		# Collect one value list per attribute: {attribute: [v_year0, ...]}.
		series = {}
		for key in attributeDict.keys():
			series[key] = []

		for year in range(0, len(dataMatrix)):
			for attr in range(0, len(dataMatrix[year])):
				series[attributeDictReverse[attr]].append(dataMatrix[year][attr])

		# Capture the reference series once. The original overwrote
		# correlations[self.attribute] with its own coefficient mid-loop, so
		# every attribute visited afterwards was compared against a scalar.
		target = series[self.attribute]

		correlationValues = []
		for attr, array in series.items():
			if attr == self.attribute:
				continue
			coefficient = pearsonr(array, target)[0]
			# Skip NaN results here rather than deleting from a dict while
			# iterating over it (RuntimeError on Python 3).
			if math.isnan(coefficient):
				continue
			correlationValues.append((attr, round(coefficient, 3)))

		self.correlationValues = sorted(correlationValues, key=lambda x: x[1])

Exemplo n.º 20

0

Exibir arquivo

Arquivo: generate_regression_data.py Projeto: Brennan-M/Global-Indicators

	def ridge(self, attributes):
		"""Extend ``self.attribute`` past 2000 with a ridge-regression model.

		The model is trained on the smoothed 1960-2000 rows, using the columns
		named in ``attributes`` as features and the target attribute cast to
		int as the response. Each smoothed 2001-2014 row is then predicted
		and appended to the observed series.

		Args:
			attributes: names of the predictor attributes.

		Returns:
			A dict mapping year (1960-2014) to value with NaN entries left
			out, or ``0`` when ``self.attribute`` is not available.
		"""
		db = DatabaseReader()
		dataMatrix, colDictionary, attributeDict = db.fetchCountryData(
		self.country, (1960, 2000), useCountryCode=False, asNumpyMatrix=False)

		try:
			clean.transformColumns(dataMatrix, clean.smoothByAverage)
		except ValueError:
			clean.transformColumns(dataMatrix, clean.smoothByReplacement(0))

		# Callers receive 0 (not an empty dict) for an unknown attribute.
		if self.attribute not in attributeDict:
			return 0

		# The same reader is reused: the original created a second reader
		# here but never called it.
		dataMatrix2, colDictionary2, attributeDict2 = db.fetchCountryData(
		self.country, (2001, 2014), useCountryCode=False, asNumpyMatrix = False)

		try:
			clean.transformColumns(dataMatrix2, clean.smoothByAverage)
		except ValueError:
			clean.transformColumns(dataMatrix2, clean.smoothByReplacement(0))

		# Target training data.
		targetCol = attributeDict[self.attribute]
		yt_ = np.asarray([row[targetCol] for row in dataMatrix])

		# Feature matrix: one row per training year, one column per attribute.
		xt_ = np.asarray(
			[[row[attributeDict[att]] for att in attributes] for row in dataMatrix])

		clf = linear_model.Ridge(alpha = 1e-06)
		clf.fit(xt_,yt_.astype(int))

		# Predict each later year and append it to the observed series.
		for year in range(0,len(dataMatrix2)):
			temparray = []
			for att in attributes:
				temparray = np.append(temparray,(dataMatrix2[year][attributeDict2[att]]))
			temparray2 = np.reshape(temparray, (1,-1))
			tempval = clf.predict(temparray2)
			yt_ = np.append(yt_, tempval)

		x_ = np.asarray(list(range(1960, 2015)))

		# Build the filtered dict in one pass instead of deleting NaN entries
		# while iterating, which raises RuntimeError on Python 3.
		return {
			x_[num]: yt_[num]
			for num in range(len(x_))
			if not math.isnan(yt_[num])
		}