def traitementFichierTXT(indicatorSpi, unitEurostat, ageEurostat, sexEurostat, nomFichierTXT):
    """Extract one indicator per country from a Eurostat text file and create its table.

    The first line of the file is the header: comma-separated dimension
    names, the last of which also carries the tab-separated year columns.
    The first year column is taken as the end year and the last one as the
    start year.  Every following row is kept only when its sex, age, unit and
    (ISCED-derived) indicator all match the requested values.

    Parameters:
        indicatorSpi: SPI indicator name the row's mapped ISCED code must equal.
        unitEurostat: required value of the ``unit`` column.
        ageEurostat: required value of the ``age`` column.
        sexEurostat: required value of the ``sex`` column.
        nomFichierTXT: path of the input file.

    Relies on the module-level names ``fileLog``, ``G_spiIndicator`` and
    ``G_tableName``.  Returns nothing; the result is handed to
    ``spiLibCreateTable.createTableCountryLevelEduTech``.

    Raises:
        KeyError: if a row carries an ISCED code that is not in the mapping
            below (that lookup is outside the per-row ``try``).
    """
    dicIndicator = {}
    dicNoCountry = {}
    dicNation = DBAccess.lectureNationEurostat({})
    minimumYearWithActualData = 999999
    # Added 2019-03-26: Eurostat ISCED code -> SPI indicator name.
    iscedTOindicator = {
        'ED0-2': 'edusl',
        'ED3_4': 'edusup',
        'ED5-8': 'edut',
        'ED3-8': 'edust',
    }
    # The context manager guarantees the file is closed (the previous version
    # never closed it).
    with open(nomFichierTXT, 'r') as fichierTXT:
        lstrec = fichierTXT.readline().split(',')
        dicEurostat = spiLib.defDicEurostat(lstrec)
        iUnit = dicEurostat['unit']
        iAge = dicEurostat['age']
        iSex = dicEurostat['sex']
        iIsced = dicEurostat['indic']  # added 2019-03-26
        iGeoTime = dicEurostat['geotime']
        geotime = lstrec[iGeoTime].split('\t')
        endYear = geotime[1].strip()
        startYear = geotime[-1].strip()
        for ligneTXT in fichierTXT:
            ligne = ligneTXT.strip('\n').split(',')
            age = ligne[iAge].strip()
            unit = ligne[iUnit].strip()
            sex = ligne[iSex].strip()
            indicator = iscedTOindicator[ligne[iIsced].strip()]  # added 2019-03-26
            geoTime = ligne[iGeoTime].split('\t')
            geoEuroStat = geoTime[0].strip()
            try:
                country = dicNation[geoEuroStat]
                timeSerie = geoTime[1:]
                # Indicator check added to the filter on 2019-03-26.
                if (sex == sexEurostat and age == ageEurostat
                        and unit == unitEurostat and indicator == indicatorSpi):
                    vector = spiLib.defVectorYears(timeSerie, startYear, endYear)
                    minimumYearWithActualData = spiLib.findMinimumYearWithActualData(
                        timeSerie, int(startYear), minimumYearWithActualData)
                    dicIndicator[country] = vector
            except Exception:
                # Best effort: any failure on this row (typically a geo code
                # absent from dicNation) is recorded as an unmatched country.
                dicNoCountry[geoEuroStat] = geoEuroStat
    spiLib.defnoCountry(dicNoCountry, fileLog)
    dicIndicator = spiLib.reverseAndNormalizeDicNoIndicator(
        dicIndicator, minimumYearWithActualData, int(endYear))
    spiLibCreateTable.createTableCountryLevelEduTech(
        dicIndicator, G_spiIndicator, minimumYearWithActualData, fileLog, G_tableName)
def traitementFichierTXT(indicatorSpi, unitEurostat, flowEurostat, partnerEurostat, nomFichierTXT):
    """Extract one flow/partner/unit series per country from a Eurostat text file.

    The header line gives the column positions (through
    ``spiLib.defDicEurostat``) and the year range: the first year column is
    taken as the end year, the last one as the start year.  A row is kept
    when its flow, partner and unit equal the requested values.

    Parameters:
        indicatorSpi: not read by this function (kept for the callers).
        unitEurostat: required value of the ``unit`` column.
        flowEurostat: required value of the ``flow`` column.
        partnerEurostat: required value of the ``partner`` column.
        nomFichierTXT: path of the input file.

    Relies on the module-level names ``fileLog``, ``G_spiIndicator`` and
    ``G_tableName``.  Returns nothing; the result is handed to
    ``spiLibCreateTable.createTableCountryLevelEduTech``.
    """
    dicIndicator = {}
    dicNoCountry = {}
    dicNation = DBAccess.lectureNationEurostat({})
    minimumYearWithActualData = 999999
    # The context manager guarantees the file is closed (the previous version
    # never closed it).
    with open(nomFichierTXT, 'r') as fichierTXT:
        lstrec = fichierTXT.readline().split(',')
        dicEurostat = spiLib.defDicEurostat(lstrec)
        iUnit = dicEurostat['unit']
        iFlow = dicEurostat['flow']
        iPartner = dicEurostat['partner']
        iGeoTime = dicEurostat['geotime']
        geotime = lstrec[iGeoTime].split('\t')
        endYear = geotime[1].strip()
        startYear = geotime[-1].strip()
        for ligneTXT in fichierTXT:
            ligne = ligneTXT.strip('\n').split(',')
            flow = ligne[iFlow].strip()
            unit = ligne[iUnit].strip()
            partner = ligne[iPartner].strip()
            geoTime = ligne[iGeoTime].split('\t')
            geoEuroStat = geoTime[0].strip()
            try:
                country = dicNation[geoEuroStat]
                timeSerie = geoTime[1:]
                if (flow == flowEurostat and partner == partnerEurostat
                        and unit == unitEurostat):
                    vector = spiLib.defVectorYears(timeSerie, startYear, endYear)
                    minimumYearWithActualData = spiLib.findMinimumYearWithActualData(
                        timeSerie, int(startYear), minimumYearWithActualData)
                    dicIndicator[country] = vector
            except Exception:
                # Best effort: any failure on this row (typically a geo code
                # absent from dicNation) is recorded as an unmatched country.
                dicNoCountry[geoEuroStat] = geoEuroStat
    spiLib.defnoCountry(dicNoCountry, fileLog)
    dicIndicator = spiLib.reverseAndNormalizeDicNoIndicator(
        dicIndicator, minimumYearWithActualData, int(endYear))
    spiLibCreateTable.createTableCountryLevelEduTech(
        dicIndicator, G_spiIndicator, minimumYearWithActualData, fileLog, G_tableName)
def traitementFichierTXT(indicatorInputEurostat, unitEurostat, nomFichierTXT):
    """Extract several indicators per country from a Eurostat text file.

    The header line gives the column positions (through
    ``spiLib.defDicEurostat``) and the year range: the first year column is
    taken as the end year, the last one as the start year.  A row is kept
    when its indicator is contained in ``indicatorInputEurostat`` and its
    unit equals ``unitEurostat``.

    Parameters:
        indicatorInputEurostat: container tested with ``in`` against each
            row's indicator code.
        unitEurostat: required value of the ``unit`` column.
        nomFichierTXT: path of the input file.

    Relies on the module-level names ``fileLog`` and ``G_tableName``.
    Returns nothing; the result is handed to
    ``spiLibCreateTable.createTableCountryLevelOpen``.
    """
    dicIndicators = {}
    dicNoCountry = {}
    indicatorEurostat = indicatorInputEurostat
    dicNation = DBAccess.lectureNationEurostat({})
    minimumYearWithActualData = 999999
    # The context manager guarantees the file is closed (the previous version
    # never closed it).
    with open(nomFichierTXT, 'r') as fichierTXT:
        # First record holds the metadata (column names and years).
        lstrec = fichierTXT.readline().split(',')
        dicEurostat = spiLib.defDicEurostat(lstrec)
        iUnit = dicEurostat['unit']
        iIndic = dicEurostat['indic']
        iGeoTime = dicEurostat['geotime']
        geotime = lstrec[iGeoTime].split('\t')
        endYear = geotime[1].strip()
        startYear = geotime[-1].strip()
        for ligneTXT in fichierTXT:
            # strip() only removes characters at both ends of the string.
            ligne = ligneTXT.strip('\n').split(',')
            indicator = ligne[iIndic].strip()
            unit = ligne[iUnit].strip()
            geoTime = ligne[iGeoTime].split('\t')
            geoEuroStat = geoTime[0].strip()
            try:
                country = dicNation[geoEuroStat]
                timeSerie = geoTime[1:]
                if indicator in indicatorEurostat and unit == unitEurostat:
                    vector = spiLib.defVectorYears(timeSerie, startYear, endYear)
                    minimumYearWithActualData = spiLib.findMinimumYearWithActualData(
                        timeSerie, int(startYear), minimumYearWithActualData)
                    dicIndicators = spiLib.defDicIndicators(
                        country, indicator, vector, dicIndicators)
            except Exception:
                # Best effort: any failure on this row (typically a geo code
                # absent from dicNation) is recorded as an unmatched country.
                dicNoCountry[geoEuroStat] = geoEuroStat
    spiLib.defnoCountry(dicNoCountry, fileLog)
    dicIndicators = spiLib.reverseAndNormalizeDicIndicators(
        dicIndicators, minimumYearWithActualData, int(endYear))
    spiLibCreateTable.createTableCountryLevelOpen(
        dicIndicators, minimumYearWithActualData, fileLog, G_tableName)
def traitementFichierTXT(indicatorSpi, categoryEurostat, ageEurostat, iscoEurostat, unitEurostat, nomFichierTXT, nomFichierTXT2, naceEurostat, itemEurostat, unitEurostat2):
    """Build two per-country series from two Eurostat text files and create one table.

    File 1 (``nomFichierTXT``) is filtered on unit, age, isco and category;
    file 2 (``nomFichierTXT2``) on unit, indicator item and nace.  In each
    file the header gives the column positions and the year range (first
    year column = end year, last = start year).  Both resulting dictionaries
    are normalised over the same span: the earliest year with actual data in
    file 1 up to the end year of file 1.

    Parameters:
        indicatorSpi: not read by this function (kept for the callers).
        categoryEurostat, ageEurostat, iscoEurostat, unitEurostat: required
            column values for file 1.
        nomFichierTXT: path of file 1.
        nomFichierTXT2: path of file 2.
        naceEurostat, itemEurostat, unitEurostat2: required column values
            for file 2.

    Relies on the module-level names ``fileLog``, ``G_spiIndicator`` and
    ``G_tableName``.  Returns nothing; the result is handed to
    ``spiLibCreateTable.createTableCountryLevelFdi``.
    """
    dicIndicator = {}
    dicIndicator2 = {}
    dicNoCountry = {}
    dicNation = DBAccess.lectureNationEurostat({})
    minimumYearWithActualData = 999999

    # --- File 1 ---------------------------------------------------------
    # ``with`` closes the file even when parsing raises.
    with open(nomFichierTXT, 'r') as fichierTXT:
        lstrec = fichierTXT.readline().split(',')
        dicEurostat = spiLib.defDicEurostat(lstrec)
        iCategory = dicEurostat['category']
        iUnit = dicEurostat['unit']
        iIsco = dicEurostat['isco']
        iAge = dicEurostat['age']
        iGeoTime = dicEurostat['geotime']
        geotime = lstrec[iGeoTime].split('\t')
        endYear = geotime[1].strip()
        startYear = geotime[-1].strip()
        for ligneTXT in fichierTXT:
            ligne = ligneTXT.strip('\n').split(',')
            category = ligne[iCategory].strip()
            isco = ligne[iIsco].strip()
            age = ligne[iAge].strip()
            unit = ligne[iUnit].strip()
            geoTime = ligne[iGeoTime].split('\t')
            geoEuroStat = geoTime[0].strip()
            try:
                country = dicNation[geoEuroStat]
                timeSerie = geoTime[1:]
                if (unit == unitEurostat and age == ageEurostat
                        and isco == iscoEurostat and category == categoryEurostat):
                    vector = spiLib.defVectorYears(timeSerie, startYear, endYear)
                    minimumYearWithActualData = spiLib.findMinimumYearWithActualData(
                        timeSerie, int(startYear), minimumYearWithActualData)
                    dicIndicator[country] = vector
            except Exception:
                # Best effort: any failure on this row (typically an unknown
                # geo code) is recorded as an unmatched country.
                dicNoCountry[geoEuroStat] = geoEuroStat

    # NOTE(review): this is called before file 2 is read, so geo codes that
    # are unmatched only in file 2 are collected below but never passed to
    # defnoCountry.  Order kept as in the original - confirm it is intended.
    spiLib.defnoCountry(dicNoCountry, fileLog)

    # --- File 2 ---------------------------------------------------------
    with open(nomFichierTXT2, 'r') as fichierTXT:
        lstrec = fichierTXT.readline().split(',')
        dicEurostat = spiLib.defDicEurostat(lstrec)
        iNace = dicEurostat['nace']
        iUnit = dicEurostat['unit']
        iIndic = dicEurostat['indic']
        iGeoTime = dicEurostat['geotime']
        geotime = lstrec[iGeoTime].split('\t')
        endYear2 = geotime[1].strip()
        startYear2 = geotime[-1].strip()
        for ligneTXT in fichierTXT:
            ligne = ligneTXT.strip('\n').split(',')
            nace = ligne[iNace].strip()
            unit = ligne[iUnit].strip()
            indic = ligne[iIndic].strip()
            geoTime = ligne[iGeoTime].split('\t')
            geoEuroStat = geoTime[0].strip()
            try:
                country = dicNation[geoEuroStat]
                timeSerie = geoTime[1:]
                if (unit == unitEurostat2 and indic == itemEurostat
                        and nace == naceEurostat):
                    vector = spiLib.defVectorYears(timeSerie, startYear2, endYear2)
                    dicIndicator2[country] = vector
            except Exception:
                dicNoCountry[geoEuroStat] = geoEuroStat

    # Both dictionaries are normalised with the year span of file 1.
    dicIndicator = spiLib.reverseAndNormalizeDic(
        dicIndicator, minimumYearWithActualData, int(endYear))
    dicIndicator2 = spiLib.reverseAndNormalizeDic(
        dicIndicator2, minimumYearWithActualData, int(endYear))
    spiLibCreateTable.createTableCountryLevelFdi(
        dicIndicator, dicIndicator2, G_spiIndicator, minimumYearWithActualData,
        fileLog, G_tableName)
def traitementFichierTXT(indicatorInput, indicatorInputEurostat, nomenclature, compteEurostat):
    """Read every file in the global ``fichiersTXT`` and create the NACE-level tables.

    Each file's header gives the column positions (through
    ``spiLib.defDicEurostat``) and the year range (first year column = end
    year, last = start year).  A row is kept when its indicator equals
    ``indicatorInputEurostat``, its unit equals the global ``G_Unit``, its
    sector is ``'S1'`` and its nace code is a key of the selected nace
    dictionary.

    Parameters:
        indicatorInput: SPI indicator name passed to the table writers.
        indicatorInputEurostat: required value of the ``indic`` column.
        nomenclature: passed to ``spiLib.defSelectdicNace`` and the writers.
        compteEurostat: passed to ``spiLib.defSelectdicNace`` and the writers.

    Relies on the module-level names ``fichiersTXT`` (sorted in descending
    order in place), ``fileLog``, ``G_Unit``, ``G_tableName`` and
    ``G_FileExt``.  Returns nothing.
    """
    dicIndicator = {}
    dicNoCountry = {}
    dicNaceCheck = {}
    indicatorSpi = indicatorInput
    indicatorEurostat = indicatorInputEurostat
    Unit = G_Unit
    dicNace = spiLib.defSelectdicNace(nomenclature, compteEurostat)
    dicNation = DBAccess.lectureNationEurostat({})
    dicStartValue = dict(startIndice=1, startCountry='', startNace='',
                         startIndicator='', startValeur=0, startYear=1900)
    minStartYear = 99999
    maxEndYear = -1
    fichiersTXT.sort()
    fichiersTXT.reverse()
    for txt in fichiersTXT:
        # ``with`` closes each input file (the previous version left every
        # file opened by this loop unclosed).
        with open(txt, 'r') as fichierTXT:
            # First record holds the metadata; select the Eurostat columns.
            lstrec = fichierTXT.readline().split(',')
            dicEurostat = spiLib.defDicEurostat(lstrec)
            iUnit = dicEurostat['unit']
            iNace = dicEurostat['nace']
            iIndic = dicEurostat['indic']
            iSize = dicEurostat['size']  # looked up as before; not used below
            iSector = dicEurostat['sector']
            iGeoTime = dicEurostat['geotime']
            geotime = lstrec[iGeoTime].split('\t')
            endYear = geotime[1].strip()
            startYear = geotime[-1].strip()
            for ligneTXT in fichierTXT:
                # strip() only removes characters at both ends of the string.
                ligne = ligneTXT.strip('\n').split(',')
                nace = ligne[iNace].strip()
                indicator = ligne[iIndic].strip()
                unit = ligne[iUnit].strip()
                geoTime = ligne[iGeoTime].split('\t')
                geoEuroStat = geoTime[0].strip()
                # The sector column existed in nama_nace64_e.tsv; it
                # disappeared with ESA 2010 but the test is kept.
                if iSector == -1:
                    sector = 'S1'
                else:
                    sector = ligne[iSector].strip()
                try:
                    country = dicNation[geoEuroStat]
                    timeSerie = geoTime[1:]
                    # ``in`` replaces dict.has_key(), which no longer exists
                    # in Python 3.
                    if (indicator == indicatorEurostat and unit == Unit
                            and sector == 'S1' and nace in dicNace):
                        # Filled so that we can later check that we saw as
                        # many nace codes as the SPI table contains.
                        dicNaceCheck[nace] = nace
                        # Process the Eurostat series.
                        vector = spiLib.defVectorYears(timeSerie, startYear, endYear)
                        dicStartValue = spiLib.defDicStartValue(
                            timeSerie, country, nace, indicator, dicStartValue, endYear)
                        dicIndicator = spiLib.defDicIndicator(
                            country, nace, vector, dicIndicator)
                        minStartYear, maxEndYear = spiLib.defMinMaxYear(
                            startYear, minStartYear, endYear, maxEndYear)
                except Exception:
                    # Best effort: any failure on this row (typically an
                    # unknown geo code) is recorded as an unmatched country.
                    dicNoCountry[geoEuroStat] = geoEuroStat
    # dicStartValue carries the year of the first existing value in the
    # vectors; it can differ from the first year of the longest vector
    # because a value may be ':'.
    fileLog.write('highIndice ' + str(dicStartValue['startIndice'])
                  + ' highCountry ' + dicStartValue['startCountry']
                  + ' Indicator ' + dicStartValue['startIndicator']
                  + ' Nace ' + dicStartValue['startNace']
                  + ' valeur ' + str(dicStartValue['startValeur'])
                  + ' startYear ' + str(dicStartValue['startYear']) + '\n')
    spiLib.defnoCountry(dicNoCountry, fileLog)
    spiLib.defDicNaceCheck(dicNaceCheck, dicNace, fileLog)
    # Compare the start year of the vectors with the start year of the first
    # real value (a value may be ':'); the latter wins.
    if minStartYear != dicStartValue['startYear']:
        fileLog.write('annee min. pour les vecteurs =' + str(minStartYear)
                      + ' annee min. avec une valeur ='
                      + str(dicStartValue['startYear']) + '\n')
        minStartYear = dicStartValue['startYear']
    # Indicator processing.
    dicIndicator = spiLib.reverseAndNormalizeDicIndicator(
        dicIndicator, minStartYear, maxEndYear)
    spiLibCreateTable.createTable(nomenclature, dicIndicator, fileLog,
                                  minStartYear, dicNace, indicatorSpi,
                                  compteEurostat, G_tableName, G_FileExt)
    # endYear is the end year of the last file read.
    spiLibCreateTable.createTableNE(nomenclature, dicNation, endYear, fileLog,
                                    indicatorSpi, compteEurostat, G_tableName)
def traitementFichierTXT(tableName, spiIndicator, indicEurostat, postEurostat, unitEurostat, partnerEurostat, gdpIndicEurostat, nomFichierTXT, nomFichierTXTGdp, nomFichierTXT2, partnerEurostat2, itemEurostat, gdpUnit):
    """Concatenate two indicator files in time, read a GDP file, and create one table.

    Steps:
      1. ``nomFichierTXT``: the time series sits in the ``post`` column;
         rows are filtered on partner, post and indicator.
      2. ``nomFichierTXT2``: the time series sits in the ``geotime``
         column; rows are filtered on partner, indicator and item.
      3. The two per-country vectors are joined: countries missing on
         either side are padded with ``':'``, a gap between the two periods
         is filled with ``':'``, and an overlap is removed from the end of
         the first vector.
      4. ``nomFichierTXTGdp``: rows are filtered on indicator and unit.

    In every file the first year column of the header is taken as the end
    year and the last one as the start year.

    Parameters:
        tableName, spiIndicator: passed to the table writer.
        indicEurostat: required ``indic`` value in files 1 and 2.
        postEurostat: required ``post`` value in file 1.
        unitEurostat: not read by this function (kept for the callers).
        partnerEurostat: required ``partner`` value in file 1.
        gdpIndicEurostat, gdpUnit: required ``indic`` / ``unit`` values in
            the GDP file.
        nomFichierTXT, nomFichierTXT2, nomFichierTXTGdp: input file paths.
        partnerEurostat2, itemEurostat: required ``partner`` / ``item``
            values in file 2.

    Relies on the module-level name ``fileLog``.  Returns nothing; the
    result is handed to ``spiLibCreateTable.createTableCountryLevelFdi``.
    """
    dicIndicator = {}
    dicIndicator2 = {}
    dicGdp = {}
    dicNoCountry = {}
    dicNation = DBAccess.lectureNationEurostat({})
    minimumYearWithActualData = 999999

    # --- File 1: series stored in the 'post' column ----------------------
    # ``with`` closes the file even when parsing raises.
    with open(nomFichierTXT, 'r') as fichierTXT:
        lstrec = fichierTXT.readline().split(',')
        dicEurostat = spiLib.defDicEurostat(lstrec)
        iIndic = dicEurostat['indic']
        iPostTime = dicEurostat['post']
        iPartner = dicEurostat['partner']
        iGeo = dicEurostat['geotime']
        posttime = lstrec[iPostTime].split('\t')
        endYear = int(posttime[1].strip())
        startYear = int(posttime[-1].strip())
        for ligneTXT in fichierTXT:
            ligne = ligneTXT.strip('\n').split(',')
            indic = ligne[iIndic].strip()
            geo = ligne[iGeo].strip()
            partner = ligne[iPartner].strip()
            postTime = ligne[iPostTime].split('\t')
            post = postTime[0].strip()
            try:
                country = dicNation[geo]
                timeSerie = postTime[1:]
                if (partner == partnerEurostat and post == postEurostat
                        and indic == indicEurostat):
                    vector = spiLib.defVectorYears(timeSerie, startYear, endYear)
                    minimumYearWithActualData = spiLib.findMinimumYearWithActualData(
                        timeSerie, int(startYear), minimumYearWithActualData)
                    dicIndicator[country] = vector
            except Exception:
                # Best effort: any failure on this row (typically an unknown
                # geo code) is recorded as an unmatched country.
                dicNoCountry[geo] = geo
    dicIndicator = spiLib.reverseAndNormalizeDic(
        dicIndicator, minimumYearWithActualData, endYear)

    # --- File 2: series stored in the 'geotime' column -------------------
    with open(nomFichierTXT2, 'r') as fichierTXT:
        lstrec = fichierTXT.readline().split(',')
        dicEurostat = spiLib.defDicEurostat(lstrec)
        iIndic = dicEurostat['indic']
        iPartnerTime = dicEurostat['partner']
        iGeo = dicEurostat['geotime']
        iItem = dicEurostat['item']
        geotime = lstrec[iGeo].split('\t')
        endYear2 = int(geotime[1].strip())
        startYear2 = int(geotime[-1].strip())
        for ligneTXT in fichierTXT:
            ligne = ligneTXT.strip('\n').split(',')
            indic = ligne[iIndic].strip()
            item = ligne[iItem].strip()
            partner = ligne[iPartnerTime].strip()
            geotime = ligne[iGeo].split('\t')
            geo = geotime[0].strip()
            try:
                country = dicNation[geo]
                timeSerie = geotime[1:]
                if (partner == partnerEurostat2 and indic == indicEurostat
                        and item == itemEurostat):
                    vector = spiLib.defVectorYears(timeSerie, startYear2, endYear2)
                    dicIndicator2[country] = vector
            except Exception:
                dicNoCountry[geo] = geo
    dicIndicator2 = spiLib.reverseAndNormalizeDic(dicIndicator2, startYear2, endYear2)
    finalEndYear = endYear2

    # --- Join the two periods --------------------------------------------
    # Countries present on one side only get a vector of ':' on the other.
    for country in dicIndicator:
        if country not in dicIndicator2:
            dicIndicator2[country] = [':' for _ in range(startYear2 - 1, endYear2)]
    for country in dicIndicator2:
        if country not in dicIndicator:
            dicIndicator[country] = [
                ':' for _ in range(minimumYearWithActualData - 1, endYear)]
    # Number of years between the end of file 1 and the start of file 2:
    # positive = gap to fill, negative = overlap to drop from file 1.
    yearGap = startYear2 - endYear - 1
    if yearGap == 0:
        for country in dicIndicator:
            dicIndicator[country].extend(dicIndicator2[country])
    elif yearGap > 0:
        fillingVector = [':' for _ in range(0, yearGap)]
        for country in dicIndicator:
            dicIndicator[country].extend(fillingVector)
            dicIndicator[country].extend(dicIndicator2[country])
    elif yearGap < 0:
        for country in dicIndicator:
            del dicIndicator[country][yearGap:]
        for country in dicIndicator:
            dicIndicator[country].extend(dicIndicator2[country])

    # --- GDP file ---------------------------------------------------------
    with open(nomFichierTXTGdp, 'r') as fichierTXT:
        lstrec = fichierTXT.readline().split(',')
        dicEurostat = spiLib.defDicEurostat(lstrec)
        iIndic = dicEurostat['indic']
        iGeoTime = dicEurostat['geotime']
        iUnit = dicEurostat['unit']
        geotime = lstrec[iGeoTime].split('\t')
        endYear = int(geotime[1].strip())
        startYear = int(geotime[-1].strip())
        for ligneTXT in fichierTXT:
            ligne = ligneTXT.strip('\n').split(',')
            indic = ligne[iIndic].strip()
            unit = ligne[iUnit].strip()
            geoTime = ligne[iGeoTime].split('\t')
            geoEurostat = geoTime[0].strip()
            try:
                country = dicNation[geoEurostat]
                timeSerie = geoTime[1:]
                if indic == gdpIndicEurostat and unit == gdpUnit:
                    vector = spiLib.defVectorYears(timeSerie, startYear, endYear)
                    dicGdp[country] = vector
            except Exception:
                dicNoCountry[geoEurostat] = geoEurostat
    # GDP is normalised over the joined span: first data year of file 1 up
    # to the end year of file 2.
    dicGdp = spiLib.reverseAndNormalizeDic(
        dicGdp, minimumYearWithActualData, finalEndYear)
    spiLib.defnoCountry(dicNoCountry, fileLog)
    spiLibCreateTable.createTableCountryLevelFdi(
        dicIndicator, dicGdp, spiIndicator, minimumYearWithActualData,
        fileLog, tableName)
def traitementFichierTXT(indicatorEurostatDenominator, unitDenominator, unitNumerator, nomenclature, filesDenominator, fileNumerator, indicatorSpi, fileLog, tableName):
    """Build numerator and denominator NACE series and create the percentage table.

    The numerator comes from the single file ``fileNumerator`` (rows
    filtered on nace and unit); the denominator from every file of
    ``filesDenominator`` (rows filtered on nace, unit and - when the file
    has an indicator column - indicator).  In every file the first year
    column of the header is taken as the end year and the last one as the
    start year.  Both dictionaries are normalised over the span found in
    the numerator file.

    Parameters:
        indicatorEurostatDenominator: required ``indic`` value in the
            denominator files.
        unitDenominator, unitNumerator: required ``unit`` values.
        nomenclature: passed to ``spiLib.defSelectdicNace`` and the writers;
            ``'nace2'`` enables the special aggregates below.
        filesDenominator: list of paths; sorted in descending order in place.
        fileNumerator: path of the numerator file.
        indicatorSpi: SPI indicator name; ``'patintens'`` and ``'patintrd'``
            enable the special aggregates below.
        fileLog: log object passed to the spiLib helpers.
        tableName: passed to the table writer.

    Returns nothing; the result is handed to
    ``spiLibCreateTable.createTableNacePercentage``.
    """
    dicIndicatorNumerator = {}
    dicIndicatorDenominator = {}
    dicNoCountry = {}
    dicNaceCheck = {}
    dicNation = DBAccess.lectureNationEurostat({})
    dicNace = spiLib.defSelectdicNace(nomenclature, 'nama')
    minimumYearWithActualData = 999999
    maxEndYear = -1
    filesDenominator.sort()
    filesDenominator.reverse()
    specialAggregates = (
        (indicatorSpi == 'patintens' or indicatorSpi == 'patintrd')
        and nomenclature == 'nace2')

    # --- Numerator --------------------------------------------------------
    # ``with`` closes the file even when parsing raises; the handle is no
    # longer named ``file`` so the builtin is not shadowed.
    with open(fileNumerator, 'r') as fichier:
        list1st = fichier.readline().split(',')
        dicEurostat = spiLib.defDicEurostat(list1st)
        iUnit = dicEurostat['unit']
        iNace = dicEurostat['nace']
        iGeoTime = dicEurostat['geotime']
        geotime = list1st[iGeoTime].split('\t')
        endYear = geotime[1].strip()
        startYear = geotime[-1].strip()
        # Special aggregates requested on 2016-09-30: accept these extra
        # nace codes so their series can be summed afterwards.
        if specialAggregates:
            for code in ('C10', 'C11', 'C12', 'C13', 'C14', 'C15', 'C31', 'C32'):
                dicNace[code] = 'C'
        for line in fichier:
            lineList = line.strip('\n').split(',')
            nace = lineList[iNace].strip()
            unit = lineList[iUnit].strip()
            geoTime = lineList[iGeoTime].split('\t')
            geo = geoTime[0].strip()
            try:
                dicNaceCheck[nace] = nace
                country = dicNation[geo]
                timeSerie = geoTime[1:]
                # ``in`` replaces dict.has_key(), which no longer exists in
                # Python 3.
                if nace in dicNace and unit == unitNumerator:
                    vector = spiLib.defVectorYears(timeSerie, startYear, endYear)
                    minimumYearWithActualData = spiLib.findMinimumYearWithActualData(
                        timeSerie, int(startYear), minimumYearWithActualData)
                    dicIndicatorNumerator = spiLib.defDicIndicator(
                        country, nace, vector, dicIndicatorNumerator)
                    if int(endYear) > maxEndYear:
                        maxEndYear = int(endYear)
            except Exception:
                # Best effort: any failure on this row (typically an unknown
                # geo code) is recorded as an unmatched country.
                dicNoCountry[geo] = geo
    dicIndicatorNumerator = spiLib.reverseAndNormalizeDic(
        dicIndicatorNumerator, minimumYearWithActualData, maxEndYear)

    # Special aggregates requested on 2016-09-30: element-wise sums of the
    # component series; a group is skipped when one of its codes is missing.
    if specialAggregates:
        for country in dicIndicatorNumerator:
            naceSeries = dicIndicatorNumerator[country]
            for aggregate, codes in (('C10-C12', ('C10', 'C11', 'C12')),
                                     ('C13-C15', ('C13', 'C14', 'C15')),
                                     ('C31_C32', ('C31', 'C32'))):
                res = _sumNaceSeries(naceSeries, codes)
                if len(res) > 0:
                    naceSeries[aggregate] = res
    dicIndicatorNumerator = spiLibTotal.calcNaceAggregates(
        dicIndicatorNumerator, nomenclature, 'nama')

    # --- Denominator ------------------------------------------------------
    for txt in filesDenominator:
        with open(txt, 'r') as fichier:
            list1st = fichier.readline().split(',')
            dicEurostat = spiLib.defDicEurostat(list1st)
            iUnit = dicEurostat['unit']
            try:
                iIndic = dicEurostat['indic']
            except Exception:
                # No indicator column: use the same -1 marker the test below
                # expects.  The previous ``pass`` left iIndic unbound for the
                # first file, or stale from the previous file.
                iIndic = -1
            iNace = dicEurostat['nace']
            iGeoTime = dicEurostat['geotime']
            geotime = list1st[iGeoTime].split('\t')
            endYear = geotime[1].strip()
            startYear = geotime[-1].strip()
            for line in fichier:
                lineList = line.strip('\n').split(',')
                nace = lineList[iNace].strip()
                if iIndic != -1:
                    indicator = lineList[iIndic].strip()
                else:
                    indicator = 'noindicator'
                unit = lineList[iUnit].strip()
                geoTime = lineList[iGeoTime].split('\t')
                geo = geoTime[0].strip()
                try:
                    dicNaceCheck[nace] = nace
                    country = dicNation[geo]
                    timeSerie = geoTime[1:]
                    # Without an indicator column only nace and unit are
                    # checked; otherwise the indicator must match as well.
                    indicatorOk = (indicator == 'noindicator'
                                   or indicator == indicatorEurostatDenominator)
                    if indicatorOk and nace in dicNace and unit == unitDenominator:
                        vector = spiLib.defVectorYears(timeSerie, startYear, endYear)
                        dicIndicatorDenominator = spiLib.defDicIndicator(
                            country, nace, vector, dicIndicatorDenominator)
                except Exception:
                    dicNoCountry[geo] = geo
    dicIndicatorDenominator = spiLib.reverseAndNormalizeDic(
        dicIndicatorDenominator, minimumYearWithActualData, maxEndYear)
    dicIndicatorDenominator = spiLibTotal.calcNaceAggregates(
        dicIndicatorDenominator, nomenclature, 'nama')
    spiLib.defnoCountry(dicNoCountry, fileLog)
    spiLib.defDicNaceCheck(dicNaceCheck, dicNace, fileLog)
    spiLibCreateTable.createTableNacePercentage(
        nomenclature, dicIndicatorNumerator, dicIndicatorDenominator,
        indicatorSpi, minimumYearWithActualData, fileLog, tableName)


def _sumNaceSeries(naceSeries, codes):
    """Return the element-wise sum of the series stored under ``codes``.

    ``naceSeries`` maps a nace code to a list of string values.  The result
    has the length of the first series; a position holds ``':'`` as soon as
    one component is ``':'`` there, otherwise ``str`` of the float sum.  An
    empty list is returned when any of the codes cannot be looked up.
    """
    try:
        series = [naceSeries[code] for code in codes]
    except Exception:
        return []
    res = []
    for i in range(0, len(series[0])):
        # any() stops at the first ':' exactly like the original ``or`` chain.
        if any(serie[i] == ':' for serie in series):
            res.append(':')
        else:
            total = float(series[0][i])
            for serie in series[1:]:
                total = total + float(serie[i])
            res.append(str(total))
    return res
def traitementFichierTXT(indicatorInputEurostat, unitEurostat, nomenclature, sbsFile = ''):
    """Read every file in the global ``fichiersTXT`` and create the skill/tech table.

    Each file's header gives the column positions (through
    ``spiLib.defDicEurostat``) and the year range (first year column = end
    year, last = start year).  A row is kept when its indicator equals
    ``indicatorInputEurostat``, its nace code is a key of the selected nace
    dictionary and its unit equals ``unitEurostat``.

    When ``nomenclature`` is ``'nace2'``, ``sbsFile`` is read as well: the
    rows with indicator ``'V12150'`` and nace ``N80``, ``N81`` or ``N82``
    are turned into ratios by the ``spiLibTotal`` helpers and merged into
    the indicator dictionary.

    Parameters:
        indicatorInputEurostat: required value of the ``indic`` column.
        unitEurostat: required value of the ``unit`` column.
        nomenclature: passed to the spiLib helpers; ``'nace2'`` enables the
            SBS step.
        sbsFile: path of the SBS file, only opened for ``'nace2'``.

    Relies on the module-level names ``fichiersTXT``, ``fileLog`` and
    ``G_tableName``.  Returns nothing.
    """
    dicIndicator = {}
    dicNoCountry = {}
    dicNaceCheck = {}
    indicatorEurostat = indicatorInputEurostat
    dicNace = spiLib.defSelectdicNaceSkillTech(nomenclature)
    dicNation = DBAccess.lectureNationEurostat({})
    dicStartValue = dict(startIndice=1, startCountry='', startNace='',
                         startIndicator='', startValeur=0)
    minStartYear = 99999
    maxEndYear = -1
    for txt in fichiersTXT:
        # ``with`` closes each file even when parsing raises.
        with open(txt, 'r') as fichierTXT:
            # First record holds the metadata; select the Eurostat columns.
            lstrec = fichierTXT.readline().split(',')
            dicEurostat = spiLib.defDicEurostat(lstrec)
            iUnit = dicEurostat['unit']
            iNace = dicEurostat['nace']
            iIndic = dicEurostat['indic']
            iGeoTime = dicEurostat['geotime']
            geotime = lstrec[iGeoTime].split('\t')
            endYear = geotime[1].strip()
            startYear = geotime[-1].strip()
            # Example of the input layout:
            #   nace_r1,indic_sb,size_emp,geo\time
            #   E,V11110,TOTAL,ES 9037 6410 3544 3311 3336 3084
            for ligneTXT in fichierTXT:
                # strip() only removes characters at both ends of the string.
                ligne = ligneTXT.strip('\n').split(',')
                nace = ligne[iNace].strip()
                indicator = ligne[iIndic].strip()
                unit = ligne[iUnit].strip()
                geoTime = ligne[iGeoTime].split('\t')
                geoEuroStat = geoTime[0].strip()
                try:
                    country = dicNation[geoEuroStat]
                    timeSerie = geoTime[1:]
                    # ``in`` replaces dict.has_key(), which no longer exists
                    # in Python 3.
                    if (indicator == indicatorEurostat and nace in dicNace
                            and unit == unitEurostat):
                        dicNaceCheck[nace] = nace
                        vector = spiLib.defVectorYears(timeSerie, startYear, endYear)
                        dicStartValue = spiLib.defDicStartValue(
                            timeSerie, country, nace, indicator, dicStartValue, endYear)
                        dicIndicator = spiLib.defDicIndicator(
                            country, nace, vector, dicIndicator)
                        if int(endYear) > maxEndYear:
                            maxEndYear = int(endYear)
                        if int(startYear) < minStartYear:
                            minStartYear = int(startYear)
                except Exception:
                    # Best effort: any failure on this row (typically an
                    # unknown geo code) is recorded as an unmatched country.
                    dicNoCountry[geoEuroStat] = geoEuroStat
    fileLog.write('highIndice ' + str(dicStartValue['startIndice'])
                  + ' highCountry ' + dicStartValue['startCountry']
                  + ' Indicator ' + dicStartValue['startIndicator']
                  + ' Nace ' + dicStartValue['startNace']
                  + ' valeur ' + str(dicStartValue['startValeur']) + '\n')
    spiLib.defnoCountry(dicNoCountry, fileLog)
    spiLib.defDicNaceCheck(dicNaceCheck, dicNace, fileLog)
    dicIndicator = spiLib.reverseAndNormalizeDic(dicIndicator, minStartYear, maxEndYear)
    if nomenclature == 'nace2':
        dicSbs = {}
        with open(sbsFile, 'r') as fichierTXT:
            lstrec = fichierTXT.readline().split(',')
            dicEurostat = spiLib.defDicEurostat(lstrec)
            iNace = dicEurostat['nace']
            iIndic = dicEurostat['indic']
            iGeoTime = dicEurostat['geotime']
            geotime = lstrec[iGeoTime].split('\t')
            endYear = geotime[1].strip()
            startYear = geotime[-1].strip()
            for ligneTXT in fichierTXT:
                ligne = ligneTXT.strip('\n').split(',')
                nace = ligne[iNace].strip()
                indicator = ligne[iIndic].strip()
                geoTime = ligne[iGeoTime].split('\t')
                geoEuroStat = geoTime[0].strip()
                try:
                    country = dicNation[geoEuroStat]
                    timeSerie = geoTime[1:]
                    if indicator == 'V12150' and nace in ('N80', 'N81', 'N82'):
                        vector = spiLib.defVectorYears(timeSerie, startYear, endYear)
                        dicSbs = spiLib.defDicIndicator(country, nace, vector, dicSbs)
                except Exception:
                    # Rows that cannot be processed are skipped here without
                    # being recorded.
                    continue
        dicSbs = spiLib.reverseAndNormalizeDic(dicSbs, minStartYear, maxEndYear)
        dicSbs = spiLibTotal.createSkillTechNace2SbsTotal(dicSbs)
        dicRatio = spiLibTotal.createSkillTechNace2Ratio(dicSbs)
        del dicSbs
        dicIndicator = spiLibTotal.addSkillTechNace2RemainingCodes(dicIndicator, dicRatio)
    spiLibCreateTable.createTableSkillTech(
        nomenclature, dicIndicator, minStartYear, fileLog, G_tableName)
def traitementFichierTXT(indicatorSpi, indicatorEurostat, nomFichierTXT, nomFichierTXT2, indicatorEurostat2, eurostatUnit):
    """Concatenate in time the series of two Eurostat text files and create one table.

    File 1 (``nomFichierTXT``) is filtered on its indicator; file 2
    (``nomFichierTXT2``) on indicator and unit.  In each file the first
    year column of the header is taken as the end year and the last one as
    the start year.  The per-country vectors are then joined: countries
    missing on either side are padded with ``':'``, a gap between the two
    periods is filled with ``':'``, and an overlap is removed from the end
    of the first vector.

    Parameters:
        indicatorSpi: not read by this function (kept for the callers).
        indicatorEurostat: required ``indic`` value in file 1.
        nomFichierTXT: path of file 1.
        nomFichierTXT2: path of file 2.
        indicatorEurostat2: required ``indic`` value in file 2.
        eurostatUnit: required ``unit`` value in file 2.

    Relies on the module-level names ``fileLog``, ``G_spiIndicator`` and
    ``G_tableName``.  Returns nothing; the result is handed to
    ``spiLibCreateTable.createTableCountryLevelEduTech``.
    """
    dicIndicator = {}
    dicIndicator2 = {}
    dicNoCountry = {}
    dicNation = DBAccess.lectureNationEurostat({})
    minimumYearWithActualData = 999999

    # --- File 1 -----------------------------------------------------------
    # ``with`` closes the file even when parsing raises.
    with open(nomFichierTXT, 'r') as fichierTXT:
        lstrec = fichierTXT.readline().split(',')
        dicEurostat = spiLib.defDicEurostat(lstrec)
        iIndic = dicEurostat['indic']
        iGeoTime = dicEurostat['geotime']
        geotime = lstrec[iGeoTime].split('\t')
        endYear = int(geotime[1].strip())
        startYear = int(geotime[-1].strip())
        for ligneTXT in fichierTXT:
            ligne = ligneTXT.strip('\n').split(',')
            indic = ligne[iIndic].strip()
            geoTime = ligne[iGeoTime].split('\t')
            geoEuroStat = geoTime[0].strip()
            try:
                country = dicNation[geoEuroStat]
                timeSerie = geoTime[1:]
                if indic == indicatorEurostat:
                    vector = spiLib.defVectorYears(timeSerie, startYear, endYear)
                    minimumYearWithActualData = spiLib.findMinimumYearWithActualData(
                        timeSerie, int(startYear), minimumYearWithActualData)
                    dicIndicator[country] = vector
            except Exception:
                # Best effort: any failure on this row (typically an unknown
                # geo code) is recorded as an unmatched country.
                dicNoCountry[geoEuroStat] = geoEuroStat
    dicIndicator = spiLib.reverseAndNormalizeDicNoIndicator(
        dicIndicator, minimumYearWithActualData, int(endYear))

    # --- File 2 -----------------------------------------------------------
    with open(nomFichierTXT2, 'r') as fichierTXT:
        lstrec = fichierTXT.readline().split(',')
        dicEurostat = spiLib.defDicEurostat(lstrec)
        iIndic = dicEurostat['indic']
        iUnit = dicEurostat['unit']
        iGeoTime = dicEurostat['geotime']
        geotime = lstrec[iGeoTime].split('\t')
        endYear2 = int(geotime[1].strip())
        startYear2 = int(geotime[-1].strip())
        for ligneTXT in fichierTXT:
            ligne = ligneTXT.strip('\n').split(',')
            indic = ligne[iIndic].strip()
            unit = ligne[iUnit].strip()
            geoTime = ligne[iGeoTime].split('\t')
            geoEuroStat = geoTime[0].strip()
            try:
                country = dicNation[geoEuroStat]
                timeSerie = geoTime[1:]
                if indic == indicatorEurostat2 and unit == eurostatUnit:
                    vector = spiLib.defVectorYears(timeSerie, startYear2, endYear2)
                    dicIndicator2[country] = vector
            except Exception:
                dicNoCountry[geoEuroStat] = geoEuroStat
    dicIndicator2 = spiLib.reverseAndNormalizeDicNoIndicator(
        dicIndicator2, int(startYear2), int(endYear2))

    # --- Join the two periods ---------------------------------------------
    # Countries present on one side only get a vector of ':' on the other.
    for country in dicIndicator:
        if country not in dicIndicator2:
            dicIndicator2[country] = [':' for _ in range(startYear2 - 1, endYear2)]
    for country in dicIndicator2:
        if country not in dicIndicator:
            dicIndicator[country] = [
                ':' for _ in range(minimumYearWithActualData - 1, endYear)]
    # Number of years between the end of file 1 and the start of file 2:
    # positive = gap to fill, negative = overlap to drop from file 1.
    yearGap = startYear2 - endYear - 1
    if yearGap == 0:
        for country in dicIndicator:
            dicIndicator[country].extend(dicIndicator2[country])
    elif yearGap > 0:
        fillingVector = [':' for _ in range(0, yearGap)]
        for country in dicIndicator:
            dicIndicator[country].extend(fillingVector)
            dicIndicator[country].extend(dicIndicator2[country])
    elif yearGap < 0:
        for country in dicIndicator:
            del dicIndicator[country][yearGap:]
        for country in dicIndicator:
            dicIndicator[country].extend(dicIndicator2[country])
    spiLib.defnoCountry(dicNoCountry, fileLog)
    spiLibCreateTable.createTableCountryLevelEduTech(
        dicIndicator, G_spiIndicator, minimumYearWithActualData, fileLog, G_tableName)
def traitementFichierTXT(indicatorEurostatDenominator, indicatorEurostatNumerator, unitDenominator, unitNumerator, nomenclature, filesDenominator, filesNumerator, baseYear, indicatorSpi, fileLog, tableName):
    """Build numerator and denominator NACE series and create the domestic index table.

    Every file of ``filesNumerator`` and ``filesDenominator`` is read.  In
    each file the header gives the column positions (through
    ``spiLib.defDicEurostat``) and the year range (first year column = end
    year, last = start year).  Rows are filtered on indicator, nace and
    unit; denominator rows must also have sector ``'S1'`` (assumed when the
    sector column index is -1).  Both dictionaries are normalised over the
    span found in the numerator files.

    Parameters:
        indicatorEurostatDenominator, indicatorEurostatNumerator: required
            ``indic`` values.
        unitDenominator, unitNumerator: required ``unit`` values.
        nomenclature: passed to the spiLib helpers and the writer.
        filesDenominator, filesNumerator: lists of paths; both are sorted in
            descending order in place.
        baseYear, indicatorSpi, tableName: passed to the table writer.
        fileLog: log object passed to the spiLib helpers.

    Returns nothing; the result is handed to
    ``spiLibCreateTable.createTableDomesticIndex``.
    """
    dicIndicatorNumerator = {}
    dicIndicatorDenominator = {}
    dicNoCountry = {}
    dicNaceCheck = {}
    dicNation = DBAccess.lectureNationEurostat({})
    dicNace = spiLib.defSelectdicNace(nomenclature, 'nama')
    minimumYearWithActualData = 999999
    maxEndYear = -1
    filesNumerator.sort()
    filesNumerator.reverse()
    filesDenominator.sort()
    filesDenominator.reverse()

    # --- Numerator --------------------------------------------------------
    for txt in filesNumerator:
        # ``with`` closes each file even when parsing raises; the handle is
        # no longer named ``file`` so the builtin is not shadowed.
        with open(txt, 'r') as fichier:
            list1st = fichier.readline().split(',')
            dicEurostat = spiLib.defDicEurostat(list1st)
            iUnit = dicEurostat['unit']
            iIndic = dicEurostat['indic']
            iNace = dicEurostat['nace']
            iGeoTime = dicEurostat['geotime']
            geotime = list1st[iGeoTime].split('\t')
            endYear = geotime[1].strip()
            startYear = geotime[-1].strip()
            for line in fichier:
                lineList = line.strip('\n').split(',')
                nace = lineList[iNace].strip()
                indicator = lineList[iIndic].strip()
                unit = lineList[iUnit].strip()
                geoTime = lineList[iGeoTime].split('\t')
                geo = geoTime[0].strip()
                try:
                    dicNaceCheck[nace] = nace
                    country = dicNation[geo]
                    timeSerie = geoTime[1:]
                    # ``in`` replaces dict.has_key(), which no longer exists
                    # in Python 3.
                    if (indicator == indicatorEurostatNumerator
                            and nace in dicNace and unit == unitNumerator):
                        vector = spiLib.defVectorYears(timeSerie, startYear, endYear)
                        minimumYearWithActualData = spiLib.findMinimumYearWithActualData(
                            timeSerie, int(startYear), minimumYearWithActualData)
                        dicIndicatorNumerator = spiLib.defDicIndicator(
                            country, nace, vector, dicIndicatorNumerator)
                        if int(endYear) > maxEndYear:
                            maxEndYear = int(endYear)
                except Exception:
                    # Best effort: any failure on this row (typically an
                    # unknown geo code) is recorded as an unmatched country.
                    dicNoCountry[geo] = geo
    dicIndicatorNumerator = spiLib.reverseAndNormalizeDic(
        dicIndicatorNumerator, minimumYearWithActualData, maxEndYear)
    dicIndicatorNumerator = spiLibTotal.calcNaceAggregates(
        dicIndicatorNumerator, nomenclature, 'nama')

    # --- Denominator ------------------------------------------------------
    for txt in filesDenominator:
        with open(txt, 'r') as fichier:
            list1st = fichier.readline().split(',')
            dicEurostat = spiLib.defDicEurostat(list1st)
            iUnit = dicEurostat['unit']
            iIndic = dicEurostat['indic']
            iNace = dicEurostat['nace']
            iGeoTime = dicEurostat['geotime']
            iSector = dicEurostat['sector']
            geotime = list1st[iGeoTime].split('\t')
            endYear = geotime[1].strip()
            startYear = geotime[-1].strip()
            for line in fichier:
                lineList = line.strip('\n').split(',')
                nace = lineList[iNace].strip()
                indicator = lineList[iIndic].strip()
                unit = lineList[iUnit].strip()
                geoTime = lineList[iGeoTime].split('\t')
                # An index of -1 is treated as "no sector column"; such rows
                # count as sector S1.
                if iSector == -1:
                    sector = 'S1'
                else:
                    sector = lineList[iSector].strip()
                geo = geoTime[0].strip()
                try:
                    dicNaceCheck[nace] = nace
                    country = dicNation[geo]
                    timeSerie = geoTime[1:]
                    if (indicator == indicatorEurostatDenominator
                            and nace in dicNace and unit == unitDenominator
                            and sector == 'S1'):
                        vector = spiLib.defVectorYears(timeSerie, startYear, endYear)
                        dicIndicatorDenominator = spiLib.defDicIndicator(
                            country, nace, vector, dicIndicatorDenominator)
                except Exception:
                    dicNoCountry[geo] = geo
    dicIndicatorDenominator = spiLib.reverseAndNormalizeDic(
        dicIndicatorDenominator, minimumYearWithActualData, maxEndYear)
    dicIndicatorDenominator = spiLibTotal.calcNaceAggregates(
        dicIndicatorDenominator, nomenclature, 'nama')
    # Added on request of the B2 team: aggregates that cannot be extracted
    # from the nama_nace10_e file.
    if nomenclature == 'nace2' and indicatorEurostatDenominator == 'EMP':
        dicIndicatorDenominator = spiLibTotal.calcNace2EmpAggr(dicIndicatorDenominator)
    spiLib.defnoCountry(dicNoCountry, fileLog)
    spiLib.defDicNaceCheck(dicNaceCheck, dicNace, fileLog)
    spiLibCreateTable.createTableDomesticIndex(
        nomenclature, dicIndicatorNumerator, dicIndicatorDenominator,
        indicatorSpi, minimumYearWithActualData, baseYear, fileLog, tableName)
def traitementFichierTXT(indicatorInput, indicatorInputEurostat, nomenclature, compteEurostat):
    """Read the Eurostat files listed in the module-level ``fichiersTXT`` and
    create the SPI indicator table plus, optionally, its "total" table.

    Besides its parameters the function reads the module globals
    ``fichiersTXT``, ``fileLog``, ``G_IndicatorSPI_T`` and ``G_tableName``.

    Parameters:
        indicatorInput: SPI indicator name passed to the table writers.
        indicatorInputEurostat: value the 'indic' column must equal.
        nomenclature: 'nace1' keeps rows whose NACE code is a key of the
            dict returned by ``spiLib.defSelectdicNace``; 'nace2' keeps rows
            whose NACE code is shorter than 4 characters.
        compteEurostat: forwarded to ``spiLib.defSelectdicNace`` and to the
            table writers.

    Returns:
        None.

    Note:
        ``endYear`` of the last file read is passed to ``createTableNE``; it
        is unbound (NameError) if ``fichiersTXT`` is empty, as before.
    """
    dicIndicator = {}
    dicNoCountry = {}
    dicNaceCheck = {}
    indicatorSpiTotal = G_IndicatorSPI_T
    dicNace = spiLib.defSelectdicNace(nomenclature, compteEurostat)
    dicNation = DBAccess.lectureNationEurostat({})
    dicStartValue = dict(startIndice=1, startCountry='', startNace='',
                         startIndicator='', startValeur=0, startYear=1900)
    minStartYear = 99999
    maxEndYear = -1

    for txt in fichiersTXT:
        # 'with' closes the input file; the original never closed it.
        with open(txt, 'r') as fichierTXT:
            # First record holds the metadata (column names + years).
            lstrec = fichierTXT.readline().split(',')
            # Locate the Eurostat columns used below.
            dicEurostat = spiLib.defDicEurostat(lstrec)
            iNace = dicEurostat['nace']
            iIndic = dicEurostat['indic']
            iGeoTime = dicEurostat['geotime']
            geotime = lstrec[iGeoTime].split('\t')
            endYear = geotime[1].strip()
            startYear = geotime[-1].strip()
            # Sample layout:
            #   nace_r1,indic_sb,size_emp,geo\time
            #   E,V11110,TOTAL,ES 9037 6410 3544 3311 3336 3084
            for ligneTXT in fichierTXT:
                ligne = ligneTXT.strip('\n').split(',')
                nace = ligne[iNace].strip()
                indicator = ligne[iIndic].strip()
                geoTime = ligne[iGeoTime].split('\t')
                geoEuroStat = geoTime[0].strip()
                try:
                    country = dicNation[geoEuroStat]
                    timeSerie = geoTime[1:]
                    naceWanted = ((nomenclature == 'nace1' and nace in dicNace)
                                  or (nomenclature == 'nace2' and len(nace) < 4))
                    if indicator == indicatorInputEurostat and naceWanted:
                        # Remembered to check later whether as many NACE codes
                        # were found as exist in the SPI table.
                        dicNaceCheck[nace] = nace
                        # Process the Eurostat series.
                        vector = spiLib.defVectorYears(timeSerie, startYear, endYear)
                        dicStartValue = spiLib.defDicStartValue(
                            timeSerie, country, nace, indicator, dicStartValue, endYear)
                        dicIndicator = spiLib.defDicIndicator(
                            country, nace, vector, dicIndicator)
                        # NOTE(review): assumed to be part of the matched-row
                        # branch; the original indentation was lost.
                        minStartYear, maxEndYear = spiLib.defMinMaxYear(
                            startYear, minStartYear, endYear, maxEndYear)
                except Exception:
                    # Any failure above (e.g. geo code missing from dicNation)
                    # flags the geo code as "no country".
                    dicNoCountry[geoEuroStat] = geoEuroStat

    # NOTE(review): written once after all files -- assumed placement.
    fileLog.write(
        'highIndice ' + str(dicStartValue['startIndice'])
        + ' highCountry ' + dicStartValue['startCountry']
        + ' Indicator ' + dicStartValue['startIndicator']
        + ' Nace ' + dicStartValue['startNace']
        + ' valeur ' + str(dicStartValue['startValeur'])
        + ' startYear ' + str(dicStartValue['startYear']) + '\n')
    spiLib.defnoCountry(dicNoCountry, fileLog)
    spiLib.defDicNaceCheck(dicNaceCheck, dicNace, fileLog)

    # The start year recorded in dicStartValue replaces the one from
    # defMinMaxYear.
    minStartYear = dicStartValue['startYear']
    dicIndicator = spiLib.reverseAndNormalizeDicIndicator(
        dicIndicator, minStartYear, maxEndYear)

    # Create the SPI indicator (e.g. vabus); the aggregates dict is returned.
    dicAgregatNace = spiLibCreateTable.createTable(
        nomenclature, dicIndicator, fileLog, minStartYear, dicNace,
        indicatorInput, compteEurostat, G_tableName)
    if indicatorSpiTotal != 'noTotal':
        # Create the SPI "total" indicator (e.g. vabussh).
        spiLibCreateTable.createTableTotal(
            nomenclature, dicAgregatNace, dicIndicator, minStartYear, fileLog,
            dicNace, indicatorSpiTotal, compteEurostat, G_tableName)
        spiLibCreateTable.createTableNE(
            nomenclature, dicNation, endYear, fileLog, indicatorSpiTotal,
            compteEurostat, G_tableName)
    spiLibCreateTable.createTableNE(
        nomenclature, dicNation, endYear, fileLog, indicatorInput,
        compteEurostat, G_tableName)
def traitementFichierTXT(indicatorInput, indicatorInputEurostat, nomenclature, compteEurostat):
    """Read the Eurostat files listed in the module-level ``fichiersTXT`` and
    create the per-size-class SPI indicator table.

    Besides its parameters the function reads the module globals
    ``fichiersTXT``, ``fileLog``, ``G_LstSize``, ``G_DicSize`` and
    ``G_tableName``.

    Parameters:
        indicatorInput: SPI indicator name; the per-size indicator is
            ``indicatorInput + '_' + G_DicSize[size]``.
        indicatorInputEurostat: value the 'indic' column must equal.
        nomenclature: 'nace1' keeps rows whose NACE code is a key of the
            dict returned by ``spiLib.defSelectdicNace``; 'nace2' keeps rows
            whose NACE code is shorter than 4 characters.
        compteEurostat: forwarded to ``spiLib.defSelectdicNace`` and to the
            table writers.

    Returns:
        None.

    Note:
        ``endYear`` of the last file read is passed to ``createTableNE``; it
        is unbound (NameError) if ``fichiersTXT`` is empty, as before.
    """
    dicIndicator = {}
    dicNoCountry = {}
    dicNaceCheck = {}
    dicSize = {}
    dicNace = spiLib.defSelectdicNace(nomenclature, compteEurostat)
    dicNation = DBAccess.lectureNationEurostat({})
    dicStartValue = dict(startIndice=1, startCountry='', startNace='',
                         startIndicator='', startValeur=0, startYear=1900)
    minStartYear = 99999
    maxEndYear = -1

    for txt in fichiersTXT:
        # 'with' closes the input file; the original never closed it.
        with open(txt, 'r') as fichierTXT:
            # First record holds the metadata (column names + years).
            lstrec = fichierTXT.readline().split(',')
            # Locate the Eurostat columns used below.
            dicEurostat = spiLib.defDicEurostat(lstrec)
            iNace = dicEurostat['nace']
            iIndic = dicEurostat['indic']
            iSize = dicEurostat['size']
            iGeoTime = dicEurostat['geotime']
            geotime = lstrec[iGeoTime].split('\t')
            endYear = geotime[1].strip()
            startYear = geotime[-1].strip()
            # Sample layout:
            #   nace_r1,indic_sb,size_emp,geo\time
            #   E,V11110,TOTAL,ES 9037 6410 3544 3311 3336 3084
            for ligneTXT in fichierTXT:
                ligne = ligneTXT.strip('\n').split(',')
                nace = ligne[iNace].strip()
                indicator = ligne[iIndic].strip()
                size = ligne[iSize].strip()
                geoTime = ligne[iGeoTime].split('\t')
                geoEuroStat = geoTime[0].strip()
                try:
                    country = dicNation[geoEuroStat]
                    timeSerie = geoTime[1:]
                    naceWanted = ((nomenclature == 'nace1' and nace in dicNace)
                                  or (nomenclature == 'nace2' and len(nace) < 4))
                    if (indicator == indicatorInputEurostat
                            and size in G_LstSize
                            and naceWanted):
                        # Keep track of the size classes present in the input.
                        dicSize[size] = size
                        try:
                            # Look up the matching SPI indicator name.
                            indicator_size = indicatorInput + '_' + G_DicSize[size]
                        except Exception:
                            fileLog.write('pas de size ' + size + ' indicateur : '
                                          + indicator + ' country : ' + country + '\n')
                            continue  # move on to the next record
                        # Remembered to check later whether as many NACE codes
                        # were found as exist in the SPI table.
                        dicNaceCheck[nace] = nace
                        vector = spiLib.defVectorYears(timeSerie, startYear, endYear)
                        dicStartValue = spiLib.defDicStartValue(
                            timeSerie, country, nace, indicator, dicStartValue, endYear)
                        dicIndicator = spiLib.defDicIndicatorSize(
                            country, nace, vector, indicator_size, dicIndicator)
                        # NOTE(review): assumed to be part of the matched-row
                        # branch; the original indentation was lost.
                        minStartYear, maxEndYear = spiLib.defMinMaxYear(
                            startYear, minStartYear, endYear, maxEndYear)
                except Exception:
                    # Any failure above (e.g. geo code missing from dicNation)
                    # flags the geo code as "no country".
                    dicNoCountry[geoEuroStat] = geoEuroStat

    # NOTE(review): written once after all files -- assumed placement.
    fileLog.write(
        'highIndice ' + str(dicStartValue['startIndice'])
        + ' highCountry ' + dicStartValue['startCountry']
        + ' Indicator ' + dicStartValue['startIndicator']
        + ' Nace ' + dicStartValue['startNace']
        + ' valeur ' + str(dicStartValue['startValeur'])
        + ' startYear ' + str(dicStartValue['startYear']) + '\n')
    spiLib.defnoCountry(dicNoCountry, fileLog)
    spiLib.defDicNaceCheck(dicNaceCheck, dicNace, fileLog)

    # sorted() works on both Python 2 and 3 (dict.keys().sort() does not).
    for s in sorted(dicSize):
        fileLog.write(' List Size in Eurostat Input : ' + s + '\n')

    # The start year recorded in dicStartValue replaces the one from
    # defMinMaxYear.
    minStartYear = dicStartValue['startYear']
    dicIndicator = spiLib.reverseAndNormalizeDicIndicatorSize(
        dicIndicator, minStartYear, maxEndYear)
    spiLibCreateTable.createTableSize(
        nomenclature, dicIndicator, fileLog, minStartYear, dicNace,
        indicatorInput, compteEurostat, G_tableName)
    spiLibCreateTable.createTableNE(
        nomenclature, dicNation, endYear, fileLog, indicatorInput,
        compteEurostat, G_tableName, G_DicSize)
def traitementFichierTXT(indicatorInput, indicatorInputEurostat, nomenclature, compteEurostat):
    """Read the Eurostat files listed in the module-level ``fichiersTXT`` and
    create the growth table for one SPI indicator.

    Besides its parameters the function reads the module globals
    ``fichiersTXT`` (sorted IN PLACE, descending), ``fileLog``, ``G_Unit``,
    ``G_tableName``, ``G_Growth`` and ``G_FileExt``.

    Parameters:
        indicatorInput: SPI indicator name passed to the table writers.
        indicatorInputEurostat: value the 'indic' column must equal.
        nomenclature, compteEurostat: forwarded to
            ``spiLib.defSelectdicNace`` and to the table writers.

    A row is kept when its indicator matches, its unit equals ``G_Unit``, its
    sector is 'S1' (rows of files without a sector column count as 'S1') and
    its NACE code is a key of the selected NACE dict.

    Returns:
        None.

    Note:
        ``endYear`` of the last file read is passed to ``createTableNE``; it
        is unbound (NameError) if ``fichiersTXT`` is empty, as before.
    """
    dicIndicator = {}
    dicNoCountry = {}
    dicNaceCheck = {}
    unitWanted = G_Unit
    dicNace = spiLib.defSelectdicNace(nomenclature, compteEurostat)
    dicNation = DBAccess.lectureNationEurostat({})
    dicStartValue = dict(startIndice=1, startCountry='', startNace='',
                         startIndicator='', startValeur=0, startYear=1900)
    minStartYear = 99999
    maxEndYear = -1

    # In-place descending sort (same effect as sort() followed by reverse()).
    fichiersTXT.sort(reverse=True)
    for txt in fichiersTXT:
        # 'with' closes the input file; the original never closed it.
        with open(txt, 'r') as fichierTXT:
            # First record holds the metadata (column names + years).
            lstrec = fichierTXT.readline().split(',')
            # Find the position of each variable in the Eurostat input.
            dicEurostat = spiLib.defDicEurostat(lstrec)
            iUnit = dicEurostat['unit']
            iNace = dicEurostat['nace']
            iIndic = dicEurostat['indic']
            iSector = dicEurostat['sector']
            iGeoTime = dicEurostat['geotime']
            geotime = lstrec[iGeoTime].split('\t')
            endYear = geotime[1].strip()
            startYear = geotime[-1].strip()
            # Sample layouts:
            #   nace 1 : unit,sector,nace_r1,indic_na,geo\time
            #   nace 2 : unit,nace_r2,indic_na,sector,geo\time
            #   MIO_EUR,A,B1G,AT : : : 3781.7 4375.5 4322.6 3827.4 3542.7 3921.8 3763.1
            for ligneTXT in fichierTXT:
                ligne = ligneTXT.strip('\n').split(',')
                nace = ligne[iNace].strip()
                indicator = ligne[iIndic].strip()
                unit = ligne[iUnit].strip()
                geoTime = ligne[iGeoTime].split('\t')
                geoEuroStat = geoTime[0].strip()
                # An index of -1 means the file has no sector column.
                if iSector == -1:
                    sector = 'S1'
                else:
                    sector = ligne[iSector].strip()
                try:
                    country = dicNation[geoEuroStat]
                    timeSerie = geoTime[1:]
                    # Original author's rule (translated): with nace1 the
                    # NACE list is used; with nace2 + nama the list is used;
                    # on sbs the test is on code length (max 4).  This
                    # function itself always filters on the list.
                    if (indicator == indicatorInputEurostat
                            and unit == unitWanted
                            and sector == 'S1'
                            and nace in dicNace):
                        # Remembered to check later whether as many NACE codes
                        # were found as exist in the SPI table.
                        dicNaceCheck[nace] = nace
                        # Process the Eurostat series.
                        vector = spiLib.defVectorYears(timeSerie, startYear, endYear)
                        dicStartValue = spiLib.defDicStartValue(
                            timeSerie, country, nace, indicator, dicStartValue, endYear)
                        dicIndicator = spiLib.defDicIndicator(
                            country, nace, vector, dicIndicator)
                        # NOTE(review): assumed to be part of the matched-row
                        # branch; the original indentation was lost.
                        minStartYear, maxEndYear = spiLib.defMinMaxYear(
                            startYear, minStartYear, endYear, maxEndYear)
                except Exception:
                    # Any failure above (e.g. geo code missing from dicNation)
                    # flags the geo code as "no country".
                    dicNoCountry[geoEuroStat] = geoEuroStat

    # NOTE(review): written once after all files -- assumed placement.
    fileLog.write(
        'highIndice ' + str(dicStartValue['startIndice'])
        + ' highCountry ' + dicStartValue['startCountry']
        + ' Indicator ' + dicStartValue['startIndicator']
        + ' Nace ' + dicStartValue['startNace']
        + ' valeur ' + str(dicStartValue['startValeur'])
        + ' startYear ' + str(dicStartValue['startYear']) + '\n')
    spiLib.defnoCountry(dicNoCountry, fileLog)
    spiLib.defDicNaceCheck(dicNaceCheck, dicNace, fileLog)

    # The start year recorded in dicStartValue replaces the one from
    # defMinMaxYear.
    minStartYear = dicStartValue['startYear']
    dicIndicator = spiLib.reverseAndNormalizeDicIndicator(
        dicIndicator, minStartYear, maxEndYear)
    spiLibCreateTable.createTableGrowth(
        nomenclature, dicIndicator, fileLog, minStartYear, dicNace,
        indicatorInput, compteEurostat, G_tableName, G_Growth, G_FileExt)
    spiLibCreateTable.createTableNE(
        nomenclature, dicNation, endYear, fileLog, indicatorInput,
        compteEurostat, G_tableName)
def traitementFichierTXT(indicatorInput, nomenclature, compteEurostat):
    """Read the Eurostat files listed in the module-level ``fichiersTXT`` and
    create the SPI table for one 'competition' indicator.

    Besides its parameters the function reads the module globals
    ``fichiersTXT``, ``fileLog``, ``dirUse``, ``G_Size`` and ``G_tableName``.

    Parameters:
        indicatorInput: Eurostat indicator code; rows are kept when their
            'indic' column equals it.  It is also the key used to look up
            the SPI indicator name in the dict returned by
            ``FileAccess.lectureIndicator(..., 'competition', dirUse)``.
        nomenclature, compteEurostat: forwarded to
            ``spiLib.defSelectdicNace`` and to the table writers.  When they
            are 'nace2' and 'sbs' the size column is not read and every row
            is treated as size 'TOTAL'.

    Returns:
        None.

    Raises:
        KeyError: if ``indicatorInput`` is not a key of the indicator dict.

    Note:
        ``endYear`` of the last file read is passed to ``createTableNE``; it
        is unbound (NameError) if ``fichiersTXT`` is empty, as before.
    """
    dicIndicator = {}
    dicNoCountry = {}
    dicNaceCheck = {}
    dicIndicatorDomain = FileAccess.lectureIndicator({}, 'competition', dirUse)
    indicatorSpi = dicIndicatorDomain[indicatorInput]
    dicNace = spiLib.defSelectdicNace(nomenclature, compteEurostat)
    dicNation = DBAccess.lectureNationEurostat({})
    dicStartValue = dict(startIndice=1, startCountry='', startNace='',
                         startIndicator='', startValeur=0, startYear=1900)
    minStartYear = 99999
    maxEndYear = -1

    # The size column does not exist for NACE2 in the sbs_na files, so the
    # size condition is forced to hold for every row in that case.
    # Loop-invariant, hence evaluated once.
    forceTotal = (nomenclature == 'nace2' and compteEurostat == 'sbs')

    for txt in fichiersTXT:
        # 'with' closes the input file; the original never closed it.
        with open(txt, 'r') as fichierTXT:
            # First record holds the metadata (column names + years).
            lstrec = fichierTXT.readline().split(',')
            # Locate the Eurostat columns used below.
            dicEurostat = spiLib.defDicEurostat(lstrec)
            iNace = dicEurostat['nace']
            iIndic = dicEurostat['indic']
            iSize = dicEurostat['size']
            iGeoTime = dicEurostat['geotime']
            geotime = lstrec[iGeoTime].split('\t')
            endYear = geotime[1].strip()
            startYear = geotime[-1].strip()
            # Sample layout:
            #   nace_r1,indic_sb,size_emp,geo\time
            #   E,V11110,TOTAL,ES 9037 6410 3544 3311 3336 3084
            for ligneTXT in fichierTXT:
                ligne = ligneTXT.strip('\n').split(',')
                nace = ligne[iNace].strip()
                indicator = ligne[iIndic].strip()
                if forceTotal:
                    sizeEurostat = 'TOTAL'
                else:
                    sizeEurostat = ligne[iSize].strip()
                geoTime = ligne[iGeoTime].split('\t')
                geoEuroStat = geoTime[0].strip()
                try:
                    country = dicNation[geoEuroStat]
                    timeSerie = geoTime[1:]
                    # Rows are NOT filtered on the NACE list here: every NACE
                    # code is taken (an earlier variant of this test also
                    # required the code to be a key of dicNace).
                    if indicator == indicatorInput and sizeEurostat == G_Size:
                        # Remembered to check later whether as many NACE codes
                        # were found as exist in the SPI table.
                        dicNaceCheck[nace] = nace
                        vector = spiLib.defVectorYears(timeSerie, startYear, endYear)
                        dicStartValue = spiLib.defDicStartValue(
                            timeSerie, country, nace, indicator, dicStartValue, endYear)
                        dicIndicator = spiLib.defDicIndicator(
                            country, nace, vector, dicIndicator)
                        # NOTE(review): assumed to be part of the matched-row
                        # branch; the original indentation was lost.
                        minStartYear, maxEndYear = spiLib.defMinMaxYear(
                            startYear, minStartYear, endYear, maxEndYear)
                except Exception:
                    # Any failure above (e.g. geo code missing from dicNation)
                    # flags the geo code as "no country".
                    dicNoCountry[geoEuroStat] = geoEuroStat

    # NOTE(review): written once after all files -- assumed placement.
    fileLog.write(
        'highIndice ' + str(dicStartValue['startIndice'])
        + ' highCountry ' + dicStartValue['startCountry']
        + ' Indicator ' + dicStartValue['startIndicator']
        + ' Nace ' + dicStartValue['startNace']
        + ' valeur ' + str(dicStartValue['startValeur'])
        + ' startYear ' + str(dicStartValue['startYear']) + '\n')
    spiLib.defnoCountry(dicNoCountry, fileLog)
    spiLib.defDicNaceCheck(dicNaceCheck, dicNace, fileLog)

    # The start year recorded in dicStartValue replaces the one from
    # defMinMaxYear.
    minStartYear = dicStartValue['startYear']
    # Indicator processing.
    dicIndicator = spiLib.reverseAndNormalizeDicIndicator(
        dicIndicator, minStartYear, maxEndYear)
    spiLibCreateTable.createTable(
        nomenclature, dicIndicator, fileLog, minStartYear, dicNace,
        indicatorSpi, compteEurostat, G_tableName)
    spiLibCreateTable.createTableNE(
        nomenclature, dicNation, endYear, fileLog, indicatorSpi,
        compteEurostat, G_tableName)
def traitementFichierTXT(nomenclature, indicNumerator, indicDenominator, filePaths):
    """Build numerator and denominator series from tab-separated Eurostat
    files and pass them to ``spiLibCreateTable.createTableOverOtherShare``.

    The first tab-separated cell of every row holds the comma-separated
    metadata (NACE code, indicator, geo code, optionally size); the remaining
    cells are the yearly values.  In the header row, the second cell is taken
    as ``endYear`` and the last cell as ``startYear``.

    Parameters:
        nomenclature: forwarded to ``spiLib.defSelectdicNace`` (with 'sbs')
            and to the aggregation / table helpers.  For 'nace2' the size
            column is not read and every row counts as size 'TOTAL'.
        indicNumerator: indicator code whose rows feed the numerator.
        indicDenominator: indicator code whose rows feed the denominator.
        filePaths: iterable of input file paths.

    Returns:
        None.

    Raises:
        StopIteration: if an input file has no header row (unchanged).
    """
    dicNumerator = {}
    dicDenominator = {}
    dicNace = spiLib.defSelectdicNace(nomenclature, 'sbs')
    dicNation = DBAccess.lectureNationEurostat({})
    maxEndYear = -1
    minStartYear = 9999
    # Strips double quotes, spaces, commas and lowercase letters from a
    # value cell; compiled once instead of being looked up per cell.
    cellCleaner = re.compile('[" ",a-z]')

    for filePath in filePaths:
        with open(filePath, 'r') as handle:
            csvFile = csv.reader(handle, delimiter='\t')
            # Built-in next() works on Python 2.6+ and 3 (.next() is 2-only).
            firstLine = next(csvFile)
            metaLabel = firstLine[0].split(',')
            dicEurostat = spiLib.defDicEurostat(metaLabel)
            iNace = dicEurostat['nace']
            iIndic = dicEurostat['indic']
            iGeoTime = dicEurostat['geotime']
            iSize = dicEurostat['size']
            endYear = firstLine[1].strip()
            startYear = firstLine[-1].strip()
            # Track the widest year range over all files.
            maxEndYear = max(maxEndYear, int(endYear))
            minStartYear = min(minStartYear, int(startYear))

            for line in csvFile:
                if not line:
                    # csv.reader yields [] for a blank line; skip it instead
                    # of failing on line[0].
                    continue
                meta = line[0].split(',')
                code = meta[iNace]
                country = meta[iGeoTime]
                indic = meta[iIndic]
                if nomenclature == 'nace2':
                    size = 'TOTAL'
                else:
                    size = meta[iSize]

                if not (code in dicNace and country in dicNation and size == 'TOTAL'):
                    continue
                if indic != indicNumerator and indic != indicDenominator:
                    continue

                # Vector layout: endYear, cleaned values..., startYear.
                vector = [endYear]
                vector.extend(cellCleaner.sub('', element) for element in line[1:])
                vector.append(startYear)
                if indic == indicNumerator:
                    dicNumerator = spiLib.defDicIndicator(
                        country, code, vector, dicNumerator)
                else:
                    dicDenominator = spiLib.defDicIndicator(
                        country, code, vector, dicDenominator)

    dicNumerator = spiLib.reverseAndNormalizeDic(dicNumerator, minStartYear, maxEndYear)
    dicDenominator = spiLib.reverseAndNormalizeDic(dicDenominator, minStartYear, maxEndYear)
    dicNumerator = spiLibTotal.calcNaceAggregates(dicNumerator, nomenclature, 'sbs')
    dicDenominator = spiLibTotal.calcNaceAggregates(dicDenominator, nomenclature, 'sbs')
    spiLibCreateTable.createTableOverOtherShare(
        dicNumerator, dicDenominator, minStartYear, 'surpl', nomenclature, 'competition')
def traitementFichierTXT(indicatorEurostat, unitEurostat, nomenclature, cpa, files, fileLog, tableName):
    """Load one indicator series from Eurostat text files, combine it with
    the 'x' and 'm' reference data read through ``DBAccess`` and pass all
    three to ``spiLibCreateTable.createTableCompetitionImportpen``.

    Parameters:
        indicatorEurostat: value the 'indic' column must equal.
        unitEurostat: value the 'unit' column must equal.
        nomenclature: forwarded to ``spiLib.defSelectdicNace`` (with 'nama')
            and to the aggregation / table helpers.
        cpa: forwarded to ``DBAccess.lectureCpaNaceIndicatorData``.
        files: list of input file paths; sorted IN PLACE in descending
            order, as before.
        fileLog: forwarded to the logging helpers and to the table writer.
        tableName: forwarded to the table writer.

    Returns:
        None.
    """
    # Each call returns a pair: (data dict, start year).
    infoX = DBAccess.lectureCpaNaceIndicatorData('x', cpa, 'external')
    infoM = DBAccess.lectureCpaNaceIndicatorData('m', cpa, 'external')
    refDicX, startYearX = infoX[0], infoX[1]
    refDicM, startYearM = infoM[0], infoM[1]

    refDicGO = {}
    dicNoCountry = {}
    dicNaceCheck = {}
    dicNation = DBAccess.lectureNationEurostat({})
    dicNace = spiLib.defSelectdicNace(nomenclature, 'nama')
    minimumYearWithActualData = 999999
    maxEndYear = -1

    # In-place descending sort (same effect as sort() followed by reverse()).
    files.sort(reverse=True)
    for txt in files:
        # 'with' closes the input file; the original never closed it.
        with open(txt, 'r') as fichier:
            list1st = fichier.readline().split(',')
            dicEurostat = spiLib.defDicEurostat(list1st)
            iUnit = dicEurostat['unit']
            iIndic = dicEurostat['indic']
            iNace = dicEurostat['nace']
            iGeoTime = dicEurostat['geotime']
            geotime = list1st[iGeoTime].split('\t')
            # First year column is taken as endYear, last one as startYear.
            endYear = geotime[1].strip()
            startYear = geotime[-1].strip()
            for line in fichier:
                lineList = line.strip('\n').split(',')
                nace = lineList[iNace].strip()
                indicator = lineList[iIndic].strip()
                unit = lineList[iUnit].strip()
                geoTime = lineList[iGeoTime].split('\t')
                geo = geoTime[0].strip()
                try:
                    # Every NACE code met is remembered, matched or not.
                    dicNaceCheck[nace] = nace
                    country = dicNation[geo]
                    timeSerie = geoTime[1:]
                    if (indicator == indicatorEurostat
                            and nace in dicNace
                            and unit == unitEurostat):
                        vector = spiLib.defVectorYears(timeSerie, startYear, endYear)
                        # The result is not consumed further down (the
                        # normalisation uses startYearX); the call is kept so
                        # behaviour is unchanged.
                        minimumYearWithActualData = spiLib.findMinimumYearWithActualData(
                            timeSerie, int(startYear), minimumYearWithActualData)
                        refDicGO = spiLib.defDicIndicator(country, nace, vector, refDicGO)
                        # NOTE(review): the original indentation was lost;
                        # this update is assumed to belong to the matched-row
                        # branch -- confirm against the upstream script.
                        if int(endYear) > maxEndYear:
                            maxEndYear = int(endYear)
                except Exception:
                    # Any failure above (e.g. a geo code missing from
                    # dicNation) flags the geo code as "no country".
                    # KeyboardInterrupt/SystemExit are no longer swallowed.
                    dicNoCountry[geo] = geo

    # All three series are normalised against the start year of the 'x' data.
    refDicGO = spiLib.reverseAndNormalizeDic(refDicGO, startYearX, maxEndYear)
    refDicGO = spiLibTotal.calcNaceAggregates(refDicGO, nomenclature, 'nama')
    refDicX = spiLib.normalizeDicSize(refDicX, startYearX, startYearX, maxEndYear)
    refDicM = spiLib.normalizeDicSize(refDicM, startYearM, startYearX, maxEndYear)

    spiLib.defnoCountry(dicNoCountry, fileLog)
    spiLib.defDicNaceCheck(dicNaceCheck, dicNace, fileLog)
    spiLibCreateTable.createTableCompetitionImportpen(
        nomenclature, refDicGO, refDicX, refDicM, startYearX, fileLog, tableName)