Пример #1
0
def _sumNaceSeries(seriesByNace, naceCodes):
    """Return the element-wise sum of several NACE series of one country.

    seriesByNace maps a NACE code to a list of string values and naceCodes
    lists the codes to add up.  An empty list is returned as soon as one of
    the codes is missing.  A position where any of the series holds ':'
    yields ':'; otherwise the values are converted with float(), added in
    the order of naceCodes and stored as str().
    """
    try:
        series = [seriesByNace[code] for code in naceCodes]
    except KeyError:
        return []

    summed = []
    for i in range(len(series[0])):
        if any(serie[i] == ':' for serie in series):
            summed.append(':')
        else:
            total = float(series[0][i])
            for serie in series[1:]:
                total += float(serie[i])
            summed.append(str(total))
    return summed


def traitementFichierTXT(indicatorEurostatDenominator, unitDenominator,
                         unitNumerator, nomenclature, filesDenominator,
                         fileNumerator, indicatorSpi, fileLog, tableName):
    """Build a numerator/denominator percentage table from Eurostat files.

    The numerator file and every denominator file are read line by line;
    the selected series are accumulated per country and NACE code through
    the spiLib helpers, normalised, aggregated, and finally handed to
    spiLibCreateTable.createTableNacePercentage.

    Parameters:
        indicatorEurostatDenominator: indicator code a denominator line must
            carry; only checked when the denominator file has an indicator
            column.
        unitDenominator: unit code a denominator line must carry.
        unitNumerator: unit code a numerator line must carry.
        nomenclature: passed to spiLib.defSelectdicNace; the value 'nace2'
            also enables the special aggregates below.
        filesDenominator: list of denominator file paths.  It is sorted in
            place in descending order.
        fileNumerator: path of the numerator file.
        indicatorSpi: SPI indicator name passed to the table creation; the
            values 'patintens' and 'patintrd' enable the special aggregates.
        fileLog: log target passed to the spiLib logging helpers and to the
            table creation.
        tableName: passed to createTableNacePercentage.

    Lines whose processing raises an exception (typically an unknown
    country code) are not fatal: their geo code is collected and reported
    through spiLib.defnoCountry.
    """
    dicIndicatorNumerator = {}
    dicIndicatorDenominator = {}
    dicNoCountry = {}
    dicNation = {}
    dicNaceCheck = {}
    dicNation = DBAccess.lectureNationEurostat(dicNation)
    dicNace = spiLib.defSelectdicNace(nomenclature, 'nama')
    minimumYearWithActualData = 999999
    maxEndYear = -1

    # Same result as sort() followed by reverse(); the caller's list is
    # still modified in place.
    filesDenominator.sort(reverse=True)

    # Special aggregates requested on 30-09-2016.
    wantsSpecialAggregates = (indicatorSpi in ('patintens', 'patintrd')
                              and nomenclature == 'nace2')

    # ---- numerator ---------------------------------------------------------
    # The context manager closes the file even when a line cannot be parsed.
    with open(fileNumerator, 'r') as numeratorFile:
        header = numeratorFile.readline().split(',')
        dicEurostat = spiLib.defDicEurostat(header)
        iUnit = dicEurostat['unit']
        iNace = dicEurostat['nace']
        iGeoTime = dicEurostat['geotime']
        geotime = header[iGeoTime].split('\t')

        # The first year column is used as end year, the last as start year.
        endYear = geotime[1].strip()
        startYear = geotime[-1].strip()

        if wantsSpecialAggregates:
            # Make the component codes of the aggregates selectable.
            for code in ('C10', 'C11', 'C12', 'C13', 'C14', 'C15', 'C31',
                         'C32'):
                dicNace[code] = 'C'

        for line in numeratorFile:
            lineList = line.strip('\n').split(',')
            nace = lineList[iNace].strip()
            unit = lineList[iUnit].strip()
            geoTime = lineList[iGeoTime].split('\t')
            geo = geoTime[0].strip()

            try:
                dicNaceCheck[nace] = nace
                country = dicNation[geo]
                timeSerie = geoTime[1:]

                if nace in dicNace and unit == unitNumerator:
                    vector = spiLib.defVectorYears(timeSerie, startYear,
                                                   endYear)
                    minimumYearWithActualData = \
                        spiLib.findMinimumYearWithActualData(
                            timeSerie, int(startYear),
                            minimumYearWithActualData)
                    dicIndicatorNumerator = spiLib.defDicIndicator(
                        country, nace, vector, dicIndicatorNumerator)
                    if int(endYear) > maxEndYear:
                        maxEndYear = int(endYear)
            except Exception:
                # Best effort: remember the geo code and go on with the
                # next line.
                dicNoCountry[geo] = geo

    dicIndicatorNumerator = spiLib.reverseAndNormalizeDic(
        dicIndicatorNumerator, minimumYearWithActualData, maxEndYear)

    if wantsSpecialAggregates:
        for country in dicIndicatorNumerator:
            countrySeries = dicIndicatorNumerator[country]
            for aggregateCode, naceCodes in (
                    ('C10-C12', ('C10', 'C11', 'C12')),
                    ('C13-C15', ('C13', 'C14', 'C15')),
                    ('C31_C32', ('C31', 'C32'))):
                aggregate = _sumNaceSeries(countrySeries, naceCodes)
                # Nothing is stored when a component is missing or empty.
                if aggregate:
                    countrySeries[aggregateCode] = aggregate

    dicIndicatorNumerator = spiLibTotal.calcNaceAggregates(
        dicIndicatorNumerator, nomenclature, 'nama')

    # ---- denominator -------------------------------------------------------
    for txt in filesDenominator:
        with open(txt, 'r') as denominatorFile:
            header = denominatorFile.readline().split(',')
            dicEurostat = spiLib.defDicEurostat(header)
            iUnit = dicEurostat['unit']
            try:
                iIndic = dicEurostat['indic']
            except KeyError:
                # No indicator column: use the same marker as the -1 test
                # below instead of leaving iIndic unbound (or holding the
                # value of the previous file).
                iIndic = -1
            iNace = dicEurostat['nace']
            iGeoTime = dicEurostat['geotime']
            geotime = header[iGeoTime].split('\t')

            endYear = geotime[1].strip()
            startYear = geotime[-1].strip()

            for line in denominatorFile:
                lineList = line.strip('\n').split(',')
                nace = lineList[iNace].strip()
                if iIndic != -1:
                    indicator = lineList[iIndic].strip()
                else:
                    indicator = 'noindicator'
                unit = lineList[iUnit].strip()
                geoTime = lineList[iGeoTime].split('\t')
                geo = geoTime[0].strip()

                try:
                    dicNaceCheck[nace] = nace
                    country = dicNation[geo]
                    timeSerie = geoTime[1:]

                    # Without an indicator column every line qualifies on
                    # the indicator criterion.
                    indicatorMatches = (
                        indicator == 'noindicator'
                        or indicator == indicatorEurostatDenominator)
                    if (indicatorMatches and nace in dicNace
                            and unit == unitDenominator):
                        vector = spiLib.defVectorYears(timeSerie, startYear,
                                                       endYear)
                        dicIndicatorDenominator = spiLib.defDicIndicator(
                            country, nace, vector, dicIndicatorDenominator)
                except Exception:
                    dicNoCountry[geo] = geo

    dicIndicatorDenominator = spiLib.reverseAndNormalizeDic(
        dicIndicatorDenominator, minimumYearWithActualData, maxEndYear)
    dicIndicatorDenominator = spiLibTotal.calcNaceAggregates(
        dicIndicatorDenominator, nomenclature, 'nama')

    spiLib.defnoCountry(dicNoCountry, fileLog)
    spiLib.defDicNaceCheck(dicNaceCheck, dicNace, fileLog)

    spiLibCreateTable.createTableNacePercentage(
        nomenclature, dicIndicatorNumerator, dicIndicatorDenominator,
        indicatorSpi, minimumYearWithActualData, fileLog, tableName)
def traitementFichierTXT(indicatorInput, indicatorInputEurostat, nomenclature,
                         compteEurostat):
    """Load one Eurostat indicator and create the matching SPI table.

    Every file listed in the module-level ``fichiersTXT`` is read; lines
    matching the requested indicator, the module-level unit ``G_Unit``,
    sector 'S1' and a selected NACE code are accumulated per country and
    NACE code through the spiLib helpers.  The result is normalised and
    written with spiLibCreateTable.createTable / createTableNE.

    Parameters:
        indicatorInput: SPI indicator name passed to the table creation.
        indicatorInputEurostat: indicator code a line must carry.
        nomenclature: passed to spiLib.defSelectdicNace and to the table
            creation.
        compteEurostat: passed to spiLib.defSelectdicNace and to the table
            creation.

    Relies on the module-level names ``fichiersTXT`` (sorted in place in
    descending order), ``fileLog``, ``G_Unit``, ``G_tableName`` and
    ``G_FileExt``.  The end year passed to createTableNE is the one of the
    last file read.
    """
    dicIndicator = {}
    dicNoCountry = {}
    dicNaceCheck = {}
    dicNation = {}
    unitWanted = G_Unit
    dicNace = spiLib.defSelectdicNace(nomenclature, compteEurostat)
    dicNation = DBAccess.lectureNationEurostat(dicNation)
    dicStartValue = dict(startIndice=1,
                         startCountry='',
                         startNace='',
                         startIndicator='',
                         startValeur=0,
                         startYear=1900)
    minStartYear = 99999
    maxEndYear = -1
    fichiersTXT.sort()
    fichiersTXT.reverse()
    for txt in fichiersTXT:
        # The context manager closes each input file; previously the
        # handles were never closed.
        with open(txt, 'r') as fichierTXT:
            # First record holds the metadata (column names and years).
            lstrec = fichierTXT.readline().split(',')
            # Locate the columns of the Eurostat input.
            dicEurostat = spiLib.defDicEurostat(lstrec)
            iUnit = dicEurostat['unit']
            iNace = dicEurostat['nace']
            iIndic = dicEurostat['indic']
            iSector = dicEurostat['sector']
            iGeoTime = dicEurostat['geotime']
            geotime = lstrec[iGeoTime].split('\t')
            # The first year column is used as end year, the last as start
            # year.
            endYear = geotime[1].strip()
            startYear = geotime[-1].strip()
            for ligneTXT in fichierTXT:
                ligne = ligneTXT.strip('\n').split(',')
                nace = ligne[iNace].strip()
                indicator = ligne[iIndic].strip()
                unit = ligne[iUnit].strip()
                geoTime = ligne[iGeoTime].split('\t')
                geoEuroStat = geoTime[0].strip()
                # The sector column existed in nama_nace64_e.tsv; it
                # disappeared with ESA 2010 but the test is kept.
                if iSector == -1:
                    sector = 'S1'
                else:
                    sector = ligne[iSector].strip()
                try:
                    country = dicNation[geoEuroStat]
                    timeSerie = geoTime[1:]
                    if (indicator == indicatorInputEurostat
                            and unit == unitWanted and sector == 'S1'
                            and nace in dicNace):
                        # Remember the code to check later whether as many
                        # NACE codes were found as in the SPI table.
                        dicNaceCheck[nace] = nace
                        # Process the Eurostat series.
                        vector = spiLib.defVectorYears(timeSerie, startYear,
                                                       endYear)
                        dicStartValue = spiLib.defDicStartValue(
                            timeSerie, country, nace, indicator,
                            dicStartValue, endYear)
                        dicIndicator = spiLib.defDicIndicator(
                            country, nace, vector, dicIndicator)
                        minStartYear, maxEndYear = spiLib.defMinMaxYear(
                            startYear, minStartYear, endYear, maxEndYear)
                except Exception:
                    # Best effort: remember the geo code and go on with
                    # the next line.
                    dicNoCountry[geoEuroStat] = geoEuroStat
    # Log the start value record.  Its year can differ from the first year
    # of the longest vector because a value may be ':'.
    fileLog.write('highIndice ' + str(dicStartValue['startIndice']) +
                  ' highCountry ' + dicStartValue['startCountry'] +
                  ' Indicator ' + dicStartValue['startIndicator'] +
                  ' Nace ' + dicStartValue['startNace'] +
                  ' valeur ' + str(dicStartValue['startValeur']) +
                  ' startYear ' + str(dicStartValue['startYear']) + '\n')
    spiLib.defnoCountry(dicNoCountry, fileLog)
    spiLib.defDicNaceCheck(dicNaceCheck, dicNace, fileLog)
    # Compare the start year of the vectors with the year of the first
    # value; the latter wins when they differ.
    if minStartYear != dicStartValue['startYear']:
        fileLog.write('annee min. pour les vecteurs =' + str(minStartYear) +
                      ' annee min. avec une valeur =' +
                      str(dicStartValue['startYear']) + '\n')
        minStartYear = dicStartValue['startYear']
    # Indicator processing.
    dicIndicator = spiLib.reverseAndNormalizeDicIndicator(
        dicIndicator, minStartYear, maxEndYear)
    spiLibCreateTable.createTable(nomenclature, dicIndicator, fileLog,
                                  minStartYear, dicNace, indicatorInput,
                                  compteEurostat, G_tableName, G_FileExt)
    spiLibCreateTable.createTableNE(nomenclature, dicNation, endYear, fileLog,
                                    indicatorInput, compteEurostat,
                                    G_tableName)
Пример #3
0
def traitementFichierTXT(indicatorEurostatDenominator,
                         indicatorEurostatNumerator, unitDenominator,
                         unitNumerator, nomenclature, filesDenominator,
                         filesNumerator, baseYear, indicatorSpi, fileLog,
                         tableName):
    """Build a numerator/denominator domestic index table from Eurostat files.

    The numerator files and the denominator files are read line by line;
    the selected series are accumulated per country and NACE code through
    the spiLib helpers, normalised, aggregated, and handed to
    spiLibCreateTable.createTableDomesticIndex.

    Parameters:
        indicatorEurostatDenominator: indicator code a denominator line must
            carry; the value 'EMP' combined with nomenclature 'nace2' also
            triggers spiLibTotal.calcNace2EmpAggr.
        indicatorEurostatNumerator: indicator code a numerator line must
            carry.
        unitDenominator: unit code a denominator line must carry.
        unitNumerator: unit code a numerator line must carry.
        nomenclature: passed to spiLib.defSelectdicNace and to the
            aggregate and table helpers.
        filesDenominator: list of denominator file paths, sorted in place
            in descending order.
        filesNumerator: list of numerator file paths, sorted in place in
            descending order.
        baseYear: passed to createTableDomesticIndex.
        indicatorSpi: passed to createTableDomesticIndex.
        fileLog: log target passed to the spiLib logging helpers and to the
            table creation.
        tableName: passed to createTableDomesticIndex.

    Lines whose processing raises an exception (typically an unknown
    country code) are not fatal: their geo code is collected and reported
    through spiLib.defnoCountry.
    """
    dicIndicatorNumerator = {}
    dicIndicatorDenominator = {}
    dicNoCountry = {}
    dicNation = {}
    dicNaceCheck = {}
    dicNation = DBAccess.lectureNationEurostat(dicNation)
    dicNace = spiLib.defSelectdicNace(nomenclature, 'nama')
    minimumYearWithActualData = 999999
    maxEndYear = -1

    filesNumerator.sort()
    filesNumerator.reverse()
    filesDenominator.sort()
    filesDenominator.reverse()

    # ---- numerator ---------------------------------------------------------
    for txt in filesNumerator:
        # The context manager closes the file even when a line cannot be
        # parsed.
        with open(txt, 'r') as numeratorFile:
            header = numeratorFile.readline().split(',')
            dicEurostat = spiLib.defDicEurostat(header)
            iUnit = dicEurostat['unit']
            iIndic = dicEurostat['indic']
            iNace = dicEurostat['nace']
            iGeoTime = dicEurostat['geotime']
            geotime = header[iGeoTime].split('\t')

            # The first year column is used as end year, the last as start
            # year.
            endYear = geotime[1].strip()
            startYear = geotime[-1].strip()

            for line in numeratorFile:
                lineList = line.strip('\n').split(',')
                nace = lineList[iNace].strip()
                indicator = lineList[iIndic].strip()
                unit = lineList[iUnit].strip()
                geoTime = lineList[iGeoTime].split('\t')
                geo = geoTime[0].strip()

                try:
                    dicNaceCheck[nace] = nace
                    country = dicNation[geo]
                    timeSerie = geoTime[1:]

                    if (indicator == indicatorEurostatNumerator
                            and nace in dicNace and unit == unitNumerator):
                        vector = spiLib.defVectorYears(timeSerie, startYear,
                                                       endYear)
                        minimumYearWithActualData = \
                            spiLib.findMinimumYearWithActualData(
                                timeSerie, int(startYear),
                                minimumYearWithActualData)
                        dicIndicatorNumerator = spiLib.defDicIndicator(
                            country, nace, vector, dicIndicatorNumerator)
                        if int(endYear) > maxEndYear:
                            maxEndYear = int(endYear)
                except Exception:
                    # Best effort: remember the geo code and go on with
                    # the next line.
                    dicNoCountry[geo] = geo

    dicIndicatorNumerator = spiLib.reverseAndNormalizeDic(
        dicIndicatorNumerator, minimumYearWithActualData, maxEndYear)
    dicIndicatorNumerator = spiLibTotal.calcNaceAggregates(
        dicIndicatorNumerator, nomenclature, 'nama')

    # ---- denominator -------------------------------------------------------
    for txt in filesDenominator:
        with open(txt, 'r') as denominatorFile:
            header = denominatorFile.readline().split(',')
            dicEurostat = spiLib.defDicEurostat(header)
            iUnit = dicEurostat['unit']
            iIndic = dicEurostat['indic']
            iNace = dicEurostat['nace']
            iGeoTime = dicEurostat['geotime']
            iSector = dicEurostat['sector']
            geotime = header[iGeoTime].split('\t')

            endYear = geotime[1].strip()
            startYear = geotime[-1].strip()

            for line in denominatorFile:
                lineList = line.strip('\n').split(',')
                nace = lineList[iNace].strip()
                indicator = lineList[iIndic].strip()
                unit = lineList[iUnit].strip()
                geoTime = lineList[iGeoTime].split('\t')
                # A file without sector column is treated as sector 'S1'.
                if iSector == -1:
                    sector = 'S1'
                else:
                    sector = lineList[iSector].strip()
                geo = geoTime[0].strip()

                try:
                    dicNaceCheck[nace] = nace
                    country = dicNation[geo]
                    timeSerie = geoTime[1:]

                    if (indicator == indicatorEurostatDenominator
                            and nace in dicNace
                            and unit == unitDenominator
                            and sector == 'S1'):
                        vector = spiLib.defVectorYears(timeSerie, startYear,
                                                       endYear)
                        dicIndicatorDenominator = spiLib.defDicIndicator(
                            country, nace, vector, dicIndicatorDenominator)
                except Exception:
                    dicNoCountry[geo] = geo

    dicIndicatorDenominator = spiLib.reverseAndNormalizeDic(
        dicIndicatorDenominator, minimumYearWithActualData, maxEndYear)
    dicIndicatorDenominator = spiLibTotal.calcNaceAggregates(
        dicIndicatorDenominator, nomenclature, 'nama')

    # Added on demand of the B2 team to calculate aggregates that cannot be
    # extracted from the nama_nace10_e file.
    if nomenclature == 'nace2' and indicatorEurostatDenominator == 'EMP':
        dicIndicatorDenominator = spiLibTotal.calcNace2EmpAggr(
            dicIndicatorDenominator)

    spiLib.defnoCountry(dicNoCountry, fileLog)
    spiLib.defDicNaceCheck(dicNaceCheck, dicNace, fileLog)

    spiLibCreateTable.createTableDomesticIndex(
        nomenclature, dicIndicatorNumerator, dicIndicatorDenominator,
        indicatorSpi, minimumYearWithActualData, baseYear, fileLog,
        tableName)
Пример #4
0
def traitementFichierTXT(indicatorInput, indicatorInputEurostat, nomenclature,
                         compteEurostat):
    """Load one Eurostat indicator and create the SPI table and its total.

    Every file listed in the module-level ``fichiersTXT`` is read; lines
    matching the requested indicator and the NACE selection rule are
    accumulated per country and NACE code through the spiLib helpers.  The
    result is normalised and written with spiLibCreateTable.createTable;
    unless the module-level ``G_IndicatorSPI_T`` equals 'noTotal', a total
    table is created as well.

    Parameters:
        indicatorInput: SPI indicator name passed to the table creation.
        indicatorInputEurostat: indicator code a line must carry.
        nomenclature: 'nace1' selects the NACE codes present in the
            dictionary returned by spiLib.defSelectdicNace, 'nace2' selects
            the codes shorter than 4 characters.
        compteEurostat: passed to spiLib.defSelectdicNace and to the table
            creation.

    Relies on the module-level names ``fichiersTXT``, ``fileLog``,
    ``G_IndicatorSPI_T`` and ``G_tableName``.  The end year passed to
    createTableNE is the one of the last file read.
    """
    dicIndicator = {}
    dicNoCountry = {}
    dicNaceCheck = {}
    dicNation = {}
    indicatorSpiTotal = G_IndicatorSPI_T
    dicNace = spiLib.defSelectdicNace(nomenclature, compteEurostat)
    dicNation = DBAccess.lectureNationEurostat(dicNation)
    dicStartValue = dict(startIndice=1,
                         startCountry='',
                         startNace='',
                         startIndicator='',
                         startValeur=0,
                         startYear=1900)
    minStartYear = 99999
    maxEndYear = -1
    for txt in fichiersTXT:
        # The context manager closes each input file; previously the
        # handles were never closed.
        with open(txt, 'r') as fichierTXT:
            # First record holds the metadata (column names and years).
            lstrec = fichierTXT.readline().split(',')
            # Locate the columns of the Eurostat input.
            dicEurostat = spiLib.defDicEurostat(lstrec)
            iNace = dicEurostat['nace']
            iIndic = dicEurostat['indic']
            iGeoTime = dicEurostat['geotime']
            geotime = lstrec[iGeoTime].split('\t')
            # The first year column is used as end year, the last as start
            # year.
            endYear = geotime[1].strip()
            startYear = geotime[-1].strip()
            # Sample input:
            #   nace_r1,indic_sb,size_emp,geo\time
            #   E,V11110,TOTAL,ES	9037 	6410 	3544 	3311 	3336 	3084
            for ligneTXT in fichierTXT:
                ligne = ligneTXT.strip('\n').split(',')
                nace = ligne[iNace].strip()
                indicator = ligne[iIndic].strip()
                geoTime = ligne[iGeoTime].split('\t')
                geoEuroStat = geoTime[0].strip()
                try:
                    country = dicNation[geoEuroStat]
                    timeSerie = geoTime[1:]
                    naceSelected = (
                        (nomenclature == 'nace1' and nace in dicNace)
                        or (nomenclature == 'nace2' and len(nace) < 4))
                    if indicator == indicatorInputEurostat and naceSelected:
                        # Remember the code to check later whether as many
                        # NACE codes were found as in the SPI table.
                        dicNaceCheck[nace] = nace
                        # Process the Eurostat series.
                        vector = spiLib.defVectorYears(timeSerie, startYear,
                                                       endYear)
                        dicStartValue = spiLib.defDicStartValue(
                            timeSerie, country, nace, indicator,
                            dicStartValue, endYear)
                        dicIndicator = spiLib.defDicIndicator(
                            country, nace, vector, dicIndicator)
                        minStartYear, maxEndYear = spiLib.defMinMaxYear(
                            startYear, minStartYear, endYear, maxEndYear)
                except Exception:
                    # Best effort: remember the geo code and go on with
                    # the next line.
                    dicNoCountry[geoEuroStat] = geoEuroStat

    fileLog.write('highIndice ' + str(dicStartValue['startIndice']) +
                  ' highCountry ' + dicStartValue['startCountry'] +
                  ' Indicator ' + dicStartValue['startIndicator'] +
                  ' Nace ' + dicStartValue['startNace'] +
                  ' valeur ' + str(dicStartValue['startValeur']) +
                  ' startYear ' + str(dicStartValue['startYear']) + '\n')
    spiLib.defnoCountry(dicNoCountry, fileLog)
    spiLib.defDicNaceCheck(dicNaceCheck, dicNace, fileLog)
    # The year recorded in dicStartValue replaces the start year of the
    # vectors.
    minStartYear = dicStartValue['startYear']
    dicIndicator = spiLib.reverseAndNormalizeDicIndicator(
        dicIndicator, minStartYear, maxEndYear)
    # Create the SPI indicator (e.g. vabus); the aggregates dictionary is
    # returned.
    dicAgregatNace = spiLibCreateTable.createTable(nomenclature, dicIndicator,
                                                   fileLog, minStartYear,
                                                   dicNace, indicatorInput,
                                                   compteEurostat, G_tableName)
    if indicatorSpiTotal != 'noTotal':
        # Create the SPI total indicator (e.g. vabussh).
        spiLibCreateTable.createTableTotal(nomenclature, dicAgregatNace,
                                           dicIndicator, minStartYear, fileLog,
                                           dicNace, indicatorSpiTotal,
                                           compteEurostat, G_tableName)
        spiLibCreateTable.createTableNE(nomenclature, dicNation, endYear,
                                        fileLog, indicatorSpiTotal,
                                        compteEurostat, G_tableName)
    spiLibCreateTable.createTableNE(nomenclature, dicNation, endYear, fileLog,
                                    indicatorInput, compteEurostat,
                                    G_tableName)
Пример #5
0
def traitementFichierTXT(indicatorInput, indicatorInputEurostat, nomenclature,
                         compteEurostat):
    """Load one Eurostat indicator split by size class and create its table.

    Every file listed in the module-level ``fichiersTXT`` is read; lines
    matching the requested indicator, a size listed in ``G_LstSize`` and the
    NACE selection rule are accumulated per country, NACE code and size
    indicator through the spiLib helpers.  The result is normalised and
    written with spiLibCreateTable.createTableSize / createTableNE.

    Parameters:
        indicatorInput: SPI indicator name; it is suffixed with '_' and the
            ``G_DicSize`` entry of the size to name each size indicator.
        indicatorInputEurostat: indicator code a line must carry.
        nomenclature: 'nace1' selects the NACE codes present in the
            dictionary returned by spiLib.defSelectdicNace, 'nace2' selects
            the codes shorter than 4 characters.
        compteEurostat: passed to spiLib.defSelectdicNace and to the table
            creation.

    Relies on the module-level names ``fichiersTXT``, ``fileLog``,
    ``G_LstSize``, ``G_DicSize`` and ``G_tableName``.  The end year passed
    to createTableNE is the one of the last file read.
    """
    dicIndicator = {}
    dicNoCountry = {}
    dicNaceCheck = {}
    dicNation = {}
    dicSize = {}
    dicNace = spiLib.defSelectdicNace(nomenclature, compteEurostat)
    dicNation = DBAccess.lectureNationEurostat(dicNation)
    dicStartValue = dict(startIndice=1,
                         startCountry='',
                         startNace='',
                         startIndicator='',
                         startValeur=0,
                         startYear=1900)
    minStartYear = 99999
    maxEndYear = -1

    for txt in fichiersTXT:
        # The context manager closes each input file; previously the
        # handles were never closed.
        with open(txt, 'r') as fichierTXT:
            # First record holds the metadata (column names and years).
            lstrec = fichierTXT.readline().split(',')
            # Locate the columns of the Eurostat input.
            dicEurostat = spiLib.defDicEurostat(lstrec)
            iNace = dicEurostat['nace']
            iIndic = dicEurostat['indic']
            iSize = dicEurostat['size']
            iGeoTime = dicEurostat['geotime']
            geotime = lstrec[iGeoTime].split('\t')
            # The first year column is used as end year, the last as start
            # year.
            endYear = geotime[1].strip()
            startYear = geotime[-1].strip()
            # Sample input:
            #   nace_r1,indic_sb,size_emp,geo\time
            #   E,V11110,TOTAL,ES	9037 	6410 	3544 	3311 	3336 	3084
            for ligneTXT in fichierTXT:
                ligne = ligneTXT.strip('\n').split(',')
                nace = ligne[iNace].strip()
                indicator = ligne[iIndic].strip()
                size = ligne[iSize].strip()
                geoTime = ligne[iGeoTime].split('\t')
                geoEuroStat = geoTime[0].strip()
                try:
                    country = dicNation[geoEuroStat]
                    timeSerie = geoTime[1:]
                    naceSelected = (
                        (nomenclature == 'nace1' and nace in dicNace)
                        or (nomenclature == 'nace2' and len(nace) < 4))
                    if (indicator == indicatorInputEurostat
                            and size in G_LstSize and naceSelected):
                        # Keep track of the sizes found in the Eurostat
                        # input.
                        dicSize[size] = size
                        try:
                            # Look up the matching SPI indicator.
                            indicator_size = (indicatorInput + '_' +
                                              G_DicSize[size])
                        except Exception:
                            fileLog.write('pas de size ' + size +
                                          ' indicateur : ' + indicator +
                                          ' country : ' + country + '\n')
                            continue  # go on with the next record
                        # Remember the code to check later whether as many
                        # NACE codes were found as in the SPI table.
                        dicNaceCheck[nace] = nace
                        vector = spiLib.defVectorYears(timeSerie, startYear,
                                                       endYear)
                        dicStartValue = spiLib.defDicStartValue(
                            timeSerie, country, nace, indicator,
                            dicStartValue, endYear)
                        dicIndicator = spiLib.defDicIndicatorSize(
                            country, nace, vector, indicator_size,
                            dicIndicator)
                        minStartYear, maxEndYear = spiLib.defMinMaxYear(
                            startYear, minStartYear, endYear, maxEndYear)
                except Exception:
                    # Best effort: remember the geo code and go on with
                    # the next line.
                    dicNoCountry[geoEuroStat] = geoEuroStat
    fileLog.write('highIndice ' + str(dicStartValue['startIndice']) +
                  ' highCountry ' + dicStartValue['startCountry'] +
                  ' Indicator ' + dicStartValue['startIndicator'] +
                  ' Nace ' + dicStartValue['startNace'] +
                  ' valeur ' + str(dicStartValue['startValeur']) +
                  ' startYear ' + str(dicStartValue['startYear']) + '\n')
    spiLib.defnoCountry(dicNoCountry, fileLog)
    spiLib.defDicNaceCheck(dicNaceCheck, dicNace, fileLog)
    # sorted() replaces the keys()/sort() pair, which only works on the
    # list returned by Python 2's dict.keys().
    for s in sorted(dicSize):
        fileLog.write(' List Size in Eurostat Input : ' + s + '\n')
    # The year recorded in dicStartValue replaces the start year of the
    # vectors.
    minStartYear = dicStartValue['startYear']
    dicIndicator = spiLib.reverseAndNormalizeDicIndicatorSize(
        dicIndicator, minStartYear, maxEndYear)
    spiLibCreateTable.createTableSize(nomenclature, dicIndicator, fileLog,
                                      minStartYear, dicNace, indicatorInput,
                                      compteEurostat, G_tableName)
    spiLibCreateTable.createTableNE(nomenclature, dicNation, endYear, fileLog,
                                    indicatorInput, compteEurostat,
                                    G_tableName, G_DicSize)
def traitementFichierTXT(indicatorInput, indicatorInputEurostat, nomenclature,
                         compteEurostat):
    """Load one Eurostat indicator and create the matching SPI growth table.

    Every file listed in the module-level ``fichiersTXT`` is read; lines
    matching the requested indicator, the module-level unit ``G_Unit``,
    sector 'S1' and a selected NACE code are accumulated per country and
    NACE code through the spiLib helpers.  The result is normalised and
    written with spiLibCreateTable.createTableGrowth / createTableNE.

    Parameters:
        indicatorInput: SPI indicator name passed to the table creation.
        indicatorInputEurostat: indicator code a line must carry.
        nomenclature: passed to spiLib.defSelectdicNace and to the table
            creation.
        compteEurostat: passed to spiLib.defSelectdicNace and to the table
            creation.

    Relies on the module-level names ``fichiersTXT`` (sorted in place in
    descending order), ``fileLog``, ``G_Unit``, ``G_tableName``,
    ``G_Growth`` and ``G_FileExt``.  The end year passed to createTableNE
    is the one of the last file read.
    """
    dicIndicator = {}
    dicNoCountry = {}
    dicNaceCheck = {}
    dicNation = {}
    unitWanted = G_Unit
    dicNace = spiLib.defSelectdicNace(nomenclature, compteEurostat)
    dicNation = DBAccess.lectureNationEurostat(dicNation)
    dicStartValue = dict(startIndice=1,
                         startCountry='',
                         startNace='',
                         startIndicator='',
                         startValeur=0,
                         startYear=1900)
    minStartYear = 99999
    maxEndYear = -1
    fichiersTXT.sort()
    fichiersTXT.reverse()
    for txt in fichiersTXT:
        # The context manager closes each input file; previously the
        # handles were never closed.
        with open(txt, 'r') as fichierTXT:
            # First record holds the metadata (column names and years).
            lstrec = fichierTXT.readline().split(',')
            # Find the position of each variable in the Eurostat input.
            dicEurostat = spiLib.defDicEurostat(lstrec)
            iUnit = dicEurostat['unit']
            iNace = dicEurostat['nace']
            iIndic = dicEurostat['indic']
            iSector = dicEurostat['sector']
            iGeoTime = dicEurostat['geotime']
            geotime = lstrec[iGeoTime].split('\t')
            # The first year column is used as end year, the last as start
            # year.
            endYear = geotime[1].strip()
            startYear = geotime[-1].strip()
            # Sample input:
            #   nace 1 : unit,sector,nace_r1,indic_na,geo\time
            #   nace 2 : unit,nace_r2,indic_na,sector,geo\time
            #   MIO_EUR,A,B1G,AT	: 	: 	: 	3781.7 	4375.5 	4322.6
            for ligneTXT in fichierTXT:
                ligne = ligneTXT.strip('\n').split(',')
                nace = ligne[iNace].strip()
                indicator = ligne[iIndic].strip()
                unit = ligne[iUnit].strip()
                geoTime = ligne[iGeoTime].split('\t')
                geoEuroStat = geoTime[0].strip()
                # A file without sector column is treated as sector 'S1'.
                if iSector == -1:
                    sector = 'S1'
                else:
                    sector = ligne[iSector].strip()
                try:
                    country = dicNation[geoEuroStat]
                    timeSerie = geoTime[1:]
                    # Only NACE codes present in the dictionary returned by
                    # spiLib.defSelectdicNace are kept.
                    if (indicator == indicatorInputEurostat
                            and unit == unitWanted and sector == 'S1'
                            and nace in dicNace):
                        # Remember the code to check later whether as many
                        # NACE codes were found as in the SPI table.
                        dicNaceCheck[nace] = nace
                        # Process the Eurostat series.
                        vector = spiLib.defVectorYears(timeSerie, startYear,
                                                       endYear)
                        dicStartValue = spiLib.defDicStartValue(
                            timeSerie, country, nace, indicator,
                            dicStartValue, endYear)
                        dicIndicator = spiLib.defDicIndicator(
                            country, nace, vector, dicIndicator)
                        minStartYear, maxEndYear = spiLib.defMinMaxYear(
                            startYear, minStartYear, endYear, maxEndYear)
                except Exception:
                    # Best effort: remember the geo code and go on with
                    # the next line.
                    dicNoCountry[geoEuroStat] = geoEuroStat
    fileLog.write('highIndice ' + str(dicStartValue['startIndice']) +
                  ' highCountry ' + dicStartValue['startCountry'] +
                  ' Indicator ' + dicStartValue['startIndicator'] +
                  ' Nace ' + dicStartValue['startNace'] +
                  ' valeur ' + str(dicStartValue['startValeur']) +
                  ' startYear ' + str(dicStartValue['startYear']) + '\n')
    spiLib.defnoCountry(dicNoCountry, fileLog)
    spiLib.defDicNaceCheck(dicNaceCheck, dicNace, fileLog)
    # The year recorded in dicStartValue replaces the start year of the
    # vectors.
    minStartYear = dicStartValue['startYear']
    dicIndicator = spiLib.reverseAndNormalizeDicIndicator(
        dicIndicator, minStartYear, maxEndYear)
    spiLibCreateTable.createTableGrowth(nomenclature, dicIndicator, fileLog,
                                        minStartYear, dicNace, indicatorInput,
                                        compteEurostat, G_tableName, G_Growth,
                                        G_FileExt)
    spiLibCreateTable.createTableNE(nomenclature, dicNation, endYear, fileLog,
                                    indicatorInput, compteEurostat,
                                    G_tableName)
Пример #7
0
def traitementFichierTXT(indicatorInput, nomenclature, compteEurostat):
    """Read one indicator from the Eurostat TXT extracts and create its tables.

    Every path in ``fichiersTXT`` is parsed. The first record of a file is a
    comma-separated metadata header whose last field also carries the
    tab-separated year columns; each following record has the same layout
    with values instead of years. Rows whose indicator equals
    ``indicatorInput`` and whose size class equals ``G_Size`` are accumulated
    into ``dicIndicator`` through ``spiLib``; the result is normalised and
    handed to ``spiLibCreateTable.createTable`` and ``createTableNE``.

    Names read from outside this function: ``fichiersTXT``, ``fileLog``,
    ``dirUse``, ``G_Size``, ``G_tableName`` and the modules ``FileAccess``,
    ``DBAccess``, ``spiLib``, ``spiLibCreateTable``.

    Args:
        indicatorInput: Eurostat indicator code to select; also the key used
            to look up the SPI indicator name in the 'competition' domain.
        nomenclature: nomenclature identifier (compared against 'nace2').
        compteEurostat: Eurostat account identifier (compared against 'sbs').

    Returns:
        None. Results are written through ``fileLog`` and
        ``spiLibCreateTable``.
    """
    dicIndicator = {}
    dicNoCountry = {}
    dicNaceCheck = {}
    dicIndicatorDomain = FileAccess.lectureIndicator({}, 'competition',
                                                     dirUse)
    indicatorSpi = dicIndicatorDomain[indicatorInput]
    dicNace = spiLib.defSelectdicNace(nomenclature, compteEurostat)
    dicNation = DBAccess.lectureNationEurostat({})
    dicStartValue = dict(startIndice=1,
                         startCountry='',
                         startNace='',
                         startIndicator='',
                         startValeur=0,
                         startYear=1900)
    minStartYear = 99999
    maxEndYear = -1

    # The size column does not exist for NACE2 in the sbs_na extracts, so the
    # size condition is treated as always satisfied for that combination.
    sizeIsImplicit = nomenclature == 'nace2' and compteEurostat == 'sbs'

    for txt in fichiersTXT:
        # 'with' guarantees the extract is closed even if parsing fails.
        with open(txt, 'r') as fichierTXT:
            rec1er = fichierTXT.readline()  # first record holds the metadata
            lstrec = rec1er.split(',')
            # Locate the Eurostat input columns we need.
            dicEurostat = spiLib.defDicEurostat(lstrec)
            iNace = dicEurostat['nace']
            iIndic = dicEurostat['indic']
            iSize = dicEurostat['size']
            iGeoTime = dicEurostat['geotime']
            geotime = lstrec[iGeoTime].split('\t')
            # The first year column is taken as endYear, the last as startYear.
            endYear = geotime[1].strip()
            startYear = geotime[-1].strip()
            # Example header: nace_r1,indic_sb,size_emp,geo\time
            # Example record: E,V11110,TOTAL,ES	9037 	6410 	3544 	3311
            for ligneTXT in fichierTXT:
                # strip('\n') only removes newlines at the ends of the record.
                ligne = ligneTXT.strip('\n').split(',')
                nace = ligne[iNace].strip()
                indicator = ligne[iIndic].strip()

                # Only read the size column when it is expected to exist.
                if sizeIsImplicit:
                    sizeEurostat = 'TOTAL'
                else:
                    sizeEurostat = ligne[iSize].strip()

                geoTime = ligne[iGeoTime].split('\t')
                geoEuroStat = geoTime[0].strip()
                try:
                    country = dicNation[geoEuroStat]
                    timeSerie = geoTime[1:]
                    # No dicNace membership filter is applied here; an earlier
                    # variant of this condition that also required the NACE
                    # code to be in dicNace had been commented out.
                    if indicator == indicatorInput and sizeEurostat == G_Size:
                        # Remember the code so we can check afterwards whether
                        # we saw as many NACE codes as the SPI table holds.
                        dicNaceCheck[nace] = nace
                        vector = spiLib.defVectorYears(timeSerie, startYear,
                                                       endYear)
                        dicStartValue = spiLib.defDicStartValue(
                            timeSerie, country, nace, indicator,
                            dicStartValue, endYear)
                        dicIndicator = spiLib.defDicIndicator(
                            country, nace, vector, dicIndicator)
                        minStartYear, maxEndYear = spiLib.defMinMaxYear(
                            startYear, minStartYear, endYear, maxEndYear)
                except Exception:
                    # Best effort, as before: any failure on a row (typically
                    # an unknown geo code) is reported as a missing country.
                    # Unlike a bare 'except:', this lets KeyboardInterrupt and
                    # SystemExit propagate.
                    dicNoCountry[geoEuroStat] = geoEuroStat

    fileLog.write(
        'highIndice ' + str(dicStartValue['startIndice']) +
        ' highCountry ' + dicStartValue['startCountry'] +
        ' Indicator ' + dicStartValue['startIndicator'] +
        ' Nace ' + dicStartValue['startNace'] +
        ' valeur ' + str(dicStartValue['startValeur']) +
        ' startYear ' + str(dicStartValue['startYear']) + '\n')
    spiLib.defnoCountry(dicNoCountry, fileLog)
    spiLib.defDicNaceCheck(dicNaceCheck, dicNace, fileLog)
    # The minimum accumulated in the loop is replaced by the start year
    # recorded in dicStartValue before normalising.
    minStartYear = dicStartValue['startYear']
    # Indicator post-processing.
    dicIndicator = spiLib.reverseAndNormalizeDicIndicator(
        dicIndicator, minStartYear, maxEndYear)
    spiLibCreateTable.createTable(nomenclature, dicIndicator, fileLog,
                                  minStartYear, dicNace, indicatorSpi,
                                  compteEurostat, G_tableName)
    # NOTE(review): endYear is the value from the last file read; it is
    # undefined when fichiersTXT is empty -- confirm callers never pass none.
    spiLibCreateTable.createTableNE(nomenclature, dicNation, endYear, fileLog,
                                    indicatorSpi, compteEurostat, G_tableName)
def traitementFichierTXT(nomenclature, indicNumerator, indicDenominator,
                         filePaths):
    """Build the numerator/denominator series and create the 'surpl' table.

    Each file in ``filePaths`` is read as tab-delimited text. The first cell
    of every row holds comma-separated metadata (NACE code, indicator, size,
    geo); the remaining cells are the yearly values. The header row carries
    the years, the first year column being taken as ``endYear`` and the last
    as ``startYear``.

    A row is kept when its NACE code is in ``dicNace``, its geo code is in
    ``dicNation``, its size is 'TOTAL' (forced for 'nace2') and its indicator
    is either ``indicNumerator`` or ``indicDenominator``.

    Args:
        nomenclature: nomenclature identifier; 'nace2' forces size 'TOTAL'.
        indicNumerator: indicator code collected into the numerator dict.
        indicDenominator: indicator code collected into the denominator dict.
        filePaths: iterable of paths of the extracts to read.

    Returns:
        None. The result is passed to
        ``spiLibCreateTable.createTableOverOtherShare``.
    """
    dicNumerator = {}
    dicDenominator = {}

    dicNace = spiLib.defSelectdicNace(nomenclature, 'sbs')
    dicNation = DBAccess.lectureNationEurostat({})

    maxEndYear = -1
    minStartYear = 9999

    # Compiled once instead of per cell. The class removes double quotes,
    # spaces, commas and lowercase ASCII letters from each value cell.
    cellCleaner = re.compile('[" ",a-z]')

    for filePath in filePaths:
        with open(filePath, 'r') as txtFile:
            csvFile = csv.reader(txtFile, delimiter='\t')
            # next() builtin instead of the Python-2-only .next() method.
            firstLine = next(csvFile)
            metaLabel = firstLine[0].split(',')
            dicEurostat = spiLib.defDicEurostat(metaLabel)
            iNace = dicEurostat['nace']
            iIndic = dicEurostat['indic']
            iGeoTime = dicEurostat['geotime']
            iSize = dicEurostat['size']

            endYear = firstLine[1].strip()
            startYear = firstLine[-1].strip()

            # Track the widest year range seen across all files.
            maxEndYear = max(maxEndYear, int(endYear))
            minStartYear = min(minStartYear, int(startYear))

            for line in csvFile:
                if not line:
                    # csv.reader yields [] for a blank line; skip it instead
                    # of failing on line[0].
                    continue
                meta = line[0].split(',')
                code = meta[iNace]
                country = meta[iGeoTime]
                indic = meta[iIndic]
                if nomenclature == 'nace2':
                    size = 'TOTAL'
                else:
                    size = meta[iSize]

                if not (code in dicNace and country in dicNation
                        and size == 'TOTAL'):
                    continue
                if indic != indicNumerator and indic != indicDenominator:
                    continue

                # The vector is the cleaned values framed by the file's end
                # year (first) and start year (last).
                vector = [endYear]
                vector.extend(
                    cellCleaner.sub('', element) for element in line[1:])
                vector.append(startYear)

                # The numerator takes precedence when both codes are equal.
                if indic == indicNumerator:
                    dicNumerator = spiLib.defDicIndicator(
                        country, code, vector, dicNumerator)
                else:
                    dicDenominator = spiLib.defDicIndicator(
                        country, code, vector, dicDenominator)

    dicNumerator = spiLib.reverseAndNormalizeDic(dicNumerator, minStartYear,
                                                 maxEndYear)
    dicDenominator = spiLib.reverseAndNormalizeDic(dicDenominator,
                                                   minStartYear, maxEndYear)

    dicNumerator = spiLibTotal.calcNaceAggregates(dicNumerator, nomenclature,
                                                  'sbs')
    dicDenominator = spiLibTotal.calcNaceAggregates(dicDenominator,
                                                    nomenclature, 'sbs')

    spiLibCreateTable.createTableOverOtherShare(dicNumerator, dicDenominator,
                                                minStartYear, 'surpl',
                                                nomenclature, 'competition')
Пример #9
0
def traitementFichierTXT(indicatorEurostat, unitEurostat, nomenclature, cpa,
                         files, fileLog, tableName):
    """Combine Eurostat series with stored x/m data into one table.

    The reference data for 'x' and 'm' is fetched through
    ``DBAccess.lectureCpaNaceIndicatorData``. The series for
    ``indicatorEurostat`` / ``unitEurostat`` are read from ``files`` into
    ``refDicGO``. All three are normalised to the range starting at the 'x'
    start year and passed to
    ``spiLibCreateTable.createTableCompetitionImportpen``.

    Args:
        indicatorEurostat: Eurostat indicator code a row must carry.
        unitEurostat: Eurostat unit code a row must carry.
        nomenclature: nomenclature identifier passed to the spiLib helpers.
        cpa: selector passed to ``DBAccess.lectureCpaNaceIndicatorData``.
        files: list of input paths; sorted in place in descending order.
        fileLog: log object passed to the reporting helpers.
        tableName: target table name passed to the table creator.

    Returns:
        None.
    """
    infoX = DBAccess.lectureCpaNaceIndicatorData('x', cpa, 'external')
    infoM = DBAccess.lectureCpaNaceIndicatorData('m', cpa, 'external')
    refDicX = infoX[0]
    startYearX = infoX[1]
    refDicM = infoM[0]
    startYearM = infoM[1]
    refDicGO = {}
    dicNoCountry = {}
    dicNaceCheck = {}
    dicNation = DBAccess.lectureNationEurostat({})
    dicNace = spiLib.defSelectdicNace(nomenclature, 'nama')
    minimumYearWithActualData = 999999
    maxEndYear = -1

    # Files are processed in descending path order; the caller's list is
    # still reordered in place, as before.
    files.sort(reverse=True)

    for txt in files:
        # 'with' closes each extract; the original never closed its handles.
        with open(txt, 'r') as txtFile:
            line1st = txtFile.readline()
            list1st = line1st.split(',')
            dicEurostat = spiLib.defDicEurostat(list1st)
            iUnit = dicEurostat['unit']
            iIndic = dicEurostat['indic']
            iNace = dicEurostat['nace']
            iGeoTime = dicEurostat['geotime']
            geotime = list1st[iGeoTime].split('\t')

            # The first year column is taken as endYear, the last as startYear.
            endYear = geotime[1].strip()
            startYear = geotime[-1].strip()

            for line in txtFile:
                lineList = line.strip('\n').split(',')
                nace = lineList[iNace].strip()
                indicator = lineList[iIndic].strip()
                unit = lineList[iUnit].strip()
                geoTime = lineList[iGeoTime].split('\t')
                geo = geoTime[0].strip()

                # Every NACE code read is recorded for the later check,
                # whether or not the row passes the filters below.
                dicNaceCheck[nace] = nace

                try:
                    country = dicNation[geo]
                    timeSerie = geoTime[1:]

                    if (indicator == indicatorEurostat and nace in dicNace
                            and unit == unitEurostat):
                        vector = spiLib.defVectorYears(timeSerie, startYear,
                                                       endYear)
                        # NOTE(review): this value is not read again in the
                        # visible part of this function.
                        minimumYearWithActualData = (
                            spiLib.findMinimumYearWithActualData(
                                timeSerie, int(startYear),
                                minimumYearWithActualData))
                        refDicGO = spiLib.defDicIndicator(
                            country, nace, vector, refDicGO)
                        if int(endYear) > maxEndYear:
                            maxEndYear = int(endYear)
                except Exception:
                    # Best effort, as before: any failure on a row (typically
                    # an unknown geo code) is reported as a missing country.
                    # Unlike a bare 'except:', this lets KeyboardInterrupt and
                    # SystemExit propagate.
                    dicNoCountry[geo] = geo

    # All three dictionaries are aligned on the 'x' start year.
    refDicGO = spiLib.reverseAndNormalizeDic(refDicGO, startYearX, maxEndYear)
    refDicGO = spiLibTotal.calcNaceAggregates(refDicGO, nomenclature, 'nama')

    refDicX = spiLib.normalizeDicSize(refDicX, startYearX, startYearX,
                                      maxEndYear)
    refDicM = spiLib.normalizeDicSize(refDicM, startYearM, startYearX,
                                      maxEndYear)

    spiLib.defnoCountry(dicNoCountry, fileLog)
    spiLib.defDicNaceCheck(dicNaceCheck, dicNace, fileLog)

    spiLibCreateTable.createTableCompetitionImportpen(nomenclature, refDicGO,
                                                      refDicX, refDicM,
                                                      startYearX, fileLog,
                                                      tableName)