Exemplo n.º 1
0
def traitementFichierTXT(indicatorSpi, unitEurostat, ageEurostat, sexEurostat,
                         nomFichierTXT):
    """Load one education-level series per country and write the SPI table.

    The input is a text file whose first line is a comma-separated header;
    the last header field ('geotime') additionally carries the year labels,
    tab-separated.  Every data line has the same layout, with the geo code
    followed by the tab-separated yearly values.

    Args:
        indicatorSpi: SPI indicator code; a row is kept only when the code
            mapped from its ISCED level equals this value.
        unitEurostat: value the row's 'unit' column must equal.
        ageEurostat: value the row's 'age' column must equal.
        sexEurostat: value the row's 'sex' column must equal.
        nomFichierTXT: path of the input text file.

    Returns:
        None.  The result is handed to
        spiLibCreateTable.createTableCountryLevelEduTech.

    Note:
        fileLog, G_spiIndicator and G_tableName are not parameters; they
        must exist at module level.
    """
    dicIndicator = {}
    dicNoCountry = {}
    dicNation = DBAccess.lectureNationEurostat({})
    minimumYearWithActualData = 999999

    # ISCED level found in the file -> SPI indicator code (added 20190326).
    iscedTOindicator = {
        'ED0-2': 'edusl',
        'ED3_4': 'edusup',
        'ED5-8': 'edut',
        'ED3-8': 'edust'
    }

    # 'with' releases the handle even when parsing raises; the original
    # never closed the file.
    with open(nomFichierTXT, 'r') as fichierTXT:
        lstrec = fichierTXT.readline().split(',')
        dicEurostat = spiLib.defDicEurostat(lstrec)
        iUnit = dicEurostat['unit']
        iAge = dicEurostat['age']
        iSex = dicEurostat['sex']
        iIsced = dicEurostat['indic']  # added 20190326
        iGeoTime = dicEurostat['geotime']
        geotime = lstrec[iGeoTime].split('\t')
        # The first year label is used as the end year and the last one as
        # the start year.
        endYear = geotime[1].strip()
        startYear = geotime[-1].strip()

        for ligneTXT in fichierTXT:
            ligne = ligneTXT.strip('\n').split(',')
            age = ligne[iAge].strip()
            unit = ligne[iUnit].strip()
            sex = ligne[iSex].strip()
            # .get(): an ISCED level missing from the map yields None, which
            # does not match indicatorSpi, so the row is skipped instead of
            # aborting the whole run with a KeyError.
            indicator = iscedTOindicator.get(ligne[iIsced].strip())
            geoTime = ligne[iGeoTime].split('\t')
            geoEuroStat = geoTime[0].strip()
            try:
                country = dicNation[geoEuroStat]
                timeSerie = geoTime[1:]
                if (sex == sexEurostat and age == ageEurostat
                        and unit == unitEurostat
                        and indicator == indicatorSpi):
                    vector = spiLib.defVectorYears(timeSerie, startYear,
                                                   endYear)
                    minimumYearWithActualData = (
                        spiLib.findMinimumYearWithActualData(
                            timeSerie, int(startYear),
                            minimumYearWithActualData))
                    dicIndicator[country] = vector
            except Exception:
                # Kept broad on purpose (best effort per row), but no longer
                # swallows KeyboardInterrupt/SystemExit like a bare except.
                dicNoCountry[geoEuroStat] = geoEuroStat

    spiLib.defnoCountry(dicNoCountry, fileLog)
    dicIndicator = spiLib.reverseAndNormalizeDicNoIndicator(
        dicIndicator, minimumYearWithActualData, int(endYear))
    spiLibCreateTable.createTableCountryLevelEduTech(
        dicIndicator, G_spiIndicator, minimumYearWithActualData, fileLog,
        G_tableName)
def traitementFichierTXT(indicatorSpi, unitEurostat, flowEurostat, partnerEurostat, nomFichierTXT):
    """Load one flow/partner series per country and write the SPI table.

    The input is a text file whose first line is a comma-separated header;
    the 'geotime' header field also carries the tab-separated year labels.
    Each data line holds the geo code followed by tab-separated yearly
    values in that same field.

    Args:
        indicatorSpi: accepted for interface compatibility; not read here.
        unitEurostat: value the row's 'unit' column must equal.
        flowEurostat: value the row's 'flow' column must equal.
        partnerEurostat: value the row's 'partner' column must equal.
        nomFichierTXT: path of the input text file.

    Returns:
        None.  The result is handed to
        spiLibCreateTable.createTableCountryLevelEduTech.

    Note:
        fileLog, G_spiIndicator and G_tableName are not parameters; they
        must exist at module level.
    """
    dicIndicator = {}
    dicNoCountry = {}
    dicNation = DBAccess.lectureNationEurostat({})
    minimumYearWithActualData = 999999

    # 'with' releases the handle even when parsing raises; the original
    # never closed the file.
    with open(nomFichierTXT, 'r') as fichierTXT:
        lstrec = fichierTXT.readline().split(',')
        dicEurostat = spiLib.defDicEurostat(lstrec)
        iUnit = dicEurostat['unit']
        iFlow = dicEurostat['flow']
        iPartner = dicEurostat['partner']
        iGeoTime = dicEurostat['geotime']
        geotime = lstrec[iGeoTime].split('\t')
        # The first year label is used as the end year and the last one as
        # the start year.
        endYear = geotime[1].strip()
        startYear = geotime[-1].strip()

        for ligneTXT in fichierTXT:
            ligne = ligneTXT.strip('\n').split(',')
            flow = ligne[iFlow].strip()
            unit = ligne[iUnit].strip()
            partner = ligne[iPartner].strip()
            geoTime = ligne[iGeoTime].split('\t')
            geoEuroStat = geoTime[0].strip()

            try:
                country = dicNation[geoEuroStat]
                timeSerie = geoTime[1:]
                if (flow == flowEurostat and partner == partnerEurostat
                        and unit == unitEurostat):
                    vector = spiLib.defVectorYears(timeSerie, startYear,
                                                   endYear)
                    minimumYearWithActualData = (
                        spiLib.findMinimumYearWithActualData(
                            timeSerie, int(startYear),
                            minimumYearWithActualData))
                    dicIndicator[country] = vector
            except Exception:
                # Kept broad on purpose (best effort per row), but no longer
                # swallows KeyboardInterrupt/SystemExit like a bare except.
                dicNoCountry[geoEuroStat] = geoEuroStat

    spiLib.defnoCountry(dicNoCountry, fileLog)
    dicIndicator = spiLib.reverseAndNormalizeDicNoIndicator(
        dicIndicator, minimumYearWithActualData, int(endYear))
    spiLibCreateTable.createTableCountryLevelEduTech(
        dicIndicator, G_spiIndicator, minimumYearWithActualData, fileLog,
        G_tableName)
Exemplo n.º 3
0
def traitementFichierTXT(indicatorInputEurostat, unitEurostat, nomFichierTXT):
    """Load several indicator series per country and write the SPI table.

    The input is a text file whose first line is a comma-separated header;
    the 'geotime' header field also carries the tab-separated year labels.
    Each data line holds the geo code followed by tab-separated yearly
    values in that same field.

    Args:
        indicatorInputEurostat: container of accepted indicator codes; a row
            is kept when its 'indic' value is ``in`` this object.
            NOTE(review): if a plain string is passed, ``in`` performs a
            substring test -- confirm callers pass a list/tuple/set.
        unitEurostat: value the row's 'unit' column must equal.
        nomFichierTXT: path of the input text file.

    Returns:
        None.  The result is handed to
        spiLibCreateTable.createTableCountryLevelOpen.

    Note:
        fileLog and G_tableName are not parameters; they must exist at
        module level.
    """
    dicIndicators = {}
    dicNoCountry = {}
    dicNation = DBAccess.lectureNationEurostat({})
    minimumYearWithActualData = 999999

    # 'with' releases the handle even when parsing raises; the original
    # never closed the file.
    with open(nomFichierTXT, 'r') as fichierTXT:
        # The first record holds the metadata (column names and years).
        lstrec = fichierTXT.readline().split(',')
        dicEurostat = spiLib.defDicEurostat(lstrec)
        iUnit = dicEurostat['unit']
        iIndic = dicEurostat['indic']
        iGeoTime = dicEurostat['geotime']
        geotime = lstrec[iGeoTime].split('\t')
        # The first year label is used as the end year and the last one as
        # the start year.
        endYear = geotime[1].strip()
        startYear = geotime[-1].strip()

        for ligneTXT in fichierTXT:
            # Reminder: strip() only removes characters at both ends.
            ligne = ligneTXT.strip('\n').split(',')
            indicator = ligne[iIndic].strip()
            unit = ligne[iUnit].strip()
            geoTime = ligne[iGeoTime].split('\t')
            geoEuroStat = geoTime[0].strip()
            try:
                country = dicNation[geoEuroStat]
                timeSerie = geoTime[1:]
                if indicator in indicatorInputEurostat and unit == unitEurostat:
                    vector = spiLib.defVectorYears(timeSerie, startYear,
                                                   endYear)
                    minimumYearWithActualData = (
                        spiLib.findMinimumYearWithActualData(
                            timeSerie, int(startYear),
                            minimumYearWithActualData))
                    dicIndicators = spiLib.defDicIndicators(
                        country, indicator, vector, dicIndicators)
            except Exception:
                # Kept broad on purpose (best effort per row), but no longer
                # swallows KeyboardInterrupt/SystemExit like a bare except.
                dicNoCountry[geoEuroStat] = geoEuroStat

    spiLib.defnoCountry(dicNoCountry, fileLog)
    dicIndicators = spiLib.reverseAndNormalizeDicIndicators(
        dicIndicators, minimumYearWithActualData, int(endYear))
    spiLibCreateTable.createTableCountryLevelOpen(
        dicIndicators, minimumYearWithActualData, fileLog, G_tableName)
Exemplo n.º 4
0
def traitementFichierTxt(spiIndicator, eurostatIndicator, nomenclature,
                         fileData, fileLabel, tableName, fileLog):
    """Aggregate a tab-separated trade extract and write the share table.

    The first line of ``fileData`` is a tab-separated header that
    spiLib.defDicComext turns into column indexes.  Each following line is
    stored as ``dicIndicator[reporter][product][partner][year] = value``
    when the reporter is a known nation, the product is a known code for
    the nomenclature and the flow equals ``eurostatIndicator``.

    Args:
        spiIndicator: indicator code forwarded to the table writer.
        eurostatIndicator: value the row's 'flow' column must equal.
        nomenclature: 'bec' selects the BEC code list and the BEC
            post-processing; any other value is passed to
            DBAccess.lectureCpaSimple.
        fileData: path of the tab-separated input file.
        fileLabel: accepted for interface compatibility; not read here.
        tableName: target table name forwarded to the table writer.
        fileLog: log handle forwarded to the spiLib helpers.

    Returns:
        None.  The result is handed to
        spiLibCreateTable.createTableExternalXMShare.
    """
    listNation = DBAccess.defListNationIso2()
    if nomenclature == 'bec':
        listCpa = DBAccess.lectureBecEurostat()
    else:
        listCpa = DBAccess.lectureCpaSimple(nomenclature)
    uselessNation = {}
    uselessCpa = {}
    dicIndicator = {}
    minYear = 999999
    maxYear = 0

    # Local names no longer shadow the builtins 'file' and 'list', and the
    # handle is released even when a line fails to parse.
    with open(fileData, 'r') as dataFile:
        header = dataFile.readline().split('\t')
        dicComext = spiLib.defDicComext(header)
        iReporter = dicComext['reporter']
        iPartner = dicComext['partner']
        iProduct = dicComext['product']
        iFlow = dicComext['flow']
        iPeriod = dicComext['period']
        iValue = dicComext['value']

        for line in dataFile:
            if not line.strip():
                # A blank (typically trailing) line has no columns to read.
                continue
            fields = line.split('\t')
            reporter = fields[iReporter].strip()
            partner = fields[iPartner].strip().strip('"')
            product = fields[iProduct].strip()
            # Drop one leading zero; startswith() is safe on an empty code,
            # where product[0] raised IndexError.
            if product.startswith('0'):
                product = product[1:]
            flow = fields[iFlow].strip()
            # Only the first four characters of the period are used (year).
            period = int(fields[iPeriod].strip()[0:4])
            value = fields[iValue].strip()

            if reporter not in listNation:
                uselessNation[reporter] = reporter
                continue
            if product not in listCpa:
                uselessCpa[product] = product
                continue
            if flow != eurostatIndicator:
                continue

            # setdefault creates whichever nesting levels are still missing,
            # replacing the four-level try/except pyramid.
            byPartner = dicIndicator.setdefault(reporter, {}).setdefault(
                product, {})
            byPartner.setdefault(partner, {})[period] = value
            if period > maxYear:
                maxYear = period
            if period < minYear:
                minYear = period

    spiLib.defnoCountry(uselessNation, fileLog)
    del uselessNation
    spiLib.defUselessCode(uselessCpa, fileLog)
    del uselessCpa

    dicIndicator = spiLibComext.convertSingleValueToVector(
        dicIndicator, minYear, maxYear)
    dicIndicator = spiLibComext.mergePartners(dicIndicator)
    if nomenclature == 'bec':
        dicIndicator = spiLibComext.formatBecEurostatDic(dicIndicator)
    dicIndicator = spiLibTotal.calcNaceAggregates(dicIndicator, nomenclature,
                                                  'manufacturing')
    dicIndicator = spiLibTotal.calcNaceAggregates(dicIndicator, nomenclature,
                                                  'comext')

    spiLibCreateTable.createTableExternalXMShare(spiIndicator, nomenclature,
                                                 minYear, dicIndicator,
                                                 tableName, fileLog)
def traitementFichierTXT(tableName, spiIndicator, indicEurostat, postEurostat, unitEurostat, partnerEurostat, gdpIndicEurostat, nomFichierTXT, nomFichierTXTGdp, nomFichierTXT2, partnerEurostat2, itemEurostat, gdpUnit):
    """Join two indicator files into one series per country, plus GDP.

    Three comma-separated text files are read, each with a header line:

    * ``nomFichierTXT``: the year labels sit in the 'post' header field
      (tab-separated) and 'geotime' is a plain geo column.  Rows are kept
      when partner, post and indic match.
    * ``nomFichierTXT2``: the year labels sit in the 'geotime' field.  Rows
      are kept when partner, indic and item match.
    * ``nomFichierTXTGdp``: the year labels sit in the 'geotime' field.
      Rows are kept when indic and unit match.

    After normalisation the second series is appended to the first for
    every country.  Missing countries are padded with ':' on either side,
    a gap between the two periods is filled with ':', and an overlap is
    removed from the end of the first series.

    Args:
        tableName: target table name forwarded to the table writer.
        spiIndicator: indicator code forwarded to the table writer.
        indicEurostat: 'indic' value required in the first two files.
        postEurostat: 'post' value required in the first file.
        unitEurostat: accepted for interface compatibility; not read here.
        partnerEurostat: 'partner' value required in the first file.
        gdpIndicEurostat: 'indic' value required in the GDP file.
        nomFichierTXT: path of the first indicator file.
        nomFichierTXTGdp: path of the GDP file.
        nomFichierTXT2: path of the second indicator file.
        partnerEurostat2: 'partner' value required in the second file.
        itemEurostat: 'item' value required in the second file.
        gdpUnit: 'unit' value required in the GDP file.

    Returns:
        None.  The result is handed to
        spiLibCreateTable.createTableCountryLevelFdi.

    Note:
        fileLog is not a parameter; it must exist at module level.
    """
    dicIndicator = {}
    dicIndicator2 = {}
    dicGdp = {}
    dicNoCountry = {}
    dicNation = DBAccess.lectureNationEurostat({})
    minimumYearWithActualData = 999999

    # ---- First indicator file (years carried by the 'post' column) ----
    # Each file is read inside 'with' so the handle is released even when a
    # line fails to parse.
    with open(nomFichierTXT, 'r') as fichierTXT:
        lstrec = fichierTXT.readline().split(',')
        dicEurostat = spiLib.defDicEurostat(lstrec)
        iIndic = dicEurostat['indic']
        iPostTime = dicEurostat['post']
        iPartner = dicEurostat['partner']
        iGeo = dicEurostat['geotime']

        posttime = lstrec[iPostTime].split('\t')
        # The first year label is used as the end year, the last as start.
        endYear1 = int(posttime[1].strip())
        startYear1 = int(posttime[-1].strip())

        for ligneTXT in fichierTXT:
            ligne = ligneTXT.strip('\n').split(',')
            indic = ligne[iIndic].strip()
            geo = ligne[iGeo].strip()
            partner = ligne[iPartner].strip()
            postTime = ligne[iPostTime].split('\t')
            post = postTime[0].strip()

            try:
                country = dicNation[geo]
                timeSerie = postTime[1:]
                if (partner == partnerEurostat and post == postEurostat
                        and indic == indicEurostat):
                    vector = spiLib.defVectorYears(timeSerie, startYear1,
                                                   endYear1)
                    minimumYearWithActualData = (
                        spiLib.findMinimumYearWithActualData(
                            timeSerie, startYear1,
                            minimumYearWithActualData))
                    dicIndicator[country] = vector
            except Exception:
                # Best effort per row; unlike a bare except this lets
                # KeyboardInterrupt/SystemExit through.
                dicNoCountry[geo] = geo

    dicIndicator = spiLib.reverseAndNormalizeDic(
        dicIndicator, minimumYearWithActualData, endYear1)

    # ---- Second indicator file (years carried by 'geotime') ----
    with open(nomFichierTXT2, 'r') as fichierTXT:
        lstrec = fichierTXT.readline().split(',')
        dicEurostat = spiLib.defDicEurostat(lstrec)
        iIndic = dicEurostat['indic']
        iPartnerTime = dicEurostat['partner']
        iGeo = dicEurostat['geotime']
        iItem = dicEurostat['item']

        geotime = lstrec[iGeo].split('\t')
        endYear2 = int(geotime[1].strip())
        startYear2 = int(geotime[-1].strip())

        for ligneTXT in fichierTXT:
            ligne = ligneTXT.strip('\n').split(',')
            indic = ligne[iIndic].strip()
            item = ligne[iItem].strip()
            partner = ligne[iPartnerTime].strip()
            geotime = ligne[iGeo].split('\t')
            geo = geotime[0].strip()

            try:
                country = dicNation[geo]
                timeSerie = geotime[1:]
                if (partner == partnerEurostat2 and indic == indicEurostat
                        and item == itemEurostat):
                    vector = spiLib.defVectorYears(timeSerie, startYear2,
                                                   endYear2)
                    dicIndicator2[country] = vector
            except Exception:
                dicNoCountry[geo] = geo

    dicIndicator2 = spiLib.reverseAndNormalizeDic(dicIndicator2, startYear2,
                                                  endYear2)

    # ---- Make both dictionaries cover the same set of countries ----
    # A fresh list is built per country because the series in dicIndicator
    # are modified in place below.
    for country in dicIndicator:
        if country not in dicIndicator2:
            dicIndicator2[country] = [':'] * (endYear2 - startYear2 + 1)

    for country in dicIndicator2:
        if country not in dicIndicator:
            dicIndicator[country] = [':'] * (
                endYear1 - minimumYearWithActualData + 1)

    # ---- Append the second series to the first ----
    # yearGap > 0: years missing between both periods, filled with ':'.
    # yearGap < 0: overlapping years, dropped from the end of series 1.
    yearGap = startYear2 - endYear1 - 1
    for country in dicIndicator:
        serie = dicIndicator[country]
        if yearGap > 0:
            serie.extend([':'] * yearGap)
        elif yearGap < 0:
            del serie[yearGap:]
        serie.extend(dicIndicator2[country])

    # ---- GDP file (years carried by 'geotime') ----
    with open(nomFichierTXTGdp, 'r') as fichierTXT:
        lstrec = fichierTXT.readline().split(',')
        dicEurostat = spiLib.defDicEurostat(lstrec)
        iIndic = dicEurostat['indic']
        iGeoTime = dicEurostat['geotime']
        iUnit = dicEurostat['unit']

        geotime = lstrec[iGeoTime].split('\t')
        endYearGdp = int(geotime[1].strip())
        startYearGdp = int(geotime[-1].strip())

        for ligneTXT in fichierTXT:
            ligne = ligneTXT.strip('\n').split(',')
            indic = ligne[iIndic].strip()
            unit = ligne[iUnit].strip()
            geoTime = ligne[iGeoTime].split('\t')
            geoEurostat = geoTime[0].strip()

            try:
                country = dicNation[geoEurostat]
                timeSerie = geoTime[1:]
                if indic == gdpIndicEurostat and unit == gdpUnit:
                    vector = spiLib.defVectorYears(timeSerie, startYearGdp,
                                                   endYearGdp)
                    dicGdp[country] = vector
            except Exception:
                dicNoCountry[geoEurostat] = geoEurostat

    # GDP is normalised over the combined indicator period: from the first
    # year with data in file 1 up to the end year of file 2.
    dicGdp = spiLib.reverseAndNormalizeDic(dicGdp, minimumYearWithActualData,
                                           endYear2)

    spiLib.defnoCountry(dicNoCountry, fileLog)
    spiLibCreateTable.createTableCountryLevelFdi(
        dicIndicator, dicGdp, spiIndicator, minimumYearWithActualData,
        fileLog, tableName)
Exemplo n.º 6
0
def traitementFichierTXT(indicatorEurostatDenominator, unitDenominator,
                         unitNumerator, nomenclature, filesDenominator,
                         fileNumerator, indicatorSpi, fileLog, tableName):
    """Build numerator/denominator NACE series and write the percentage table.

    All inputs are comma-separated text files whose header's 'geotime' field
    also carries the tab-separated year labels.

    Args:
        indicatorEurostatDenominator: 'indic' value required in denominator
            files that have an indicator column.
        unitDenominator: 'unit' value required in the denominator files.
        unitNumerator: 'unit' value required in the numerator file.
        nomenclature: NACE nomenclature name passed to the spiLib helpers;
            'nace2' additionally enables the special aggregates below.
        filesDenominator: list of denominator file paths.  It is sorted in
            descending order IN PLACE, as before.
        fileNumerator: path of the numerator file.
        indicatorSpi: SPI indicator code; 'patintens' and 'patintrd' enable
            the special aggregates when nomenclature is 'nace2'.
        fileLog: log handle forwarded to the spiLib helpers.
        tableName: target table name forwarded to the table writer.

    Returns:
        None.  The result is handed to
        spiLibCreateTable.createTableNacePercentage.
    """
    dicIndicatorNumerator = {}
    dicIndicatorDenominator = {}
    dicNoCountry = {}
    dicNaceCheck = {}
    dicNation = DBAccess.lectureNationEurostat({})
    dicNace = spiLib.defSelectdicNace(nomenclature, 'nama')
    minimumYearWithActualData = 999999
    maxEndYear = -1

    filesDenominator.sort(reverse=True)

    # Special aggregates requested on 30-09-2016: the detailed codes must be
    # accepted while reading so they can be summed afterwards.
    specialAggregates = (indicatorSpi in ('patintens', 'patintrd')
                         and nomenclature == 'nace2')
    if specialAggregates:
        for code in ('C10', 'C11', 'C12', 'C13', 'C14', 'C15', 'C31', 'C32'):
            dicNace[code] = 'C'

    # ---- Numerator ----
    # 'with' releases the handle even when a line fails to parse; the local
    # name no longer shadows the Python 2 builtin 'file'.
    with open(fileNumerator, 'r') as numeratorFile:
        list1st = numeratorFile.readline().split(',')
        dicEurostat = spiLib.defDicEurostat(list1st)
        iUnit = dicEurostat['unit']
        iNace = dicEurostat['nace']
        iGeoTime = dicEurostat['geotime']
        geotime = list1st[iGeoTime].split('\t')
        # The first year label is used as the end year, the last as start.
        endYear = geotime[1].strip()
        startYear = geotime[-1].strip()

        for line in numeratorFile:
            lineList = line.strip('\n').split(',')
            nace = lineList[iNace].strip()
            unit = lineList[iUnit].strip()
            geoTime = lineList[iGeoTime].split('\t')
            geo = geoTime[0].strip()

            # Every NACE code seen is recorded, whatever the country.
            dicNaceCheck[nace] = nace
            try:
                country = dicNation[geo]
                timeSerie = geoTime[1:]
                # 'in' instead of dict.has_key(): has_key() does not exist
                # on Python 3 and the resulting AttributeError would have
                # been hidden by the except clause.
                if nace in dicNace and unit == unitNumerator:
                    vector = spiLib.defVectorYears(timeSerie, startYear,
                                                   endYear)
                    minimumYearWithActualData = (
                        spiLib.findMinimumYearWithActualData(
                            timeSerie, int(startYear),
                            minimumYearWithActualData))
                    dicIndicatorNumerator = spiLib.defDicIndicator(
                        country, nace, vector, dicIndicatorNumerator)
                    if int(endYear) > maxEndYear:
                        maxEndYear = int(endYear)
            except Exception:
                # Best effort per row; unlike a bare except this lets
                # KeyboardInterrupt/SystemExit through.
                dicNoCountry[geo] = geo

    dicIndicatorNumerator = spiLib.reverseAndNormalizeDic(
        dicIndicatorNumerator, minimumYearWithActualData, maxEndYear)

    if specialAggregates:
        aggregates = (
            ('C10-C12', ('C10', 'C11', 'C12')),
            ('C13-C15', ('C13', 'C14', 'C15')),
            ('C31_C32', ('C31', 'C32')),
        )
        for country in dicIndicatorNumerator:
            naceVectors = dicIndicatorNumerator[country]
            for aggregateCode, codes in aggregates:
                summed = _sumNaceVectors(naceVectors, codes)
                # An aggregate is stored only when it has at least one year.
                if summed:
                    naceVectors[aggregateCode] = summed

    dicIndicatorNumerator = spiLibTotal.calcNaceAggregates(
        dicIndicatorNumerator, nomenclature, 'nama')

    # ---- Denominator ----
    for txt in filesDenominator:
        with open(txt, 'r') as denominatorFile:
            list1st = denominatorFile.readline().split(',')
            dicEurostat = spiLib.defDicEurostat(list1st)
            iUnit = dicEurostat['unit']
            try:
                iIndic = dicEurostat['indic']
            except KeyError:
                # -1 is the "no indicator column" marker tested below.  The
                # original 'except: pass' left iIndic undefined, or stale
                # from the previous file.
                iIndic = -1
            iNace = dicEurostat['nace']
            iGeoTime = dicEurostat['geotime']
            geotime = list1st[iGeoTime].split('\t')
            endYear = geotime[1].strip()
            startYear = geotime[-1].strip()

            for line in denominatorFile:
                lineList = line.strip('\n').split(',')
                nace = lineList[iNace].strip()
                if iIndic != -1:
                    indicator = lineList[iIndic].strip()
                else:
                    indicator = 'noindicator'
                unit = lineList[iUnit].strip()
                geoTime = lineList[iGeoTime].split('\t')
                geo = geoTime[0].strip()

                dicNaceCheck[nace] = nace
                try:
                    country = dicNation[geo]
                    timeSerie = geoTime[1:]
                    # Files without an indicator column accept every row;
                    # the others must match the requested indicator.
                    indicatorOk = (
                        indicator == 'noindicator'
                        or indicator == indicatorEurostatDenominator)
                    if (indicatorOk and nace in dicNace
                            and unit == unitDenominator):
                        vector = spiLib.defVectorYears(timeSerie, startYear,
                                                       endYear)
                        dicIndicatorDenominator = spiLib.defDicIndicator(
                            country, nace, vector, dicIndicatorDenominator)
                except Exception:
                    dicNoCountry[geo] = geo

    dicIndicatorDenominator = spiLib.reverseAndNormalizeDic(
        dicIndicatorDenominator, minimumYearWithActualData, maxEndYear)
    dicIndicatorDenominator = spiLibTotal.calcNaceAggregates(
        dicIndicatorDenominator, nomenclature, 'nama')

    spiLib.defnoCountry(dicNoCountry, fileLog)
    spiLib.defDicNaceCheck(dicNaceCheck, dicNace, fileLog)

    spiLibCreateTable.createTableNacePercentage(
        nomenclature, dicIndicatorNumerator, dicIndicatorDenominator,
        indicatorSpi, minimumYearWithActualData, fileLog, tableName)


def _sumNaceVectors(naceVectors, codes):
    """Return the year-by-year sum of the vectors stored under ``codes``.

    Args:
        naceVectors: dict mapping a NACE code to its list of yearly values
            (strings; ':' marks a missing value).
        codes: NACE codes to add up; the first one fixes the vector length.

    Returns:
        A list of strings.  A year is ':' when any component is ':',
        otherwise ``str()`` of the float sum.  The list is empty when one
        of the codes is absent from ``naceVectors``.
    """
    try:
        vectors = [naceVectors[code] for code in codes]
    except KeyError:
        return []

    summed = []
    for i in range(len(vectors[0])):
        if any(vector[i] == ':' for vector in vectors):
            summed.append(':')
        else:
            # Accumulate left to right so the float result is identical to
            # the former explicit a + b + c expression.
            total = float(vectors[0][i])
            for vector in vectors[1:]:
                total += float(vector[i])
            summed.append(str(total))
    return summed
Exemplo n.º 7
0
def traitementFichierTXT(indicatorInputEurostat, unitEurostat, nomenclature, sbsFile = ''):
    """Build the skill/tech NACE series per country and write the SPI table.

    Every file listed in the module-level ``fichiersTXT`` is a
    comma-separated text file whose header's 'geotime' field also carries
    the tab-separated year labels.  Rows are kept when indicator, NACE code
    and unit all match.

    Args:
        indicatorInputEurostat: value the row's 'indic' column must equal.
        unitEurostat: value the row's 'unit' column must equal.
        nomenclature: NACE nomenclature name; 'nace2' triggers the extra
            SBS processing below.
        sbsFile: path of the SBS file.  It is opened whenever nomenclature
            is 'nace2', so it must be supplied in that case.

    Returns:
        None.  The result is handed to
        spiLibCreateTable.createTableSkillTech.

    Note:
        fichiersTXT, fileLog and G_tableName are not parameters; they must
        exist at module level.
    """
    dicIndicator = {}
    dicNoCountry = {}
    dicNaceCheck = {}
    dicNace = spiLib.defSelectdicNaceSkillTech(nomenclature)
    dicNation = DBAccess.lectureNationEurostat({})
    dicStartValue = dict(startIndice=1, startCountry='', startNace='',
                         startIndicator='', startValeur=0)

    minStartYear = 99999
    maxEndYear = -1

    for txt in fichiersTXT:
        # 'with' releases the handle even when a line fails to parse.
        with open(txt, 'r') as fichierTXT:
            # The first record holds the metadata.
            lstrec = fichierTXT.readline().split(',')
            # Select the columns of the Eurostat input.
            dicEurostat = spiLib.defDicEurostat(lstrec)
            iUnit = dicEurostat['unit']
            iNace = dicEurostat['nace']
            iIndic = dicEurostat['indic']
            iGeoTime = dicEurostat['geotime']
            geotime = lstrec[iGeoTime].split('\t')
            # The first year label is used as the end year, the last as
            # start.
            endYear = geotime[1].strip()
            startYear = geotime[-1].strip()
            # Sample layout:
            #   nace_r1,indic_sb,size_emp,geo\time
            #   E,V11110,TOTAL,ES    9037     6410     3544     3311
            for ligneTXT in fichierTXT:
                # Reminder: strip() only removes characters at both ends.
                ligne = ligneTXT.strip('\n').split(',')
                nace = ligne[iNace].strip()
                indicator = ligne[iIndic].strip()
                unit = ligne[iUnit].strip()
                geoTime = ligne[iGeoTime].split('\t')
                geoEuroStat = geoTime[0].strip()
                try:
                    country = dicNation[geoEuroStat]
                    timeSerie = geoTime[1:]

                    # 'in' instead of dict.has_key(): has_key() does not
                    # exist on Python 3 and the resulting AttributeError
                    # would have been hidden by the except clause.
                    if (indicator == indicatorInputEurostat
                            and nace in dicNace and unit == unitEurostat):
                        dicNaceCheck[nace] = nace
                        vector = spiLib.defVectorYears(timeSerie, startYear,
                                                       endYear)
                        dicStartValue = spiLib.defDicStartValue(
                            timeSerie, country, nace, indicator,
                            dicStartValue, endYear)
                        dicIndicator = spiLib.defDicIndicator(
                            country, nace, vector, dicIndicator)
                        if int(endYear) > maxEndYear:
                            maxEndYear = int(endYear)
                        if int(startYear) < minStartYear:
                            minStartYear = int(startYear)
                except Exception:
                    # Best effort per row; unlike a bare except this lets
                    # KeyboardInterrupt/SystemExit through.
                    dicNoCountry[geoEuroStat] = geoEuroStat

    fileLog.write('highIndice ' + str(dicStartValue['startIndice'])
                  + ' highCountry ' + dicStartValue['startCountry']
                  + ' Indicator ' + dicStartValue['startIndicator']
                  + ' Nace ' + dicStartValue['startNace']
                  + ' valeur ' + str(dicStartValue['startValeur']) + '\n')
    spiLib.defnoCountry(dicNoCountry, fileLog)
    spiLib.defDicNaceCheck(dicNaceCheck, dicNace, fileLog)
    dicIndicator = spiLib.reverseAndNormalizeDic(dicIndicator, minStartYear,
                                                 maxEndYear)

    if nomenclature == 'nace2':
        dicSbs = {}
        with open(sbsFile, 'r') as fichierTXT:
            lstrec = fichierTXT.readline().split(',')
            dicEurostat = spiLib.defDicEurostat(lstrec)
            iNace = dicEurostat['nace']
            iIndic = dicEurostat['indic']
            iGeoTime = dicEurostat['geotime']
            geotime = lstrec[iGeoTime].split('\t')
            endYear = geotime[1].strip()
            startYear = geotime[-1].strip()
            for ligneTXT in fichierTXT:
                ligne = ligneTXT.strip('\n').split(',')
                nace = ligne[iNace].strip()
                indicator = ligne[iIndic].strip()
                geoTime = ligne[iGeoTime].split('\t')
                geoEuroStat = geoTime[0].strip()

                try:
                    country = dicNation[geoEuroStat]
                    timeSerie = geoTime[1:]
                    # Only indicator V12150 for N80, N81 and N82 is used.
                    if indicator == 'V12150' and nace in ('N80', 'N81', 'N82'):
                        vector = spiLib.defVectorYears(timeSerie, startYear,
                                                       endYear)
                        dicSbs = spiLib.defDicIndicator(country, nace, vector,
                                                        dicSbs)
                except Exception:
                    # Rows that cannot be processed are skipped without
                    # being logged, as before.
                    continue
        dicSbs = spiLib.reverseAndNormalizeDic(dicSbs, minStartYear,
                                               maxEndYear)
        dicSbs = spiLibTotal.createSkillTechNace2SbsTotal(dicSbs)
        dicRatio = spiLibTotal.createSkillTechNace2Ratio(dicSbs)
        del dicSbs

        dicIndicator = spiLibTotal.addSkillTechNace2RemainingCodes(
            dicIndicator, dicRatio)

    spiLibCreateTable.createTableSkillTech(nomenclature, dicIndicator,
                                           minStartYear, fileLog, G_tableName)
Exemplo n.º 8
0
def traitementFichierTXT(indicatorInput, nomenclature, compteEurostat):
    """Build the competition NACE series per country and write the SPI tables.

    Every file listed in the module-level ``fichiersTXT`` is a
    comma-separated text file whose header's 'geotime' field also carries
    the tab-separated year labels.  Rows are kept when the indicator equals
    ``indicatorInput`` and the size class equals the module-level
    ``G_Size``.

    Args:
        indicatorInput: value the row's 'indic' column must equal; also the
            key used to look up the SPI indicator code.
        nomenclature: NACE nomenclature name passed to the spiLib helpers.
        compteEurostat: account name passed to spiLib.defSelectdicNace and
            to the table writers; together with nomenclature 'nace2', the
            value 'sbs' forces the size class to 'TOTAL'.

    Returns:
        None.  The results are handed to spiLibCreateTable.createTable and
        spiLibCreateTable.createTableNE.

    Note:
        fichiersTXT, dirUse, fileLog, G_Size and G_tableName are not
        parameters; they must exist at module level.
    """
    dicIndicator = {}
    dicNoCountry = {}
    dicNaceCheck = {}
    dicIndicatorDomain = FileAccess.lectureIndicator({}, 'competition',
                                                     dirUse)
    indicatorSpi = dicIndicatorDomain[indicatorInput]
    dicNace = spiLib.defSelectdicNace(nomenclature, compteEurostat)
    dicNation = DBAccess.lectureNationEurostat({})
    dicStartValue = dict(startIndice=1,
                         startCountry='',
                         startNace='',
                         startIndicator='',
                         startValeur=0,
                         startYear=1900)
    minStartYear = 99999
    maxEndYear = -1

    # The size column does not exist for NACE2 in the sbs_na files, so the
    # size condition is forced to succeed in that case.
    forceTotalSize = nomenclature == 'nace2' and compteEurostat == 'sbs'

    for txt in fichiersTXT:
        # 'with' closes each input file; the original never closed them.
        with open(txt, 'r') as fichierTXT:
            # The first record holds the metadata.
            lstrec = fichierTXT.readline().split(',')
            # Select the columns of the Eurostat input.
            dicEurostat = spiLib.defDicEurostat(lstrec)
            iUnit = dicEurostat['unit']
            iNace = dicEurostat['nace']
            iIndic = dicEurostat['indic']
            iSize = dicEurostat['size']
            iGeoTime = dicEurostat['geotime']
            geotime = lstrec[iGeoTime].split('\t')
            # The first year label is used as the end year, the last as
            # start.
            endYear = geotime[1].strip()
            startYear = geotime[-1].strip()
            # Sample layout:
            #   nace_r1,indic_sb,size_emp,geo\time
            #   E,V11110,TOTAL,ES	9037 	6410 	3544 	3311 	3336 	3084
            for ligneTXT in fichierTXT:
                # Reminder: strip() only removes characters at both ends.
                ligne = ligneTXT.strip('\n').split(',')
                nace = ligne[iNace].strip()
                indicator = ligne[iIndic].strip()

                # The size column is only read when it is needed; the
                # former unconditional read before this test was redundant.
                if forceTotalSize:
                    sizeEurostat = 'TOTAL'
                else:
                    sizeEurostat = ligne[iSize].strip()

                geoTime = ligne[iGeoTime].split('\t')
                geoEuroStat = geoTime[0].strip()
                try:
                    country = dicNation[geoEuroStat]
                    timeSerie = geoTime[1:]
                    # The NACE code is deliberately not filtered against
                    # dicNace here; dicNaceCheck is compared with dicNace
                    # after the loop instead.
                    if indicator == indicatorInput and sizeEurostat == G_Size:
                        # Recorded to check whether as many NACE codes were
                        # seen as exist in the SPI table.
                        dicNaceCheck[nace] = nace
                        vector = spiLib.defVectorYears(timeSerie, startYear,
                                                       endYear)
                        dicStartValue = spiLib.defDicStartValue(
                            timeSerie, country, nace, indicator,
                            dicStartValue, endYear)
                        dicIndicator = spiLib.defDicIndicator(
                            country, nace, vector, dicIndicator)
                        minStartYear, maxEndYear = spiLib.defMinMaxYear(
                            startYear, minStartYear, endYear, maxEndYear)
                except Exception:
                    # Best effort per row; unlike a bare except this lets
                    # KeyboardInterrupt/SystemExit through.
                    dicNoCountry[geoEuroStat] = geoEuroStat

    fileLog.write('highIndice ' + str(dicStartValue['startIndice'])
                  + ' highCountry ' + dicStartValue['startCountry']
                  + ' Indicator ' + dicStartValue['startIndicator']
                  + ' Nace ' + dicStartValue['startNace']
                  + ' valeur ' + str(dicStartValue['startValeur'])
                  + ' startYear ' + str(dicStartValue['startYear']) + '\n')
    spiLib.defnoCountry(dicNoCountry, fileLog)
    spiLib.defDicNaceCheck(dicNaceCheck, dicNace, fileLog)
    # The start year tracked in dicStartValue replaces the header-derived
    # minimum computed in the loop.
    minStartYear = dicStartValue['startYear']
    # Indicator processing.
    dicIndicator = spiLib.reverseAndNormalizeDicIndicator(
        dicIndicator, minStartYear, maxEndYear)
    spiLibCreateTable.createTable(nomenclature, dicIndicator, fileLog,
                                  minStartYear, dicNace, indicatorSpi,
                                  compteEurostat, G_tableName)
    # NOTE(review): endYear is the value left by the LAST file read and is
    # undefined when fichiersTXT is empty -- confirm this is intended.
    spiLibCreateTable.createTableNE(nomenclature, dicNation, endYear, fileLog,
                                    indicatorSpi, compteEurostat, G_tableName)
Exemplo n.º 9
0
def traitementFichierTXT(indicatorEurostatDenominator,
                         indicatorEurostatNumerator, unitDenominator,
                         unitNumerator, nomenclature, filesDenominator,
                         filesNumerator, baseYear, indicatorSpi, fileLog,
                         tableName):
    """Read numerator and denominator Eurostat extracts and create the index table.

    Each input file starts with a comma-separated header line; its geo/time
    field also holds the tab-separated list of years (the first year listed
    is used as ``endYear``, the last one as ``startYear``).  Every following
    line holds the dimension codes plus a tab-separated time series.

    Args:
        indicatorEurostatDenominator: indicator code a denominator row must carry.
        indicatorEurostatNumerator: indicator code a numerator row must carry.
        unitDenominator: unit code a denominator row must carry.
        unitNumerator: unit code a numerator row must carry.
        nomenclature: nomenclature name; selects the NACE dictionary and
            triggers the extra aggregate step when it is ``'nace2'`` with an
            ``'EMP'`` denominator.
        filesDenominator: list of denominator file paths (sorted in place,
            descending).
        filesNumerator: list of numerator file paths (sorted in place,
            descending).
        baseYear: forwarded to ``createTableDomesticIndex``.
        indicatorSpi: forwarded to ``createTableDomesticIndex``.
        fileLog: log handle forwarded to the ``spiLib`` reporting helpers.
        tableName: forwarded to ``createTableDomesticIndex``.
    """
    dicIndicatorNumerator = {}
    dicIndicatorDenominator = {}
    dicNoCountry = {}
    dicNaceCheck = {}
    dicNation = DBAccess.lectureNationEurostat({})
    dicNace = spiLib.defSelectdicNace(nomenclature, 'nama')
    minimumYearWithActualData = 999999
    maxEndYear = -1

    # The caller's lists are reordered in place, exactly as before.
    filesNumerator.sort(reverse=True)
    filesDenominator.sort(reverse=True)

    for txt in filesNumerator:
        # ``with`` guarantees the handle is released even if a row blows up.
        with open(txt, 'r') as source:
            header = source.readline().split(',')
            dicEurostat = spiLib.defDicEurostat(header)
            iUnit = dicEurostat['unit']
            iIndic = dicEurostat['indic']
            iNace = dicEurostat['nace']
            iGeoTime = dicEurostat['geotime']
            years = header[iGeoTime].split('\t')
            endYear = years[1].strip()
            startYear = years[-1].strip()

            for line in source:
                fields = line.strip('\n').split(',')
                nace = fields[iNace].strip()
                indicator = fields[iIndic].strip()
                unit = fields[iUnit].strip()
                geoTime = fields[iGeoTime].split('\t')
                geo = geoTime[0].strip()

                try:
                    # Every NACE code seen is recorded, whatever the country.
                    dicNaceCheck[nace] = nace
                    country = dicNation[geo]
                    timeSerie = geoTime[1:]

                    if (indicator == indicatorEurostatNumerator
                            and nace in dicNace and unit == unitNumerator):
                        vector = spiLib.defVectorYears(timeSerie, startYear,
                                                       endYear)
                        minimumYearWithActualData = spiLib.findMinimumYearWithActualData(
                            timeSerie, int(startYear),
                            minimumYearWithActualData)
                        dicIndicatorNumerator = spiLib.defDicIndicator(
                            country, nace, vector, dicIndicatorNumerator)
                        if int(endYear) > maxEndYear:
                            maxEndYear = int(endYear)
                except Exception:
                    # Best effort, as in the original: any failure on the row
                    # (normally an unknown geo code) is logged as "no country".
                    dicNoCountry[geo] = geo

    dicIndicatorNumerator = spiLib.reverseAndNormalizeDic(
        dicIndicatorNumerator, minimumYearWithActualData, maxEndYear)
    dicIndicatorNumerator = spiLibTotal.calcNaceAggregates(
        dicIndicatorNumerator, nomenclature, 'nama')

    for txt in filesDenominator:
        with open(txt, 'r') as source:
            header = source.readline().split(',')
            dicEurostat = spiLib.defDicEurostat(header)
            iUnit = dicEurostat['unit']
            iIndic = dicEurostat['indic']
            iNace = dicEurostat['nace']
            iGeoTime = dicEurostat['geotime']
            iSector = dicEurostat['sector']
            years = header[iGeoTime].split('\t')
            endYear = years[1].strip()
            startYear = years[-1].strip()

            for line in source:
                fields = line.strip('\n').split(',')
                nace = fields[iNace].strip()
                indicator = fields[iIndic].strip()
                unit = fields[iUnit].strip()
                geoTime = fields[iGeoTime].split('\t')
                # An index of -1 means the file has no sector column; such
                # rows are treated as sector S1.
                if iSector == -1:
                    sector = 'S1'
                else:
                    sector = fields[iSector].strip()
                geo = geoTime[0].strip()

                try:
                    dicNaceCheck[nace] = nace
                    country = dicNation[geo]
                    timeSerie = geoTime[1:]

                    if (indicator == indicatorEurostatDenominator
                            and nace in dicNace and unit == unitDenominator
                            and sector == 'S1'):
                        vector = spiLib.defVectorYears(timeSerie, startYear,
                                                       endYear)
                        dicIndicatorDenominator = spiLib.defDicIndicator(
                            country, nace, vector, dicIndicatorDenominator)
                except Exception:
                    dicNoCountry[geo] = geo

    # The denominator is normalised with the year bounds found on the
    # numerator side (minimum year with data, highest end year).
    dicIndicatorDenominator = spiLib.reverseAndNormalizeDic(
        dicIndicatorDenominator, minimumYearWithActualData, maxEndYear)
    dicIndicatorDenominator = spiLibTotal.calcNaceAggregates(
        dicIndicatorDenominator, nomenclature, 'nama')

    # Added on demand of the B2 team: aggregates that cannot be extracted
    # from the nama_nace10_e file.
    if nomenclature == 'nace2' and indicatorEurostatDenominator == 'EMP':
        dicIndicatorDenominator = spiLibTotal.calcNace2EmpAggr(
            dicIndicatorDenominator)

    spiLib.defnoCountry(dicNoCountry, fileLog)
    spiLib.defDicNaceCheck(dicNaceCheck, dicNace, fileLog)

    spiLibCreateTable.createTableDomesticIndex(
        nomenclature, dicIndicatorNumerator, dicIndicatorDenominator,
        indicatorSpi, minimumYearWithActualData, baseYear, fileLog, tableName)
Exemplo n.º 10
0
def traitementFichierTXT(indicatorInput, indicatorInputEurostat, nomenclature,
                         compteEurostat):
    """Load one Eurostat indicator by country/NACE and create the SPI tables.

    Reads every file listed in the module-level ``fichiersTXT``; also relies
    on the module-level ``fileLog``, ``G_tableName`` and ``G_IndicatorSPI_T``.
    The first line of each file is the comma-separated header, whose geo/time
    field also carries the tab-separated years (first listed = ``endYear``,
    last listed = ``startYear``).

    Args:
        indicatorInput: SPI indicator name passed to the table writers.
        indicatorInputEurostat: indicator code a row must carry to be kept.
        nomenclature: ``'nace1'`` keeps rows whose NACE code is in the
            selected NACE dictionary; ``'nace2'`` keeps rows whose NACE code
            is shorter than 4 characters.
        compteEurostat: forwarded to ``defSelectdicNace`` and the table writers.

    Note:
        ``endYear`` used for the final ``createTableNE`` calls is the one of
        the last file read; with an empty ``fichiersTXT`` it is undefined and
        a NameError is raised (unchanged from the original).
    """
    dicIndicator = {}
    dicNoCountry = {}
    dicNaceCheck = {}
    indicatorSpi = indicatorInput
    indicatorSpiTotal = G_IndicatorSPI_T
    indicatorEurostat = indicatorInputEurostat
    dicNace = spiLib.defSelectdicNace(nomenclature, compteEurostat)
    dicNation = DBAccess.lectureNationEurostat({})
    dicStartValue = dict(startIndice=1,
                         startCountry='',
                         startNace='',
                         startIndicator='',
                         startValeur=0,
                         startYear=1900)
    minStartYear = 99999
    maxEndYear = -1
    for txt in fichiersTXT:
        # The original never closed these handles; ``with`` releases them.
        with open(txt, 'r') as fichierTXT:
            # First record holds the metadata (column names and years).
            lstrec = fichierTXT.readline().split(',')
            # Locate the Eurostat input columns that are needed.
            dicEurostat = spiLib.defDicEurostat(lstrec)
            iNace = dicEurostat['nace']
            iIndic = dicEurostat['indic']
            iGeoTime = dicEurostat['geotime']
            geotime = lstrec[iGeoTime].split('\t')
            endYear = geotime[1].strip()
            startYear = geotime[-1].strip()
            # Example header : nace_r1,indic_sb,size_emp,geo\time
            # Example record : E,V11110,TOTAL,ES<TAB>9037<TAB>6410<TAB>3544 ...
            for ligneTXT in fichierTXT:
                ligne = ligneTXT.strip('\n').split(',')
                nace = ligne[iNace].strip()
                indicator = ligne[iIndic].strip()
                geoTime = ligne[iGeoTime].split('\t')
                geoEuroStat = geoTime[0].strip()
                try:
                    country = dicNation[geoEuroStat]
                    timeSerie = geoTime[1:]
                    naceRetenu = (
                        (nomenclature == 'nace1' and nace in dicNace)
                        or (nomenclature == 'nace2' and len(nace) < 4))
                    if indicator == indicatorEurostat and naceRetenu:
                        # Remember the code so the number of NACE codes can be
                        # checked against the SPI table afterwards.
                        dicNaceCheck[nace] = nace
                        # Process the Eurostat series.
                        vector = spiLib.defVectorYears(timeSerie, startYear,
                                                       endYear)
                        dicStartValue = spiLib.defDicStartValue(
                            timeSerie, country, nace, indicator,
                            dicStartValue, endYear)
                        dicIndicator = spiLib.defDicIndicator(
                            country, nace, vector, dicIndicator)
                        minStartYear, maxEndYear = spiLib.defMinMaxYear(
                            startYear, minStartYear, endYear, maxEndYear)
                except Exception:
                    # Best effort, as in the original: any failure on the row
                    # (normally an unknown geo code) is logged as "no country".
                    dicNoCountry[geoEuroStat] = geoEuroStat

    fileLog.write('highIndice ' + str(dicStartValue['startIndice']) +
                  ' highCountry ' + dicStartValue['startCountry'] +
                  ' Indicator ' + dicStartValue['startIndicator'] +
                  ' Nace ' + dicStartValue['startNace'] +
                  ' valeur ' + str(dicStartValue['startValeur']) +
                  ' startYear ' + str(dicStartValue['startYear']) + '\n')
    spiLib.defnoCountry(dicNoCountry, fileLog)
    spiLib.defDicNaceCheck(dicNaceCheck, dicNace, fileLog)
    # The start year recorded in dicStartValue replaces the one accumulated
    # by defMinMaxYear.
    minStartYear = dicStartValue['startYear']
    dicIndicator = spiLib.reverseAndNormalizeDicIndicator(
        dicIndicator, minStartYear, maxEndYear)
    # Create the SPI indicator (e.g. vabus); the aggregates come back as a dict.
    dicAgregatNace = spiLibCreateTable.createTable(nomenclature, dicIndicator,
                                                   fileLog, minStartYear,
                                                   dicNace, indicatorSpi,
                                                   compteEurostat, G_tableName)
    if indicatorSpiTotal != 'noTotal':
        # Create the SPI total indicator (e.g. vabussh).
        spiLibCreateTable.createTableTotal(nomenclature, dicAgregatNace,
                                           dicIndicator, minStartYear, fileLog,
                                           dicNace, indicatorSpiTotal,
                                           compteEurostat, G_tableName)
        spiLibCreateTable.createTableNE(nomenclature, dicNation, endYear,
                                        fileLog, indicatorSpiTotal,
                                        compteEurostat, G_tableName)
    spiLibCreateTable.createTableNE(nomenclature, dicNation, endYear, fileLog,
                                    indicatorSpi, compteEurostat, G_tableName)
Exemplo n.º 11
0
def traitementFichierTXT(indicatorInput, indicatorInputEurostat, nomenclature,
                         compteEurostat):
    """Load one Eurostat indicator by country/NACE/size class and create the SPI tables.

    Reads every file listed in the module-level ``fichiersTXT``; also relies
    on the module-level ``fileLog``, ``G_tableName``, ``G_LstSize`` (accepted
    size codes) and ``G_DicSize`` (size code -> suffix appended to the SPI
    indicator name).

    Args:
        indicatorInput: SPI indicator name; ``'_' + G_DicSize[size]`` is
            appended to build the per-size indicator.
        indicatorInputEurostat: indicator code a row must carry to be kept.
        nomenclature: ``'nace1'`` keeps rows whose NACE code is in the
            selected NACE dictionary; ``'nace2'`` keeps rows whose NACE code
            is shorter than 4 characters.
        compteEurostat: forwarded to ``defSelectdicNace`` and the table writers.

    Note:
        ``endYear`` used for the final ``createTableNE`` call is the one of
        the last file read; with an empty ``fichiersTXT`` it is undefined and
        a NameError is raised (unchanged from the original).
    """
    dicIndicator = {}
    dicNoCountry = {}
    dicNaceCheck = {}
    dicSize = {}
    indicatorSpi = indicatorInput
    indicatorEurostat = indicatorInputEurostat
    dicNace = spiLib.defSelectdicNace(nomenclature, compteEurostat)
    dicNation = DBAccess.lectureNationEurostat({})
    dicStartValue = dict(startIndice=1,
                         startCountry='',
                         startNace='',
                         startIndicator='',
                         startValeur=0,
                         startYear=1900)
    minStartYear = 99999
    maxEndYear = -1

    for txt in fichiersTXT:
        # The original never closed these handles; ``with`` releases them.
        with open(txt, 'r') as fichierTXT:
            # First record holds the metadata (column names and years).
            lstrec = fichierTXT.readline().split(',')
            # Locate the Eurostat input columns that are needed.
            dicEurostat = spiLib.defDicEurostat(lstrec)
            iNace = dicEurostat['nace']
            iIndic = dicEurostat['indic']
            iSize = dicEurostat['size']
            iGeoTime = dicEurostat['geotime']
            geotime = lstrec[iGeoTime].split('\t')
            endYear = geotime[1].strip()
            startYear = geotime[-1].strip()
            # Example header : nace_r1,indic_sb,size_emp,geo\time
            # Example record : E,V11110,TOTAL,ES<TAB>9037<TAB>6410<TAB>3544 ...
            for ligneTXT in fichierTXT:
                ligne = ligneTXT.strip('\n').split(',')
                nace = ligne[iNace].strip()
                indicator = ligne[iIndic].strip()
                size = ligne[iSize].strip()
                geoTime = ligne[iGeoTime].split('\t')
                geoEuroStat = geoTime[0].strip()
                try:
                    country = dicNation[geoEuroStat]
                    timeSerie = geoTime[1:]
                    naceRetenu = (
                        (nomenclature == 'nace1' and nace in dicNace)
                        or (nomenclature == 'nace2' and len(nace) < 4))
                    if (indicator == indicatorEurostat
                            and size in G_LstSize and naceRetenu):
                        # Keep track of the size codes found in the input.
                        dicSize[size] = size
                        try:
                            # Look up the matching SPI indicator.
                            indicator_size = indicatorSpi + '_' + G_DicSize[size]
                        except Exception:
                            fileLog.write('pas de size ' + size +
                                          ' indicateur : ' + indicator +
                                          ' country : ' + country + '\n')
                            continue  # move on to the next record
                        # Remember the code so the number of NACE codes can be
                        # checked against the SPI table afterwards.
                        dicNaceCheck[nace] = nace
                        vector = spiLib.defVectorYears(timeSerie, startYear,
                                                       endYear)
                        dicStartValue = spiLib.defDicStartValue(
                            timeSerie, country, nace, indicator,
                            dicStartValue, endYear)
                        dicIndicator = spiLib.defDicIndicatorSize(
                            country, nace, vector, indicator_size,
                            dicIndicator)
                        minStartYear, maxEndYear = spiLib.defMinMaxYear(
                            startYear, minStartYear, endYear, maxEndYear)
                except Exception:
                    # Best effort, as in the original: any failure on the row
                    # (normally an unknown geo code) is logged as "no country".
                    dicNoCountry[geoEuroStat] = geoEuroStat

    fileLog.write('highIndice ' + str(dicStartValue['startIndice']) +
                  ' highCountry ' + dicStartValue['startCountry'] +
                  ' Indicator ' + dicStartValue['startIndicator'] +
                  ' Nace ' + dicStartValue['startNace'] +
                  ' valeur ' + str(dicStartValue['startValeur']) +
                  ' startYear ' + str(dicStartValue['startYear']) + '\n')
    spiLib.defnoCountry(dicNoCountry, fileLog)
    spiLib.defDicNaceCheck(dicNaceCheck, dicNace, fileLog)
    # sorted() works on Python 2 and 3 (keys().sort() fails on Python 3).
    for s in sorted(dicSize):
        fileLog.write(' List Size in Eurostat Input : ' + s + '\n')
    # The start year recorded in dicStartValue replaces the one accumulated
    # by defMinMaxYear.
    minStartYear = dicStartValue['startYear']
    dicIndicator = spiLib.reverseAndNormalizeDicIndicatorSize(
        dicIndicator, minStartYear, maxEndYear)
    spiLibCreateTable.createTableSize(nomenclature, dicIndicator, fileLog,
                                      minStartYear, dicNace, indicatorSpi,
                                      compteEurostat, G_tableName)
    spiLibCreateTable.createTableNE(nomenclature, dicNation, endYear, fileLog,
                                    indicatorSpi, compteEurostat, G_tableName,
                                    G_DicSize)
Exemplo n.º 12
0
def traitementFichierTXT(indicatorInput, indicatorInputEurostat, nomenclature,
                         compteEurostat):
    """Load one Eurostat indicator by country/NACE and create the growth table.

    Reads every file listed in the module-level ``fichiersTXT`` (sorted in
    place, descending); also relies on the module-level ``fileLog``,
    ``G_Unit``, ``G_tableName``, ``G_Growth`` and ``G_FileExt``.  A row is
    kept when its indicator equals ``indicatorInputEurostat``, its unit
    equals ``G_Unit``, its sector is ``S1`` and its NACE code is in the
    selected NACE dictionary.

    Args:
        indicatorInput: SPI indicator name passed to the table writers.
        indicatorInputEurostat: indicator code a row must carry to be kept.
        nomenclature: forwarded to ``defSelectdicNace`` and the table writers.
        compteEurostat: forwarded to ``defSelectdicNace`` and the table writers.

    Note:
        ``endYear`` used for the final ``createTableNE`` call is the one of
        the last file read; with an empty ``fichiersTXT`` it is undefined and
        a NameError is raised (unchanged from the original).
    """
    dicIndicator = {}
    dicNoCountry = {}
    dicNaceCheck = {}
    indicatorSpi = indicatorInput
    indicatorEurostat = indicatorInputEurostat
    Unit = G_Unit
    dicNace = spiLib.defSelectdicNace(nomenclature, compteEurostat)
    dicNation = DBAccess.lectureNationEurostat({})
    dicStartValue = dict(startIndice=1,
                         startCountry='',
                         startNace='',
                         startIndicator='',
                         startValeur=0,
                         startYear=1900)
    minStartYear = 99999
    maxEndYear = -1
    # The module-level list is reordered in place, exactly as before.
    fichiersTXT.sort(reverse=True)
    for txt in fichiersTXT:
        # The original never closed these handles; ``with`` releases them.
        with open(txt, 'r') as fichierTXT:
            # First record holds the metadata (column names and years).
            lstrec = fichierTXT.readline().split(',')
            # Find the position of each variable in the Eurostat input.
            dicEurostat = spiLib.defDicEurostat(lstrec)
            iUnit = dicEurostat['unit']
            iNace = dicEurostat['nace']
            iIndic = dicEurostat['indic']
            iSector = dicEurostat['sector']
            iGeoTime = dicEurostat['geotime']
            geotime = lstrec[iGeoTime].split('\t')
            endYear = geotime[1].strip()
            startYear = geotime[-1].strip()
            # nace 1 header : unit,sector,nace_r1,indic_na,geo\time
            # nace 2 header : unit,nace_r2,indic_na,sector,geo\time
            # Example record : MIO_EUR,A,B1G,AT<TAB>:<TAB>:<TAB>3781.7<TAB>4375.5 ...
            for ligneTXT in fichierTXT:
                ligne = ligneTXT.strip('\n').split(',')
                nace = ligne[iNace].strip()
                indicator = ligne[iIndic].strip()
                unit = ligne[iUnit].strip()
                geoTime = ligne[iGeoTime].split('\t')
                geoEuroStat = geoTime[0].strip()
                # An index of -1 means the file has no sector column; such
                # rows are treated as sector S1.
                if iSector == -1:
                    sector = 'S1'
                else:
                    sector = ligne[iSector].strip()
                try:
                    country = dicNation[geoEuroStat]
                    timeSerie = geoTime[1:]
                    if (indicator == indicatorEurostat and unit == Unit
                            and sector == 'S1' and nace in dicNace):
                        # Remember the code so the number of NACE codes can be
                        # checked against the SPI table afterwards.
                        dicNaceCheck[nace] = nace
                        # Process the Eurostat series.
                        vector = spiLib.defVectorYears(timeSerie, startYear,
                                                       endYear)
                        dicStartValue = spiLib.defDicStartValue(
                            timeSerie, country, nace, indicator,
                            dicStartValue, endYear)
                        dicIndicator = spiLib.defDicIndicator(
                            country, nace, vector, dicIndicator)
                        minStartYear, maxEndYear = spiLib.defMinMaxYear(
                            startYear, minStartYear, endYear, maxEndYear)
                except Exception:
                    # Best effort, as in the original: any failure on the row
                    # (normally an unknown geo code) is logged as "no country".
                    dicNoCountry[geoEuroStat] = geoEuroStat

    fileLog.write('highIndice ' + str(dicStartValue['startIndice']) +
                  ' highCountry ' + dicStartValue['startCountry'] +
                  ' Indicator ' + dicStartValue['startIndicator'] +
                  ' Nace ' + dicStartValue['startNace'] +
                  ' valeur ' + str(dicStartValue['startValeur']) +
                  ' startYear ' + str(dicStartValue['startYear']) + '\n')
    spiLib.defnoCountry(dicNoCountry, fileLog)
    spiLib.defDicNaceCheck(dicNaceCheck, dicNace, fileLog)
    # The start year recorded in dicStartValue replaces the one accumulated
    # by defMinMaxYear.
    minStartYear = dicStartValue['startYear']
    dicIndicator = spiLib.reverseAndNormalizeDicIndicator(
        dicIndicator, minStartYear, maxEndYear)
    spiLibCreateTable.createTableGrowth(nomenclature, dicIndicator, fileLog,
                                        minStartYear, dicNace, indicatorSpi,
                                        compteEurostat, G_tableName, G_Growth,
                                        G_FileExt)
    spiLibCreateTable.createTableNE(nomenclature, dicNation, endYear, fileLog,
                                    indicatorSpi, compteEurostat, G_tableName)
Exemplo n.º 13
0
def traitementFichierTxt(spiIndicator, eurostatIndicator, nomenclature,
                         fileData, fileLabel, tableName, fileLog):
    """Load a tab-separated Comext extract and create the nomenclature table.

    Rows are kept when the reporter is a known nation, the product code is a
    known code for ``nomenclature`` and the flow equals ``eurostatIndicator``.
    Values are stored as ``reporter -> product -> partner -> year`` and then,
    per reporter/product, the ``EU27_INTRA`` and ``EU27_EXTRA`` partner
    series are merged into a single series.

    Args:
        spiIndicator: forwarded to ``createTableNomenclatureBasic``.
        eurostatIndicator: flow code a row must carry to be kept.
        nomenclature: ``'bec'`` selects the BEC code list and the BEC
            reformatting step; any other value selects the CPA code list.
        fileData: path of the tab-separated input file.
        fileLabel: unused in this function.
        tableName: forwarded to ``createTableNomenclatureBasic``.
        fileLog: log handle used for the reports written here.
    """
    listNation = DBAccess.defListNationIso2()
    if nomenclature == 'bec':
        listCpa = DBAccess.lectureBecEurostat()
    else:
        listCpa = DBAccess.lectureCpaSimple(nomenclature)
    uselessNation = {}
    uselessCpa = {}
    dicIndicator = {}
    minYear = 999999
    maxYear = 0

    # ``with`` closes the file even when a malformed row raises.
    with open(fileData, 'r') as source:
        header = source.readline().split('\t')
        dicComext = spiLib.defDicComext(header)
        iReporter = dicComext['reporter']
        iPartner = dicComext['partner']
        iProduct = dicComext['product']
        iFlow = dicComext['flow']
        iPeriod = dicComext['period']
        iValue = dicComext['value']

        for line in source:
            fields = line.split('\t')
            reporter = fields[iReporter].strip()
            partner = fields[iPartner].strip().strip('"')
            product = fields[iProduct].strip()
            # Drop one leading zero; startswith() also copes with an empty
            # product field, where product[0] raised IndexError.
            if product.startswith('0'):
                product = product[1:]
            flow = fields[iFlow].strip()
            # Only the first four characters of the period are used (the year).
            period = int(fields[iPeriod].strip()[0:4])
            value = fields[iValue].strip()

            if reporter not in listNation:
                uselessNation[reporter] = reporter
                continue
            if product not in listCpa:
                uselessCpa[product] = product
                continue
            if flow != eurostatIndicator:
                continue

            # Create the missing nesting levels on the fly.
            parPartenaire = dicIndicator.setdefault(reporter, {}).setdefault(
                product, {})
            parPartenaire.setdefault(partner, {})[period] = value
            if period > maxYear:
                maxYear = period
            if period < minYear:
                minYear = period

    spiLib.defnoCountry(uselessNation, fileLog)
    del uselessNation
    spiLib.defUselessCode(uselessCpa, fileLog)
    del uselessCpa

    dicIndicator = spiLibComext.convertSingleValueToVector(
        dicIndicator, minYear, maxYear)

    for country in dicIndicator:
        for code in dicIndicator[country]:
            parPartenaire = dicIndicator[country][code]

            # NOTE(review): when a reference series is missing the entry is
            # left as the per-partner mapping, not replaced by a vector --
            # confirm the downstream steps accept that.
            try:
                refIntra = parPartenaire['EU27_INTRA']
            except Exception:
                fileLog.write('Missing intra EU27 reference for country ' +
                              country + ' and code ' + code + '.\n')
                continue

            try:
                refExtra = parPartenaire['EU27_EXTRA']
            except Exception:
                fileLog.write('Missing extra EU27 reference for country ' +
                              country + ' and code ' + code + '.\n')
                continue

            # ':' marks a missing value: take the other side when one is
            # missing, otherwise add the two integer values.
            res = []
            for i in range(len(refIntra)):
                intra = refIntra[i]
                extra = refExtra[i]
                if intra == ':':
                    res.append(extra)
                elif extra == ':':
                    res.append(intra)
                else:
                    res.append('{0:.0f}'.format(int(intra) + int(extra)))
            # Re-binding an existing key is safe while iterating the dict.
            dicIndicator[country][code] = res

    if nomenclature == 'bec':
        dicIndicator = spiLibComext.formatBecEurostatDic(dicIndicator)

    dicIndicator = spiLibTotal.calcNaceAggregates(dicIndicator, nomenclature,
                                                  'comext')

    spiLibCreateTable.createTableNomenclatureBasic(dicIndicator, spiIndicator,
                                                   nomenclature, minYear,
                                                   tableName)
def traitementFichierTXT(indicatorInput, indicatorInputEurostat, nomenclature,
                         compteEurostat):
    """Load one Eurostat indicator by country/NACE and create the SPI table.

    Reads every file listed in the module-level ``fichiersTXT`` (sorted in
    place, descending); also relies on the module-level ``fileLog``,
    ``G_Unit``, ``G_tableName`` and ``G_FileExt``.  A row is kept when its
    indicator equals ``indicatorInputEurostat``, its unit equals ``G_Unit``,
    its sector is ``S1`` and its NACE code is in the selected NACE dictionary.

    Args:
        indicatorInput: SPI indicator name passed to the table writers.
        indicatorInputEurostat: indicator code a row must carry to be kept.
        nomenclature: forwarded to ``defSelectdicNace`` and the table writers.
        compteEurostat: forwarded to ``defSelectdicNace`` and the table writers.

    Note:
        ``endYear`` used for the final ``createTableNE`` call is the one of
        the last file read; with an empty ``fichiersTXT`` it is undefined and
        a NameError is raised (unchanged from the original).
    """
    dicIndicator = {}
    dicNoCountry = {}
    dicNaceCheck = {}
    indicatorSpi = indicatorInput
    indicatorEurostat = indicatorInputEurostat
    Unit = G_Unit
    dicNace = spiLib.defSelectdicNace(nomenclature, compteEurostat)
    dicNation = DBAccess.lectureNationEurostat({})
    dicStartValue = dict(startIndice=1,
                         startCountry='',
                         startNace='',
                         startIndicator='',
                         startValeur=0,
                         startYear=1900)
    minStartYear = 99999
    maxEndYear = -1
    # The module-level list is reordered in place, exactly as before.
    fichiersTXT.sort(reverse=True)
    for txt in fichiersTXT:
        # The original never closed these handles; ``with`` releases them.
        with open(txt, 'r') as fichierTXT:
            # First record holds the metadata (column names and years).
            lstrec = fichierTXT.readline().split(',')
            # Locate the Eurostat input columns that are needed.
            dicEurostat = spiLib.defDicEurostat(lstrec)
            iUnit = dicEurostat['unit']
            iNace = dicEurostat['nace']
            iIndic = dicEurostat['indic']
            iSector = dicEurostat['sector']
            iGeoTime = dicEurostat['geotime']
            geotime = lstrec[iGeoTime].split('\t')
            endYear = geotime[1].strip()
            startYear = geotime[-1].strip()
            for ligneTXT in fichierTXT:
                ligne = ligneTXT.strip('\n').split(',')
                nace = ligne[iNace].strip()
                indicator = ligne[iIndic].strip()
                unit = ligne[iUnit].strip()
                geoTime = ligne[iGeoTime].split('\t')
                geoEuroStat = geoTime[0].strip()
                # The sector column existed in nama_nace64_e.tsv; it is gone
                # in ESA 2010 but the test is kept (-1 = column absent).
                if iSector == -1:
                    sector = 'S1'
                else:
                    sector = ligne[iSector].strip()
                try:
                    country = dicNation[geoEuroStat]
                    timeSerie = geoTime[1:]
                    if (indicator == indicatorEurostat and unit == Unit
                            and sector == 'S1' and nace in dicNace):
                        # Remember the code so the number of NACE codes can be
                        # checked against the SPI table afterwards.
                        dicNaceCheck[nace] = nace
                        # Process the Eurostat series.
                        vector = spiLib.defVectorYears(timeSerie, startYear,
                                                       endYear)
                        dicStartValue = spiLib.defDicStartValue(
                            timeSerie, country, nace, indicator,
                            dicStartValue, endYear)
                        dicIndicator = spiLib.defDicIndicator(
                            country, nace, vector, dicIndicator)
                        minStartYear, maxEndYear = spiLib.defMinMaxYear(
                            startYear, minStartYear, endYear, maxEndYear)
                except Exception:
                    # Best effort, as in the original: any failure on the row
                    # (normally an unknown geo code) is logged as "no country".
                    dicNoCountry[geoEuroStat] = geoEuroStat

    # dicStartValue carries the year of the first real value found in the
    # vectors; it can differ from the first year of the longest vector
    # because a value may be ':'.
    fileLog.write('highIndice ' + str(dicStartValue['startIndice']) +
                  ' highCountry ' + dicStartValue['startCountry'] +
                  ' Indicator ' + dicStartValue['startIndicator'] +
                  ' Nace ' + dicStartValue['startNace'] +
                  ' valeur ' + str(dicStartValue['startValeur']) +
                  ' startYear ' + str(dicStartValue['startYear']) + '\n')
    spiLib.defnoCountry(dicNoCountry, fileLog)
    spiLib.defDicNaceCheck(dicNaceCheck, dicNace, fileLog)
    # Compare the start year by vector with the start year by real value;
    # log the mismatch and keep the latter.
    if minStartYear != dicStartValue['startYear']:
        fileLog.write('annee min. pour les vecteurs =' + str(minStartYear) +
                      ' annee min. avec une valeur =' +
                      str(dicStartValue['startYear']) + '\n')
        minStartYear = dicStartValue['startYear']
    # Indicator processing.
    dicIndicator = spiLib.reverseAndNormalizeDicIndicator(
        dicIndicator, minStartYear, maxEndYear)
    spiLibCreateTable.createTable(nomenclature, dicIndicator, fileLog,
                                  minStartYear, dicNace, indicatorSpi,
                                  compteEurostat, G_tableName, G_FileExt)
    spiLibCreateTable.createTableNE(nomenclature, dicNation, endYear, fileLog,
                                    indicatorSpi, compteEurostat, G_tableName)
def traitementFichierTXT(indicatorSpi, indicatorEurostat, nomFichierTXT,
                         nomFichierTXT2, indicatorEurostat2, eurostatUnit):
    """Chain two country-level Eurostat series and create the SPI table.

    The first file is filtered on ``indicatorEurostat``; the second on
    ``indicatorEurostat2`` and ``eurostatUnit``.  Countries present in only
    one of the two get a vector of ``':'`` placeholders for the other, then
    each first-file vector is extended with the second-file vector, padding
    or trimming according to the gap between the first file's end year and
    the second file's start year.

    Relies on the module-level ``fileLog``, ``G_spiIndicator`` and
    ``G_tableName``.

    Args:
        indicatorSpi: not used in the body (see the review note at the end).
        indicatorEurostat: indicator code selecting rows of the first file.
        nomFichierTXT: path of the first file.
        nomFichierTXT2: path of the second file.
        indicatorEurostat2: indicator code selecting rows of the second file.
        eurostatUnit: unit code selecting rows of the second file.
    """
    dicIndicator = {}
    dicIndicator2 = {}
    dicNoCountry = {}
    dicNation = DBAccess.lectureNationEurostat({})
    minimumYearWithActualData = 999999

    # ``with`` closes each file even when a malformed row raises.
    with open(nomFichierTXT, 'r') as fichierTXT:
        lstrec = fichierTXT.readline().split(',')
        dicEurostat = spiLib.defDicEurostat(lstrec)
        iIndic = dicEurostat['indic']
        iGeoTime = dicEurostat['geotime']
        geotime = lstrec[iGeoTime].split('\t')
        # In the header the first year listed is the end year and the last
        # one the start year.
        endYear = int(geotime[1].strip())
        startYear = int(geotime[-1].strip())

        for ligneTXT in fichierTXT:
            ligne = ligneTXT.strip('\n').split(',')
            indic = ligne[iIndic].strip()
            geoTime = ligne[iGeoTime].split('\t')
            geoEuroStat = geoTime[0].strip()

            try:
                country = dicNation[geoEuroStat]
                timeSerie = geoTime[1:]
                if indic == indicatorEurostat:
                    vector = spiLib.defVectorYears(timeSerie, startYear,
                                                   endYear)
                    minimumYearWithActualData = spiLib.findMinimumYearWithActualData(
                        timeSerie, startYear, minimumYearWithActualData)
                    dicIndicator[country] = vector
            except Exception:
                # Best effort, as in the original: any failure on the row
                # (normally an unknown geo code) is logged as "no country".
                dicNoCountry[geoEuroStat] = geoEuroStat

    dicIndicator = spiLib.reverseAndNormalizeDicNoIndicator(
        dicIndicator, minimumYearWithActualData, endYear)

    with open(nomFichierTXT2, 'r') as fichierTXT:
        lstrec = fichierTXT.readline().split(',')
        dicEurostat = spiLib.defDicEurostat(lstrec)
        iIndic = dicEurostat['indic']
        iUnit = dicEurostat['unit']
        iGeoTime = dicEurostat['geotime']
        geotime = lstrec[iGeoTime].split('\t')
        endYear2 = int(geotime[1].strip())
        startYear2 = int(geotime[-1].strip())

        for ligneTXT in fichierTXT:
            ligne = ligneTXT.strip('\n').split(',')
            indic = ligne[iIndic].strip()
            unit = ligne[iUnit].strip()
            geoTime = ligne[iGeoTime].split('\t')
            geoEuroStat = geoTime[0].strip()

            try:
                country = dicNation[geoEuroStat]
                timeSerie = geoTime[1:]
                if indic == indicatorEurostat2 and unit == eurostatUnit:
                    vector = spiLib.defVectorYears(timeSerie, startYear2,
                                                   endYear2)
                    dicIndicator2[country] = vector
            except Exception:
                dicNoCountry[geoEuroStat] = geoEuroStat

    dicIndicator2 = spiLib.reverseAndNormalizeDicNoIndicator(
        dicIndicator2, startYear2, endYear2)

    # Give every country a vector on both sides; ':' marks a missing value.
    # A non-positive length yields an empty list, like the original range().
    longueur2 = endYear2 - startYear2 + 1
    for country in dicIndicator:
        if country not in dicIndicator2:
            dicIndicator2[country] = [':'] * longueur2

    longueur1 = endYear - minimumYearWithActualData + 1
    for country in dicIndicator2:
        if country not in dicIndicator:
            dicIndicator[country] = [':'] * longueur1

    # Number of years missing between the two series (negative = overlap).
    yearGap = startYear2 - endYear - 1

    for country in dicIndicator:
        serie = dicIndicator[country]
        if yearGap < 0:
            # Overlap: drop the overlapping tail of the first series.
            del serie[yearGap:]
        else:
            # Gap (possibly zero): pad the hole with placeholders.
            serie.extend([':'] * yearGap)
        serie.extend(dicIndicator2[country])

    spiLib.defnoCountry(dicNoCountry, fileLog)
    # NOTE(review): the table is created with the global G_spiIndicator, not
    # the indicatorSpi argument -- confirm this is intended.
    spiLibCreateTable.createTableCountryLevelEduTech(
        dicIndicator, G_spiIndicator, minimumYearWithActualData, fileLog,
        G_tableName)
Exemplo n.º 16
0
def traitementFichierTXT(indicatorSpi, categoryEurostat, ageEurostat,
                         iscoEurostat, unitEurostat, nomFichierTXT,
                         nomFichierTXT2, naceEurostat, itemEurostat,
                         unitEurostat2):
    """Read two Eurostat text extracts and build a country-level table.

    The first file is filtered on unit / age / isco / category and the
    second on unit / indic / nace.  For every row whose geo code is known
    to ``DBAccess.lectureNationEurostat`` and whose dimensions match, the
    time series is stored per country.  Both dictionaries are then passed
    through ``spiLib.reverseAndNormalizeDic`` using the first file's
    minimum year with actual data and its end year, and handed to
    ``spiLibCreateTable.createTableCountryLevelFdi``.

    Parameters
    ----------
    indicatorSpi : not referenced in this function's body.
    categoryEurostat, ageEurostat, iscoEurostat, unitEurostat :
        values a row of the first file must match to be kept.
    nomFichierTXT : path of the first file.
    nomFichierTXT2 : path of the second file.
    naceEurostat, itemEurostat, unitEurostat2 :
        values a row of the second file must match (``nace``, ``indic``
        and ``unit`` columns respectively).

    The module-level names ``fileLog``, ``G_spiIndicator`` and
    ``G_tableName`` are read here.
    """
    dicIndicator = {}
    dicIndicator2 = {}
    dicNoCountry = {}
    dicNation = DBAccess.lectureNationEurostat({})
    minimumYearWithActualData = 999999

    # ---- first file ------------------------------------------------------
    # 'with' guarantees the handle is released even if parsing raises.
    with open(nomFichierTXT, 'r') as fichierTXT:
        lstrec = fichierTXT.readline().split(',')
        dicEurostat = spiLib.defDicEurostat(lstrec)
        iCategory = dicEurostat['category']
        iUnit = dicEurostat['unit']
        iIsco = dicEurostat['isco']
        iAge = dicEurostat['age']
        iGeoTime = dicEurostat['geotime']
        geotime = lstrec[iGeoTime].split('\t')
        # The first year column is taken as the end year and the last one
        # as the start year.
        endYear = geotime[1].strip()
        startYear = geotime[-1].strip()

        for ligneTXT in fichierTXT:
            ligne = ligneTXT.strip('\n').split(',')
            category = ligne[iCategory].strip()
            isco = ligne[iIsco].strip()
            age = ligne[iAge].strip()
            unit = ligne[iUnit].strip()
            geoTime = ligne[iGeoTime].split('\t')
            geoEuroStat = geoTime[0].strip()

            try:
                country = dicNation[geoEuroStat]
                timeSerie = geoTime[1:]
                if (unit == unitEurostat and age == ageEurostat
                        and isco == iscoEurostat
                        and category == categoryEurostat):
                    vector = spiLib.defVectorYears(timeSerie, startYear,
                                                   endYear)
                    minimumYearWithActualData = (
                        spiLib.findMinimumYearWithActualData(
                            timeSerie, int(startYear),
                            minimumYearWithActualData))
                    dicIndicator[country] = vector
            except Exception:
                # Best effort: any failure on a row flags its geo code, but
                # KeyboardInterrupt / SystemExit are no longer swallowed.
                dicNoCountry[geoEuroStat] = geoEuroStat

    # ---- second file -----------------------------------------------------
    with open(nomFichierTXT2, 'r') as fichierTXT:
        lstrec = fichierTXT.readline().split(',')
        dicEurostat = spiLib.defDicEurostat(lstrec)
        iNace = dicEurostat['nace']
        iUnit = dicEurostat['unit']
        iIndic = dicEurostat['indic']
        iGeoTime = dicEurostat['geotime']
        geotime = lstrec[iGeoTime].split('\t')
        endYear2 = geotime[1].strip()
        startYear2 = geotime[-1].strip()

        for ligneTXT in fichierTXT:
            ligne = ligneTXT.strip('\n').split(',')
            nace = ligne[iNace].strip()
            unit = ligne[iUnit].strip()
            indic = ligne[iIndic].strip()
            geoTime = ligne[iGeoTime].split('\t')
            geoEuroStat = geoTime[0].strip()

            try:
                country = dicNation[geoEuroStat]
                timeSerie = geoTime[1:]
                if (unit == unitEurostat2 and indic == itemEurostat
                        and nace == naceEurostat):
                    vector = spiLib.defVectorYears(timeSerie, startYear2,
                                                   endYear2)
                    dicIndicator2[country] = vector
            except Exception:
                dicNoCountry[geoEuroStat] = geoEuroStat

    # Report unknown geo codes only once both files have been read, so the
    # codes found in the second file are logged as well.
    spiLib.defnoCountry(dicNoCountry, fileLog)

    # Both dictionaries are normalized over the same year range, taken
    # from the first file.
    dicIndicator = spiLib.reverseAndNormalizeDic(dicIndicator,
                                                 minimumYearWithActualData,
                                                 int(endYear))
    dicIndicator2 = spiLib.reverseAndNormalizeDic(dicIndicator2,
                                                  minimumYearWithActualData,
                                                  int(endYear))

    spiLibCreateTable.createTableCountryLevelFdi(dicIndicator, dicIndicator2,
                                                 G_spiIndicator,
                                                 minimumYearWithActualData,
                                                 fileLog, G_tableName)
Exemplo n.º 17
0
def traitementFichierTXT(indicatorEurostat, unitEurostat, nomenclature, cpa,
                         files, fileLog, tableName):
    """Build the import-penetration table from a set of Eurostat files.

    Export ('x') and import ('m') reference data are fetched through
    ``DBAccess.lectureCpaNaceIndicatorData``.  Every file in ``files`` is
    then scanned: rows whose geo code is known, whose ``indic`` and
    ``unit`` columns match the requested values and whose ``nace`` code is
    a key of the dictionary returned by ``spiLib.defSelectdicNace`` are
    accumulated with ``spiLib.defDicIndicator``.  The three dictionaries
    are normalized up to the largest end year seen on a matching row and
    passed to ``spiLibCreateTable.createTableCompetitionImportpen``.

    Parameters
    ----------
    indicatorEurostat : value the ``indic`` column must equal.
    unitEurostat : value the ``unit`` column must equal.
    nomenclature : forwarded to ``spiLib.defSelectdicNace``,
        ``spiLibTotal.calcNaceAggregates`` and the table writer.
    cpa : forwarded to ``DBAccess.lectureCpaNaceIndicatorData``.
    files : list of file paths; it is sorted in place in descending order.
    fileLog : log handle forwarded to the spiLib reporting helpers.
    tableName : forwarded to the table writer.
    """
    infoX = DBAccess.lectureCpaNaceIndicatorData('x', cpa, 'external')
    infoM = DBAccess.lectureCpaNaceIndicatorData('m', cpa, 'external')
    refDicX = infoX[0]
    startYearX = infoX[1]
    refDicM = infoM[0]
    startYearM = infoM[1]
    refDicGO = {}
    dicNoCountry = {}
    dicNaceCheck = {}
    dicNation = DBAccess.lectureNationEurostat({})
    dicNace = spiLib.defSelectdicNace(nomenclature, 'nama')
    # NOTE(review): updated in the loop below but not read afterwards in
    # this function; kept so the helper is still called as before.
    minimumYearWithActualData = 999999
    maxEndYear = -1

    # Same in-place descending order as sort() followed by reverse().
    files.sort(reverse=True)

    for txt in files:
        # 'with' closes each input file; previously the handles were never
        # closed.  The local is no longer named 'file' (a Python 2 builtin).
        with open(txt, 'r') as fichier:
            list1st = fichier.readline().split(',')
            dicEurostat = spiLib.defDicEurostat(list1st)
            iUnit = dicEurostat['unit']
            iIndic = dicEurostat['indic']
            iNace = dicEurostat['nace']
            iGeoTime = dicEurostat['geotime']
            geotime = list1st[iGeoTime].split('\t')

            # The first year column is taken as the end year and the last
            # one as the start year.
            endYear = geotime[1].strip()
            startYear = geotime[-1].strip()

            for line in fichier:
                lineList = line.strip('\n').split(',')
                nace = lineList[iNace].strip()
                indicator = lineList[iIndic].strip()
                unit = lineList[iUnit].strip()
                geoTime = lineList[iGeoTime].split('\t')
                geo = geoTime[0].strip()

                try:
                    # Every nace code met is recorded, whether or not the
                    # row is kept, for the check report below.
                    dicNaceCheck[nace] = nace
                    country = dicNation[geo]
                    timeSerie = geoTime[1:]

                    # 'in' replaces dict.has_key(), which no longer exists
                    # on Python 3.
                    if (indicator == indicatorEurostat and nace in dicNace
                            and unit == unitEurostat):
                        vector = spiLib.defVectorYears(timeSerie, startYear,
                                                       endYear)
                        minimumYearWithActualData = (
                            spiLib.findMinimumYearWithActualData(
                                timeSerie, int(startYear),
                                minimumYearWithActualData))
                        refDicGO = spiLib.defDicIndicator(country, nace,
                                                          vector, refDicGO)
                        if int(endYear) > maxEndYear:
                            maxEndYear = int(endYear)
                except Exception:
                    # Best effort: any failure on a row flags its geo code,
                    # but KeyboardInterrupt / SystemExit are not swallowed.
                    dicNoCountry[geo] = geo

    refDicGO = spiLib.reverseAndNormalizeDic(refDicGO, startYearX, maxEndYear)
    refDicGO = spiLibTotal.calcNaceAggregates(refDicGO, nomenclature, 'nama')

    # Bring the export and import dictionaries onto the range that starts
    # at startYearX and ends at maxEndYear.
    refDicX = spiLib.normalizeDicSize(refDicX, startYearX, startYearX,
                                      maxEndYear)
    refDicM = spiLib.normalizeDicSize(refDicM, startYearM, startYearX,
                                      maxEndYear)

    spiLib.defnoCountry(dicNoCountry, fileLog)
    spiLib.defDicNaceCheck(dicNaceCheck, dicNace, fileLog)

    spiLibCreateTable.createTableCompetitionImportpen(nomenclature, refDicGO,
                                                      refDicX, refDicM,
                                                      startYearX, fileLog,
                                                      tableName)