def updateMetacollection(iterator, metacollection, incremental, versionNumber, totalVariables, tcs, spcs, volumes, dimensions, times, locations, varFormats):
    """Write per-subcollection metadata records for one version of a collection.

    Builds the metadata documents held on ``iterator.metadata`` (the '' key is
    the whole-collection record), attaches derived fields (title, value/name
    processors, volumes, dimensions, common dates/locations), optionally merges
    in the previous version's metadata when running incrementally, and inserts
    one record per subcollection into ``metacollection``.

    Parameters
    ----------
    iterator : parser object exposing ``.metadata`` and ``.columnGroups``
    metacollection : PyMongo collection holding metadata records
    incremental : bool -- if True, merge with version ``versionNumber - 1``
    versionNumber : int -- version stamp written onto every record
    totalVariables : list of all column names for this version
    tcs, spcs : time / space column-name lists (currently unused here;
        kept for interface compatibility with the caller)
    volumes, dimensions, times, locations : per-subcollection stats dicts,
        keyed by subcollection name ('' = whole collection)
    varFormats : dict of per-variable format information
    """
    checkMetadata(iterator)
    metadata = iterator.metadata

    # Whole-collection record: record the column list and normalize the
    # source description to an ordered SON document.
    metadata['']['columns'] = totalVariables
    metadata['']['source'] = pm.son.SON(metadata['']['source'])
    metadata['']['title'] = metadata['']['source']['dataset']['name']
    metadata['']['shortTitle'] = metadata['']['source']['dataset']['shortName']

    # Client-side value processors: JS snippets evaluated by the front end to
    # render raw time/space values as human-readable phrases.
    value_processors = metadata[''].get('valueProcessors', {})
    if metadata['']['columnGroups'].get('timeColumns', None):
        value_processors['timeColumns'] = 'return require("timedate").phrase(value);'
    if metadata['']['columnGroups'].get('spaceColumns', None):
        value_processors['spaceColumns'] = 'return require("location").phrase(value);'
    metadata['']['valueProcessors'] = value_processors

    # Name processors: render time-valued column *names* using the dataset's
    # declared date format.  NOTE(review): assumes 'dateFormat' is present
    # whenever 'timeColNames' is -- confirm against parser contract.
    name_processors = metadata[''].get('nameProcessors', {})
    if metadata['']['columnGroups'].get('timeColNames', None):
        dateFormat = metadata['']['dateFormat']
        name_processors['timeColNames'] = 'return require("timedate").phrase(require("timedate").stringtomongo(value,"' + dateFormat + '"));'
    metadata['']['nameProcessors'] = name_processors

    # (name, versionNumber) uniquely identifies a metadata record.
    metacollection.ensure_index([('name', pm.DESCENDING), ('versionNumber', pm.DESCENDING)], unique=True)

    metadata['']['varFormats'] = varFormats

    # Attach per-subcollection statistics computed during parsing.
    for k in metadata.keys():
        metadata[k]['volume'] = volumes[k]
        metadata[k]['dimensions'] = dimensions[k]
        getCommonDatesLocations(iterator, metadata, times, locations, dimensions, k)

    if incremental:
        # Carry forward anything from the previous version that this
        # increment did not touch.
        previousMetadata = dict([(p["name"], p) for p in metacollection.find({'versionNumber': versionNumber - 1})])
        if previousMetadata:
            for x in previousMetadata.values():
                x.pop('_id')  # never re-insert the old Mongo ids
            # Whole subcollections missing from this increment.
            for k in previousMetadata.keys():
                if k not in metadata.keys():
                    metadata[k] = previousMetadata[k]
            # Top-level keys of the whole-collection record.
            for k in previousMetadata[''].keys():
                if k not in metadata[''].keys():
                    metadata[''][k] = previousMetadata[''][k]
            # Column groups: carry forward missing groups, extend existing
            # ones with the previous version's entries.
            for k in previousMetadata['']['columnGroups'].keys():
                if k not in metadata['']['columnGroups'].keys():
                    metadata['']['columnGroups'][k] = previousMetadata['']['columnGroups'][k]
                else:
                    metadata['']['columnGroups'][k] += previousMetadata['']['columnGroups'][k]

    # Stamp and insert one record per subcollection.
    for k in metadata.keys():
        x = metadata[k]
        x['name'] = k
        x['versionNumber'] = versionNumber
        # Was `id = metacollection.insert(...)`: the returned id was unused
        # and shadowed the builtin, so it is dropped.
        metacollection.insert(x, safe=True)
def updateCollection(download_dir,collectionName,parserClass,checkpath,certpath,parserArgs=None,parserKwargs=None,incremental=False): connection = pm.Connection(document_class=pm.son.SON) source_metadata = get_source_data(collectionName) db = connection['govdata'] assert not '__' in collectionName, 'collectionName must not contain consecutive underscores' metaCollectionName = '__' + collectionName + '__' versionName = '__' + collectionName + '__VERSIONS__' sliceDBName = '__' + collectionName + '__SLICES__' collection = db[collectionName] metacollection = db[metaCollectionName] versions = db[versionName] sliceDB = db[sliceDBName] if incremental: if versionName not in db.collection_names(): startInc = 0 else: startInc = get_max_increment_fromDB(versions) + 1 endInc = get_max_increment(download_dir) sources = [increment_format(download_dir,i) for i in range(startInc,endInc + 1)] else: sources = [download_dir] startInc = endInc = None if parserArgs == None: parserArgs = () if parserKwargs == None: parserKwargs = {} if sources: iterator = parserClass(sources[0],*parserArgs,**parserKwargs) iterator.set_source_metadata(source_metadata) uniqueIndexes = iterator.uniqueIndexes ColumnGroups = iterator.columnGroups sliceColTuples = getSliceColTuples(iterator.sliceCols) sliceColTuplesFlat = uniqify([tuple(sorted(uniqify(Flatten(sct)))) for sct in sliceColTuples]) sliceColList = uniqify(Flatten(ListUnion(sliceColTuples))) ContentCols = set(sliceColList + getContentCols(iterator)) if hasattr(iterator,'dateFormat'): TimeFormatter = td.mongotimeformatter(iterator.dateFormat) if collectionName in db.collection_names(): versionNumber = max(versions.distinct('versionNumber')) + 1 storedAllMetadata = metacollection.find_one({'name':'','versionNumber':versionNumber-1}) totalVariables = storedAllMetadata['columns'] VarMap = dict(zip(totalVariables,[str(x) for x in range(len(totalVariables))])) #check things are the same #and check consistent do so for all soruces else: versionNumber = 0 
IndexCols = uniqify([x for x in ['subcollections'] + sliceColList + ListUnion([ColGroupsFlatten(ColumnGroups,k) for k in ['indexColumns','labelColumns','timeColumns','spaceColumns']]) if x not in uniqueIndexes]) totalVariables = SPECIAL_KEYS + uniqueIndexes + IndexCols assert not any(['.' in x or ('__' in x and x not in SPECIAL_KEYS) or x in ColumnGroups.keys() for x in totalVariables]) VarMap = dict(zip(totalVariables,map(str,range(len(totalVariables))))) cols = zip([VarMap[c] for c in uniqueIndexes + ['__versionNumber__']],[pm.DESCENDING]*(len(uniqueIndexes) + 1)) collection.ensure_index(cols,unique=True,dropDups=True) for col in IndexCols: collection.ensure_index(VarMap[col]) sliceDB.ensure_index('slice',unique=True,dropDups=True) vNInd = VarMap['__versionNumber__'] retInd = VarMap['__retained__'] specialKeyInds = [VarMap[k] for k in SPECIAL_KEYS] if 'timeColumns' in iterator.columnGroups.keys(): tcs = iterator.columnGroups['timeColumns'] else: tcs = [] if 'spaceColumns' in iterator.columnGroups.keys(): spcs = iterator.columnGroups['spaceColumns'] else: spcs = [] toParse = ListUnion([RecursiveFileList(source + '__PARSE__') for source in sources]) oldc = None SpaceCache = {} volumes = {'':0} dimensions = {'':[]} times = {'':[]} locations = {'':[]} varFormats = {} for file in toParse: iterator.refresh(file) checkMetadata(iterator) tcs = iterator.columnGroups.get('timeColumns',[]) spcs = iterator.columnGroups.get('spaceColumns',[]) index = 0 for c in iterator: newVars = [x for x in c.keys() if not x in totalVariables] assert not any (['__' in x or '.' in x or x in ColumnGroups.keys() for x in newVars]) , '__ and . must not appear in key names.' 
totalVariables += newVars VarMap.update(dict(zip(newVars,map(str,range(len(totalVariables) - len(newVars),len(totalVariables)))))) for tc in tcs: #time handling if tc in c.keys(): c[tc] = TimeFormatter(c[tc]) if COMPLETE_SPACE: for spc in spcs: if spc in c.keys(): #space t = getT(c[spc]) if t in SpaceCache.keys(): c[spc] = SpaceCache[t].copy() else: c[spc] = loc.SpaceComplete(c[spc]) SpaceCache[t] = c[spc].copy() if index % 100 == 0: print 'At', index index += 1 sctf = processSct(sliceColTuplesFlat,oldc,c) processRecord(c,collection,VarMap,totalVariables,uniqueIndexes,versionNumber,specialKeyInds,incremental,sliceDB,sctf,ContentCols) incrementThings(c,volumes,dimensions,times,locations,varFormats,tcs,spcs) oldc = c any_deleted = False if incremental: collection.update({vNInd:{'$lte': versionNumber - 1}}, {'$set':{vNInd:versionNumber}}) sliceDB.update({},{'$set':{'version':versionNumber}}) else: deleted = collection.find({vNInd:versionNumber - 1, retInd : {'$exists':False}}) for d in deleted: any_deleted = True sliceDelete(d,collection,sliceColTuples,VarMap,sliceDB,version) if any_deleted: subColInd = str(totalVariables.index('Subcollections')) subcols = [''] + uniqify(ListUnion(collection.distinct(subColInd))) for sc in subcols: volumes[sc] = collection.find({subColInd:sc}).count() dimensions[sc] = [k for k in totalVariables if collection.find_one({subColInd:sc,str(totalVariables.index(k)):{'$exists':True}})] times[sc] = ListUnion([collection.find({subColInd:sc}).distinct(t) for t in tcs]) locations[sc] = ListUnion([collection.find({subColInd:sc}).distinct(t) for t in spcs]) updateMetacollection(iterator,metacollection,incremental,versionNumber,totalVariables,tcs,spcs,volumes,dimensions,times,locations,varFormats) updateAssociatedFiles(sources,collection) updateVersionHistory(versionNumber,versions,startInc,endInc) updateSourceDBFromCollections(collectionNames = [collectionName]) connection.disconnect() createCertificate(certpath,'Collection ' + collectionName + ' 
written to DB.')