Пример #1
0
 def __init__ (self, jars = [], properties = {}):
     ''' Initialize Pig. '''
     for jar in jars:
         logger.debug (" >>> register jar: %s", jar)
         Pig.registerJar (jar)
     for key in properties:
         logger.debug (" >>> set property: %s => %s", key, properties[key])
         Pig.set (key, properties [key]) 
Пример #2
0
def pig_init():
    """Apply the Pig settings shared by all runs.

    Registers the piggybank jar, defines short aliases for five piggybank
    UDFs, and sets 'pig.cachedbag.memusage' to '0.6'.
    """
    # The piggybank in use comes from
    # s3://elasticmapreduce/libs/pig/0.9.1/piggybank-0.9.1-amzn.jar
    Pig.registerJar('/usr/share/pig/contrib/piggybank/piggybank.jar')

    # (alias, UDF constructor expression) pairs, defined in this order.
    udf_aliases = (
        ('DATE_TIME', 'org.apache.pig.piggybank.evaluation.datetime.DATE_TIME()'),
        ('EXTRACT', 'org.apache.pig.piggybank.evaluation.string.EXTRACT()'),
        ('FORMAT', 'org.apache.pig.piggybank.evaluation.string.FORMAT()'),
        ('FORMAT_DT', 'org.apache.pig.piggybank.evaluation.datetime.FORMAT_DT()'),
        ('REPLACE', 'org.apache.pig.piggybank.evaluation.string.REPLACE()'),
    )
    for alias, udf_expr in udf_aliases:
        Pig.define(alias, udf_expr)

    # The box is dedicated to Pig, so let cached bags use a lot of memory.
    Pig.set('pig.cachedbag.memusage', '0.6')