# Names of the petl join functions that ``join`` can dispatch to. Each entry
# is looked up as an attribute of the ``petl`` module.
_JOIN_STRATEGIES = (
    'join', 'leftjoin', 'lookupjoin', 'rightjoin', 'outerjoin', 'antijoin',
    'hashjoin', 'hashleftjoin', 'hashlookupjoin', 'hashrightjoin',
    'hashantijoin',
)

# Strategies that are never given a ``missing`` fill value.
_STRATEGIES_WITHOUT_MISSING = ('join', 'antijoin', 'hashjoin', 'hashantijoin')


def join(data, strategy, source_left, source_right, destination, key_left, key_right, prefix_left, prefix_right, presorted, buffersize, tempdir, cache, missing):
    """Perform a join on two data tables.

    The two source tables are read from ``data``, joined with the petl
    function named by ``strategy``, and the result is stored back into
    ``data`` under ``destination``.

    Args:
        data: Store exposing ``get(name)`` and ``set(name, table)``.
        strategy: Name of the petl join function to use; one of
            ``_JOIN_STRATEGIES``.
        source_left: Name of the left table in ``data``.
        source_right: Name of the right table in ``data``.
        destination: Name under which the joined table is stored.
        key_left: Join key for the left table.
        key_right: Join key for the right table. When equal to ``key_left``
            a single ``key`` argument is passed instead of ``lkey``/``rkey``.
        prefix_left: Optional field-name prefix for left fields (not passed
            to anti-join strategies).
        prefix_right: Optional field-name prefix for right fields (not
            passed to anti-join strategies).
        presorted: Forwarded only when it is exactly ``True``.
        buffersize: Forwarded only when it is not ``None``.
        tempdir: Forwarded only when truthy.
        cache: Accepted for interface compatibility; it is not forwarded
            to petl by this function.
        missing: Fill value forwarded to every strategy except those in
            ``_STRATEGIES_WITHOUT_MISSING``.

    Raises:
        ValueError: If ``strategy`` is not a supported strategy name.
    """
    # Fail fast with a clear message instead of reaching ``data.set`` with
    # an unbound result when the strategy name is not recognised.
    if strategy not in _JOIN_STRATEGIES:
        raise ValueError(
            "unknown join strategy %r; expected one of: %s"
            % (strategy, ', '.join(_JOIN_STRATEGIES))
        )

    source_left = data.get(source_left)
    source_right = data.get(source_right)

    kwargs = {}
    if key_left == key_right:
        kwargs['key'] = key_left
    else:
        kwargs['lkey'] = key_left
        kwargs['rkey'] = key_right

    # NOTE(review): these three options are forwarded to every strategy
    # whenever the caller sets them - confirm that the hash* functions
    # accept them.
    if presorted is True:
        kwargs['presorted'] = presorted
    if buffersize is not None:
        kwargs['buffersize'] = buffersize
    if tempdir:
        kwargs['tempdir'] = tempdir

    # Anti-joins do not receive field-name prefixes.
    if 'anti' not in strategy:
        if prefix_left is not None:
            kwargs['lprefix'] = prefix_left
        if prefix_right is not None:
            kwargs['rprefix'] = prefix_right

    if strategy not in _STRATEGIES_WITHOUT_MISSING:
        kwargs['missing'] = missing

    # Every supported strategy name is also the name of the petl function
    # implementing it, so 'hashjoin' resolves to petl.hashjoin and
    # 'hashantijoin' to petl.hashantijoin.
    join_func = getattr(petl, strategy)
    data.set(destination, join_func(source_left, source_right, **kwargs))
def join_execute(cl, cr, join, **kwargs):
    """Build both input tables and join them according to ``join``.

    Args:
        cl: Zero-argument callable producing the left table.
        cr: Zero-argument callable producing the right table.
        join: Join type, compared against the ``Join`` members ``UNION``,
            ``INNER``, ``LEFT``, ``RIGHT`` and ``FULL``.
        **kwargs: Optional ``addLfields`` / ``addRfields`` values, which
            are passed to ``etl.addfields`` for the left / right table
            before joining, plus join options. For every join type other
            than ``Join.UNION`` the options are passed through
            ``filter_keys`` with the names ``key``, ``lkey``, ``rkey``,
            ``missing``, ``presorted``, ``buffersize``, ``tempdir`` and
            ``cache`` before being handed to the petl join function.

    Returns:
        The table returned by the selected petl join function.

    Raises:
        ValueError: If ``join`` matches none of the supported ``Join``
            members.
    """
    left, right = cl(), cr()
    if 'addLfields' in kwargs:
        left = etl.addfields(left, kwargs['addLfields'])
    if 'addRfields' in kwargs:
        right = etl.addfields(right, kwargs['addRfields'])

    # Join.UNION is implemented as a cross join, which takes no options.
    if join == Join.UNION:
        return etl.crossjoin(left, right)

    # presumably filter_keys keeps only the listed option names; verify
    # against its definition.
    options = filter_keys(
        kwargs,
        ("key", "lkey", "rkey", "missing", "presorted", "buffersize",
         "tempdir", "cache"),
    )
    if join == Join.INNER:
        return etl.join(left, right, **options)
    if join == Join.LEFT:
        return etl.leftjoin(left, right, **options)
    if join == Join.RIGHT:
        return etl.rightjoin(left, right, **options)
    if join == Join.FULL:
        return etl.outerjoin(left, right, **options)

    # Previously an unmatched join type fell through to ``return c`` with
    # ``c`` never assigned; report the actual problem instead.
    raise ValueError("unsupported join type: %r" % (join,))
# rightjoin table1 = [['id', 'colour'], [1, 'blue'], [2, 'red'], [3, 'purple']] table2 = [['id', 'shape'], [1, 'circle'], [3, 'square'], [4, 'ellipse']] from petl import rightjoin, look look(table1) look(table2) table3 = rightjoin(table1, table2, key='id') look(table3) # outerjoin table1 = [['id', 'colour'], [1, 'blue'], [2, 'red'], [3, 'purple']] table2 = [['id', 'shape'], [1, 'circle'], [3, 'square'], [4, 'ellipse']] from petl import outerjoin, look
# leftjoin() ############ import petl as etl table1 = [['id', 'colour'], [1, 'blue'], [2, 'red'], [3, 'purple']] table2 = [['id', 'shape'], [1, 'circle'], [3, 'square'], [4, 'ellipse']] table3 = etl.leftjoin(table1, table2, key='id') table3 # rightjoin() ############# import petl as etl table1 = [['id', 'colour'], [1, 'blue'], [2, 'red'], [3, 'purple']] table2 = [['id', 'shape'], [1, 'circle'], [3, 'square'], [4, 'ellipse']] table3 = etl.rightjoin(table1, table2, key='id') table3 # outerjoin() ############# import petl as etl table1 = [['id', 'colour'], [1, 'blue'], [2, 'red'], [3, 'purple']] table2 = [['id', 'shape'], [1, 'circle'], [3, 'square'], [4, 'ellipse']] table3 = etl.outerjoin(table1, table2, key='id') table3 # crossjoin() ############# import petl as etl
table3 # rightjoin() ############# import petl as etl table1 = [['id', 'colour'], [1, 'blue'], [2, 'red'], [3, 'purple']] table2 = [['id', 'shape'], [1, 'circle'], [3, 'square'], [4, 'ellipse']] table3 = etl.rightjoin(table1, table2, key='id') table3 # outerjoin() ############# import petl as etl table1 = [['id', 'colour'], [1, 'blue'], [2, 'red'], [3, 'purple']] table2 = [['id', 'shape'], [1, 'circle'], [3, 'square'], [4, 'ellipse']]