def sreduce(self, key, values, task):
    """
    Emit the cartesian product of the records grouped under one join key.

    Every value is a JSON-encoded record that carries a '__joinorder__'
    field.  That field is dropped and each remaining field name gets the
    order value appended to it.  Records whose order is 1 are buffered;
    any other record is merged with every buffered record and each merged
    record is collected under the join key.

    NOTE(review): presumably the order-1 records arrive first for a key;
    a record of another order is only paired with what has been buffered
    so far -- confirm against the mapper / sort order.

    key    -- the join key shared by everything in 'values'
    values -- iterable of JSON-encoded records
    task   -- collector; task.collect(key, value) receives each output row
    """
    buffered_rows = []
    for raw in values:
        record = happy.json.decode(raw)
        order = record['__joinorder__']
        # The loop variable must not be called 'key': that would overwrite
        # the join key, and rows would be collected under a field name.
        renamed = {}
        for field in record.keys():
            if field != '__joinorder__':
                renamed["%s%s" % (field, order)] = record[field]
        if order == 1:
            buffered_rows.append(renamed)
            continue
        try:
            for buffered in buffered_rows:
                merged = {}
                merged.update(renamed)
                # Buffered (order 1) fields are applied last, so they win
                # if a field name clashes.
                merged.update(buffered)
                task.collect(key, happy.json.encode(merged))
        except:
            # Deliberately a catch-all: a failed emit is logged and the
            # reducer moves on to the next record.  Narrowing it could let
            # errors that were previously swallowed abort the task.
            logger.error("JOIN FAILED ON RECORD: (%s, %s)" % (key, raw))
def fire(self):
    """
    Execute the TripleQuery job for this node, blocking until it is done.

    The job is built from this node's query, input paths and output path.
    When the run finishes without raising, the status is set to 'done'.
    If an Exception is raised, the output path is deleted, the status is
    set to 'fail', and the exception is not re-raised.
    """
    query_job = TripleQuery(self.query, self.inputpaths, self.outputpath)
    try:
        query_job.run()
        # The debug call stays inside the try so that a failure while
        # logging is handled the same way as a failed run.
        logger.debug("TripleQuery run. Setting status to done.")
        self.status = 'done'
    except Exception:
        logger.error("Caught exception in TripleQuery. Setting status to fail and deleting output.")
        # Remove the output first; the status only changes once that worked.
        dfs.delete(self.outputpath)
        self.status = 'fail'
def fire(self):
    """
    Execute the HappyJob attached to this node, blocking until it is done.

    Nothing happens when the node has no job.  Otherwise the job is run:
    the status is set to 'done' if the run finishes without raising.  If
    an Exception is raised, the output path is deleted, the status is set
    to 'fail', and the exception is not re-raised.
    """
    if not self.job:
        # No job configured: leave the status untouched.
        return
    runnable = self.job
    try:
        runnable.run()
        # The debug call stays inside the try so that a failure while
        # logging is handled the same way as a failed run.
        logger.debug("Job run. Setting status to done.")
        self.status = 'done'
    except Exception:
        logger.error("Caught exception. Setting status to fail and deleting output.")
        # Remove the output first; the status only changes once that worked.
        dfs.delete(self.outputpath)
        self.status = 'fail'
def sreduce(self, key, values, task):
    """
    Emit the inner/outer join of the records grouped under one join key.

    Every value is a JSON-encoded record carrying a '__joinorder__' field.
    Records whose order is 1 are buffered; any other record is merged with
    every buffered record and collected with '__jointype__' set to 'inner'.
    self.outer selects the outer rows that are emitted in addition:

      'right' or 'both' -- a record with order other than 1 is also
                           collected on its own, tagged 'right', when no
                           order-1 record has been seen yet for this key
      'left' or 'both'  -- if no record with order other than 1 was seen,
                           each buffered record is collected, tagged 'left'

    Field names are copied unchanged, so '__joinorder__' stays in the
    output; in merged rows the buffered (order 1) values win on a clash.

    NOTE(review): presumably the order-1 records arrive first for a key;
    the 'inner' and 'right' decisions only look at what has been seen so
    far -- confirm against the mapper / sort order.

    key    -- the join key shared by everything in 'values'
    values -- iterable of JSON-encoded records
    task   -- collector; task.collect(key, value) receives each output row
    """
    emit_left_only = (self.outer == 'left' or self.outer == 'both')
    emit_right_only = (self.outer == 'right' or self.outer == 'both')
    buffered_rows = []
    seen_first = False
    seen_second = False
    for raw in values:
        record = happy.json.decode(raw)
        order = record['__joinorder__']
        # The loop variable must not be called 'key': that would overwrite
        # the join key, and rows would be collected under a field name.
        row = {}
        for field in record.keys():
            row[field] = record[field]
        if order == 1:
            seen_first = True
            buffered_rows.append(row)
            continue
        try:
            seen_second = True
            for buffered in buffered_rows:
                merged = {}
                merged.update(row)
                merged.update(buffered)
                merged['__jointype__'] = 'inner'
                task.collect(key, happy.json.encode(merged))
            if emit_right_only and not seen_first:
                # Nothing buffered to pair with: emit the row by itself.
                row['__jointype__'] = 'right'
                task.collect(key, happy.json.encode(row))
        except:
            # Deliberately a catch-all: a failed emit is logged and the
            # reducer moves on to the next record.  Narrowing it could let
            # errors that were previously swallowed abort the task.
            logger.error("JOIN FAILED ON RECORD: (%s, %s)" % (key, raw))
    if emit_left_only and not seen_second:
        # No partner record ever showed up: emit the buffered rows alone.
        for buffered in buffered_rows:
            buffered['__jointype__'] = 'left'
            task.collect(key, happy.json.encode(buffered))