Exemplo n.º 1
0
 def sreduce(self, key, values, task):
     """
     Emit the cartesian product of the records grouped under one join key.

     Each value is a JSON-encoded record carrying a '__joinorder__' field.
     Every other field is renamed to "<field><order>" so that columns coming
     from the two inputs cannot collide.  Records whose order is 1 are
     buffered; every other record is merged with each buffered record and
     the result is collected under the join key.

     NOTE(review): an order-2 record is only joined with the order-1 records
     seen before it, so this presumably relies on order-1 records arriving
     first within a group -- confirm against the job's sort configuration.

     key    -- the join key shared by all of the values.
     values -- iterable of JSON-encoded records.
     task   -- collector; output is written via task.collect(key, json).
     """
     first_records = []
     for encoded in values:
         record = happy.json.decode(encoded)
         order = record['__joinorder__']
         # Use a dedicated loop variable: iterating with the name 'key' would
         # overwrite the join key that is emitted and logged further down.
         renamed = {}
         for field in record.keys():
             if field != '__joinorder__':
                 renamed["%s%s" % (field, order)] = record[field]
         if order == 1:
             first_records.append(renamed)
         else:
             try:
                 for first in first_records:
                     merged = {}
                     merged.update(renamed)
                     # Buffered (order 1) fields are applied last.
                     merged.update(first)
                     task.collect(key, happy.json.encode(merged))
             except:
                 # Best effort: any failure while emitting for this record is
                 # logged and the reducer carries on with the next value.
                 # NOTE(review): the bare except is kept so the set of
                 # swallowed errors is unchanged; narrow it once the
                 # exceptions raised by task.collect are known.
                 logger.error("JOIN FAILED ON RECORD: (%s, %s)" % (key, encoded))
Exemplo n.º 2
0
 def fire(self):
     """
     Execute the TripleQuery job for this node, blocking until it finishes.

     The job is built from self.query, self.inputpaths and self.outputpath.
     On success self.status becomes 'done'.  If an Exception is raised, the
     output path is deleted and self.status becomes 'fail'.
     """
     query_job = TripleQuery(self.query, self.inputpaths, self.outputpath)
     try:
         query_job.run()
         logger.debug("TripleQuery run.  Setting status to done.")
         self.status = 'done'
     except Exception:
         logger.error("Caught exception in TripleQuery.  Setting status to fail and deleting output.")
         # Remove the output path before flagging the failure.
         dfs.delete(self.outputpath)
         self.status = 'fail'
Exemplo n.º 3
0
 def fire(self):
     """
     Execute the HappyJob attached to this node, blocking until it finishes.

     Does nothing when self.job is falsy.  Otherwise self.status becomes
     'done' on success; if an Exception is raised, the output path is
     deleted and self.status becomes 'fail'.
     """
     if not self.job:
         # No job attached: leave the status untouched.
         return
     pending = self.job
     try:
         pending.run()
         logger.debug("Job run.  Setting status to done.")
         self.status = 'done'
     except Exception:
         logger.error("Caught exception.  Setting status to fail and deleting output.")
         # Remove the output path before flagging the failure.
         dfs.delete(self.outputpath)
         self.status = 'fail'
Exemplo n.º 4
0
 def sreduce(self, key, values, task):
     """
     Join the records grouped under one join key, with optional outer rows.

     Each value is a JSON-encoded record carrying a '__joinorder__' field.
     Records whose order is 1 are buffered; every other record is merged
     with each buffered record and emitted with '__jointype__' = 'inner'.

     self.outer selects the additional unmatched rows that are emitted:
       'left'  -- buffered order-1 records, tagged 'left', when no other
                  record was seen for this key;
       'right' -- a non-order-1 record, tagged 'right', when no order-1
                  record has been seen for this key;
       'both'  -- both of the above.

     NOTE(review): a non-order-1 record is only matched against order-1
     records seen before it, so this presumably relies on order-1 records
     arriving first within a group -- confirm against the job's sort
     configuration.

     key    -- the join key shared by all of the values.
     values -- iterable of JSON-encoded records.
     task   -- collector; output is written via task.collect(key, json).
     """
     first_records = []
     found_file1 = False
     found_file2 = False
     outer_file1 = self.outer in ('left', 'both')
     outer_file2 = self.outer in ('right', 'both')
     for encoded in values:
         record = happy.json.decode(encoded)
         order = record['__joinorder__']
         # Copy with a dedicated loop variable: iterating with the name 'key'
         # would overwrite the join key that is emitted and logged below.
         copied = {}
         for field in record.keys():
             copied[field] = record[field]
         if order == 1:
             found_file1 = True
             first_records.append(copied)
         else:
             found_file2 = True
             try:
                 for first in first_records:
                     merged = {}
                     merged.update(copied)
                     # Buffered (order 1) fields are applied last.
                     merged.update(first)
                     merged['__jointype__'] = 'inner'
                     task.collect(key, happy.json.encode(merged))
                 if outer_file2 and not found_file1:
                     copied['__jointype__'] = 'right'
                     task.collect(key, happy.json.encode(copied))
             except:
                 # Best effort: any failure while emitting for this record is
                 # logged and the reducer carries on with the next value.
                 # NOTE(review): the bare except is kept so the set of
                 # swallowed errors is unchanged; narrow it once the
                 # exceptions raised by task.collect are known.
                 logger.error("JOIN FAILED ON RECORD: (%s, %s)" % (key, encoded))
     if outer_file1 and not found_file2:
         # Nothing matched the buffered records: emit them as left-only rows.
         for first in first_records:
             first['__jointype__'] = 'left'
             task.collect(key, happy.json.encode(first))