Exemplo n.º 1
0
 def _parse(self, ses, opt, sniff=0):
     """Yield chunks built from comma-separated lines of ``self.fn``.

     The first line read supplies the column names; every later line
     supplies one row of values. A line whose first field starts with a
     double quote has double quotes stripped from both ends of every
     field. Values are paired with column names positionally, so extra
     fields on either side are dropped by ``zip``.

     ``ses`` and ``sniff`` are only forwarded to ``util.file_progress``;
     ``opt`` is not used here.
     """
     self.chunk_init()
     header = None
     for raw in util.file_progress(ses, self.fn, sniff):
         row = raw.strip().split(',')
         # only the first field decides whether the line is treated as quoted
         if row[0][:1] == '"':
             row = [cell.strip('"') for cell in row]
         if header:
             # data line: open a new slot, then fill it column by column
             self.chunk_extend()
             for name, cell in zip(header, row):
                 self.chunk[name][-1] = cell
         else:
             # header line: remember the column names
             header = row
         # hand out whatever chunk_emit() has ready after each line
         for ready in self.chunk_emit():
             yield ready
     # end of input: call chunk_emit(True) to hand out the remainder
     for ready in self.chunk_emit(True):
         yield ready
Exemplo n.º 2
0
 def _parse(self, ses, opt, sniff=0):
     """Turn the comma-separated lines of ``self.fn`` into chunks.

     Column names come from the first line read; each following line is
     one row. When the first field of a line begins with ``"``, double
     quotes are stripped from both ends of every field on that line.
     Names and values are matched by position via ``zip``.

     ``ses`` and ``sniff`` are passed straight to ``util.file_progress``;
     ``opt`` is not referenced in this method.
     """
     self.chunk_init()
     columns = None
     for text in util.file_progress(ses, self.fn, sniff):
         cells = text.strip().split(',')
         quoted = cells[0].startswith('"')
         if quoted:
             cells = [c.strip('"') for c in cells]
         if columns:
             # a row of values: make room, then store each value
             self.chunk_extend()
             for col, val in zip(columns, cells):
                 self.chunk[col][-1] = val
         else:
             # the very first line names the columns
             columns = cells
         for out in self.chunk_emit():
             yield out
     # input exhausted: final call is chunk_emit(True)
     for out in self.chunk_emit(True):
         yield out
Exemplo n.º 3
0
    def _parse(self, ses, opt, sniff=0):
        """Yield chunks of flattened JSON documents read from ``self.fn``.

        Each line produced by ``util.file_progress(ses, self.fn, sniff)`` is
        parsed as one JSON document and flattened into a dict that maps a
        ``util.SEP``-joined key path to a one-element list. Consecutive
        documents with the same set of keys are merged column-wise into one
        chunk (a dict of equal-length lists). The current chunk is yielded
        when a document with a different key set arrives, or when it already
        holds ``chunk_size`` rows.

        ``ses`` and ``sniff`` are only forwarded to ``util.file_progress``;
        ``opt`` is not used here.

        Lines that are not valid JSON are skipped. Any other error while
        decoding a line prints a traceback and stops reading. Whatever has
        been accumulated is always yielded at the end (an empty dict when
        nothing was read).
        """

        # keys in this set are transparent: their value is flattened under
        # the parent's key path instead of adding a path component
        ignore = frozenset(['floatApprox', '$date', '$numberLong', '$timestamp'])
        chunk_size = 100

        def flatten(result, j, key=None):
            # recursively walk dicts and lists, storing each leaf as [leaf]
            if isinstance(j, dict):
                for k, v in j.items():
                    if k in ignore:
                        flatten(result, v, key)
                    else:
                        flatten(result, v, key + util.SEP + k if key else k)
            elif isinstance(j, list):
                for i, v in enumerate(j):
                    flatten(result, v,
                            key + util.SEP + str(i) if key else str(i))
            else:
                result[key] = [j]
            return result

        chunk = {}
        for line in util.file_progress(ses, self.fn, sniff):
            # Only decoding is guarded. The yield below stays outside the
            # try so that closing the generator (GeneratorExit) is never
            # swallowed by an exception handler.
            try:
                j = flatten({}, json.loads(line))
            except ValueError:
                # ignore bad json
                continue
            except Exception:
                traceback.print_exc()
                break

            # an empty chunk has no columns to measure
            rows = len(next(iter(chunk.values()))) if chunk else 0
            # compare key sets, not key order
            if set(j) != set(chunk) or rows >= chunk_size:
                if chunk:
                    yield chunk
                chunk = j
            else:
                for k, v in j.items():
                    chunk[k].extend(v)
        yield chunk
Exemplo n.º 4
0
    def _parse(self, ses, opt, sniff=0):
        """Yield chunks of flattened JSON documents read from ``self.fn``.

        Every line from ``util.file_progress(ses, self.fn, sniff)`` is decoded
        as a JSON document and flattened into a dict whose keys are
        ``util.SEP``-joined paths and whose values are one-element lists.
        Documents that share the same key set are accumulated column-wise in
        a single chunk. That chunk is yielded when the key set changes, or
        when it already contains ``chunk_size`` rows.

        ``ses`` and ``sniff`` are only forwarded to ``util.file_progress``;
        ``opt`` is not used here.

        Invalid JSON lines are skipped. Any other decoding error prints a
        traceback and ends the loop. The accumulated chunk is always yielded
        last (an empty dict when no document was read).
        """

        # these keys add no path component; their value is flattened
        # directly under the parent's key path
        ignore = frozenset(['floatApprox', '$date', '$numberLong', '$timestamp'])
        chunk_size = 100

        def flatten(result, j, key=None):
            # depth-first walk; each leaf value is stored as [leaf]
            if isinstance(j, dict):
                for k, v in j.items():
                    if k in ignore:
                        flatten(result, v, key)
                    else:
                        flatten(result, v, key + util.SEP + k if key else k)
            elif isinstance(j, list):
                for i, v in enumerate(j):
                    flatten(result, v, key + util.SEP + str(i) if key else str(i))
            else:
                result[key] = [j]
            return result

        chunk = {}
        for line in util.file_progress(ses, self.fn, sniff):
            # Keep the yield outside the try block: a handler around it
            # would intercept GeneratorExit when the consumer closes the
            # generator early.
            try:
                j = flatten({}, json.loads(line))
            except ValueError:
                # ignore bad json
                continue
            except Exception:
                traceback.print_exc()
                break

            # guard the row count: an empty chunk has no column to inspect
            rows = len(next(iter(chunk.values()))) if chunk else 0
            # key sets are compared without regard to ordering
            if set(j) != set(chunk) or rows >= chunk_size:
                if chunk:
                    yield chunk
                chunk = j
            else:
                for k, v in j.items():
                    chunk[k].extend(v)
        yield chunk
Exemplo n.º 5
0
        def _parse(self, ses, opt, sniff=0):
            """Yield chunks built from the lines of ``self.fn`` that match ``rec``.

            ``rec`` and ``time_key`` come from the enclosing scope, which is
            not part of this block. ``rec`` is used as a compiled regular
            expression with named groups (``rec.match``, ``rec.groupindex``)
            and ``time_key`` as the name of one of those groups.

            For each matching line:

            * if the ``time_key`` group is non-empty, pending chunks are
              emitted with ``flush=False``, ``chunk_extend()`` is called and
              the time is stored in the last slot and in ``self.last_time``;
            * every other named group that matched is stored in the last
              slot of its column; if that slot is already filled,
              ``chunk_extend()`` is called first and the new slot gets
              ``self.last_time`` as its time.

            After the last line the rest is emitted with ``flush=True``.

            ``ses`` and ``sniff`` are only forwarded to
            ``util.file_progress``; ``opt`` is not used here.
            """

            # init
            self.chunk_init()
            # NOTE(review): pt is never read in this method; the call is kept
            # in case util.parse_time() has side effects -- confirm and remove.
            pt = util.parse_time()

            # process the file
            for line in util.file_progress(ses, self.fn, sniff):

                # match line; non-matching lines are skipped
                m = rec.match(line.strip())
                if not m:
                    continue

                # process time_key
                timestamp = m.group(time_key)
                if timestamp:
                    for chunk in self.chunk_emit(flush=False):
                        yield chunk
                    self.chunk_extend()
                    self.chunk[time_key][-1] = timestamp
                    self.last_time = timestamp

                # process each data_key
                # NOTE(review): presumably a time has always been seen before
                # the first data value; otherwise self.last_time and the
                # [-1] lookups below may not exist yet -- verify.
                for data_key in rec.groupindex:
                    if data_key == time_key:
                        continue
                    data = m.group(data_key)
                    if data is None:
                        continue
                    if self.chunk[data_key][-1] is not None:
                        # slot already taken: open a new one, reuse last time
                        self.chunk_extend()
                        self.chunk[time_key][-1] = self.last_time
                    self.chunk[data_key][-1] = data

            # finish up
            for chunk in self.chunk_emit(flush=True):
                yield chunk
Exemplo n.º 6
0
     def _parse(self, ses, opt, sniff=0):
         """Yield chunks built from the lines of ``self.fn`` that match ``rec``.

         ``rec`` and ``time_key`` are free variables supplied by the
         enclosing scope, which is not visible in this block. ``rec`` is
         used as a compiled regular expression with named groups
         (``rec.match``, ``rec.groupindex``); ``time_key`` names one of
         those groups.

         For each matching line:

         * a non-empty ``time_key`` group causes pending chunks to be
           emitted with ``flush=False``, then ``chunk_extend()`` is called
           and the time is written to the last slot and to
           ``self.last_time``;
         * each other named group that matched is written to the last slot
           of its column; when that slot is already filled,
           ``chunk_extend()`` is called first and the new slot's time is
           set to ``self.last_time``.

         Once the input is exhausted the rest is emitted with
         ``flush=True``.

         ``ses`` and ``sniff`` are only forwarded to ``util.file_progress``;
         ``opt`` is not used here.
         """

         # init
         self.chunk_init()
         # NOTE(review): pt is never read in this method; the call is kept
         # in case util.parse_time() has side effects -- confirm and remove.
         pt = util.parse_time()

         # process the file
         for line in util.file_progress(ses, self.fn, sniff):

             # match line; lines that do not match are skipped
             m = rec.match(line.strip())
             if not m:
                 continue

             # process time_key
             timestamp = m.group(time_key)
             if timestamp:
                 for chunk in self.chunk_emit(flush=False):
                     yield chunk
                 self.chunk_extend()
                 self.chunk[time_key][-1] = timestamp
                 self.last_time = timestamp

             # process each data_key
             # NOTE(review): presumably a time has always been seen before
             # the first data value; otherwise self.last_time and the [-1]
             # lookups below may not exist yet -- verify.
             for data_key in rec.groupindex:
                 if data_key == time_key:
                     continue
                 data = m.group(data_key)
                 if data is None:
                     continue
                 if self.chunk[data_key][-1] is not None:
                     # slot already taken: open a new one, reuse last time
                     self.chunk_extend()
                     self.chunk[time_key][-1] = self.last_time
                 self.chunk[data_key][-1] = data

         # finish up
         for chunk in self.chunk_emit(flush=True):
             yield chunk