def __dict_from_table(table: bytes) -> List[Dict[str, Any]]:
    """Create a dict from table text.

    Arguments:
        table {bytes} -- A byte string table provided by wpa_supplicant

    Returns:
        List[Dict[str, Any]] -- A list of dicts whose keys are the table header values
    """
    raw_lines = table.strip().split(b"\n")

    listed_lines = []
    for raw_line in raw_lines:
        listed_lines += [raw_line.split(b"\t")]

    # Create keys from the header line
    try:
        temp_header = listed_lines.pop(0)[0]
        header = temp_header.replace(b" ", b"").split(b"/")
    except Exception as error:
        raise ParseError("Failed creating header for dictionary.") from error

    output: List[Any] = []
    for line in listed_lines:
        output += [{}]
        try:
            for key, value in zip(header, line):
                output[-1][WifiManager.__decode_escaped(key)] = \
                    WifiManager.__decode_escaped(value)
        except Exception as error:
            raise ParseError("Failed parsing dictionary data from table.") from error

    return output
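# --- Illustration (not part of the original module) ---
# A minimal standalone sketch of the same tab-separated table parsing,
# assuming a wpa_supplicant scan_results-style table; the real method also
# unescapes keys and values via WifiManager.__decode_escaped.
def _dict_from_table_sketch(table: bytes):
    lines = table.strip().split(b"\n")
    # header values are separated by "/" once spaces are stripped
    header = lines.pop(0).replace(b" ", b"").split(b"/")
    return [dict(zip(header, line.split(b"\t"))) for line in lines]

# _dict_from_table_sketch(b"bssid / frequency / signal level\n"
#                         b"aa:bb:cc:dd:ee:ff\t2412\t-60\n")
# -> [{b'bssid': b'aa:bb:cc:dd:ee:ff', b'frequency': b'2412', b'signallevel': b'-60'}]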
def __init__(self, file):
    core.AVContainer.__init__(self)
    self.mime = 'video/x-ms-asf'
    self.type = 'asf format'
    self._languages = []
    self._extinfo = {}

    h = file.read(30)
    if len(h) < 30:
        raise ParseError()

    (guidstr, objsize, objnum, reserved1, reserved2) = \
        struct.unpack('<16sQIBB', h)

    guid = self._parseguid(guidstr)
    if guid != GUIDS['ASF_Header_Object']:
        raise ParseError()
    if reserved1 != 0x01 or reserved2 != 0x02:
        raise ParseError()

    log.debug('Header size: %d / %d objects' % (objsize, objnum))
    header = file.read(objsize - 30)
    for _ in range(0, objnum):
        h = self._getnextheader(header)
        header = header[h[1]:]

    del self._languages
    del self._extinfo
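# --- Illustration (not part of the original parser) ---
# The 30-byte ASF top-level header unpacked above, rebuilt with struct.pack;
# a hypothetical fixture, assuming `guid_bytes` is the 16-byte GUID that
# _parseguid maps to GUIDS['ASF_Header_Object']:
#   header = struct.pack('<16sQIBB', guid_bytes, objsize, objnum, 0x01, 0x02)
# 16 + 8 + 4 + 1 + 1 = 30 bytes, matching the file.read(30) above.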
def __init__(self, file):
    core.AVContainer.__init__(self)
    self.sequence_header_offset = 0
    self.mpeg_version = 2

    # detect TS (fast scan)
    if not self.isTS(file):
        # detect system mpeg (many infos)
        if not self.isMPEG(file):
            # detect PES
            if not self.isPES(file):
                # Maybe it's MPEG-ES
                if self.isES(file):
                    # If isES() succeeds, we needn't do anything further.
                    return
                if file.name.lower().endswith('mpeg') or \
                        file.name.lower().endswith('mpg'):
                    # This has to be an mpeg file. It could be a bad
                    # recording from an ivtv-based hardware encoder with
                    # some bytes missing at the beginning.
                    # Do some more digging...
                    if not self.isMPEG(file, force=True) or \
                            not self.video or not self.audio:
                        # does not look like an mpeg at all
                        raise ParseError()
                else:
                    # no mpeg at all
                    raise ParseError()

    self.mime = 'video/mpeg'
    if not self.video:
        self.video.append(core.VideoStream())

    if self.sequence_header_offset <= 0:
        return

    self.progressive(file)

    for vi in self.video:
        vi.width, vi.height = self.dxy(file)
        vi.fps, vi.aspect = self.framerate_aspect(file)
        vi.bitrate = self.bitrate(file)
        if self.length:
            vi.length = self.length

    if not self.type:
        self.type = 'MPEG Video'

    # set fourcc codec for video and audio
    vc, ac = 'MP2V', 'MP2A'
    if self.mpeg_version == 1:
        vc, ac = 'MPEG', 0x0050
    for v in self.video:
        v.codec = vc
    for a in self.audio:
        if not a.codec:
            a.codec = ac
def parseFileLine(self, line):
    # Formats:
    #   /home/kwalker/src/europa/src/golang/src/github.com/control-center/serviced/cli/api/daemon.go:306 +0xb13
    #   /usr/local/go/src/compress/flate/deflate.go:150
    fieldnum = 0
    warnings = []
    line = line.strip()
    try:
        colonIndex = line.rfind(':')
        if colonIndex == -1:
            raise ParseError('Not a file line (no colon found)!')

        fieldnum = 1  # File name
        if colonIndex == 0:
            raise ParseError('No filename found!')
        self.filename, line = line.rsplit(':', 1)

        fieldnum = 2  # Line number
        if line.find(' ') == -1:
            linenum = line
            line = ''
        else:
            linenum, line = line.split(' ', 1)
        if not linenum.isdigit():
            raise ParseError(
                'Expected integer line number, got: {0}'.format(linenum))
        self.linenum = int(linenum)

        fieldnum = 3  # Offset
        if line.find(' ') == -1:
            self.offset = line
            line = ''
        else:
            self.offset, line = line.split(' ', 1)

        fieldnum = 0  # Done processing fields

        # Verify no extra fields were found on the line
        if line is not None and len(line) > 0:
            warnings.append(
                self.formatFileMessage(
                    'Extra fields found: {0}'.format(line), fieldnum))
        return warnings
    except Exception as exc:
        raiseWithModifiedMessage(
            sys.exc_info(), self.formatFileMessage(str(exc), fieldnum))
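# --- Illustration (not part of the original class) ---
# Expected field extraction for the two formats above (hypothetical walkthrough):
#   "/usr/local/go/src/compress/flate/deflate.go:150"
#       -> filename='/usr/local/go/src/compress/flate/deflate.go', linenum=150, offset=''
#   ".../cli/api/daemon.go:306 +0xb13"
#       -> filename='.../cli/api/daemon.go', linenum=306, offset='+0xb13'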
def __init__(self, file):
    core.AVContainer.__init__(self)
    self._references = []

    self.mime = 'video/quicktime'
    self.type = 'Quicktime Video'
    h = file.read(8)
    try:
        (size, type) = struct.unpack('>I4s', h)
    except struct.error:
        # EOF.
        raise ParseError()

    if type == 'ftyp':
        # file type information
        if size >= 12:
            # this should always happen
            if file.read(4) != 'qt  ':
                # not a quicktime movie, it is a mpeg4 container
                self.mime = 'video/mp4'
                self.type = 'MPEG-4 Video'
            size -= 4
        file.seek(size - 8, 1)
        h = file.read(8)
        (size, type) = struct.unpack('>I4s', h)

    while type in ['mdat', 'skip']:
        # movie data at the beginning, skip
        file.seek(size - 8, 1)
        h = file.read(8)
        (size, type) = struct.unpack('>I4s', h)

    if type not in ['moov', 'wide', 'free']:
        log.debug(u'invalid header: %r' % type)
        raise ParseError()

    # Extended size
    if size == 1:
        size = struct.unpack('>Q', file.read(8))[0]

    # Back up over the atom header we just read, since _readatom expects the
    # file position to be at the start of an atom.
    file.seek(-8, 1)
    while self._readatom(file):
        pass

    if self._references:
        self._set('references', self._references)
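# --- Illustration (not part of the original parser) ---
# The 8-byte atom header read above, as built by struct.pack (hypothetical
# fixture): struct.pack('>I4s', 16, 'ftyp') yields an 8-byte header declaring
# a 16-byte 'ftyp' atom. A size field of 1 signals that the real size follows
# as a 64-bit big-endian value in the next 8 bytes, which is what the
# "Extended size" branch handles.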
def test_dispatch_parse_error(self, as_json_mock: Mock,
                              parse_location_mock: Mock):
    """A parsing error will return a ParseErrorMessage."""
    parse_location_mock.side_effect = ParseError('')
    as_json_mock.return_value = Response()

    self.test_client.post('/chat/messages', data=self.data)

    as_json_mock.assert_called_once()
def __init__(self, file):
    core.AVContainer.__init__(self)
    # read the header
    h = file.read(12)
    if h[:4] != 'RIFF' and h[:4] != 'SDSS':
        raise ParseError()

    self.has_idx = False
    self.header = {}
    self.junkStart = None
    self.infoStart = None
    self.type = h[8:12]
    if self.type == 'AVI ':
        self.mime = 'video/avi'
    elif self.type == 'WAVE':
        self.mime = 'audio/wav'
    try:
        while self._parseRIFFChunk(file):
            pass
    except IOError:
        log.exception(u'error in file, stop parsing')

    self._find_subtitles(file.name)

    if not self.has_idx and isinstance(self, core.AVContainer):
        log.debug(u'WARNING: avi has no index')
        self._set('corrupt', True)
def parse(self, source_string):
    self.source_string = source_string
    self.length = len(source_string)
    while True:
        self._parse_whitespaces()
        if self.pos == self.length:
            break
        token_word = self._parse_token_word()
        token_type = self._get_type(token_word)
        if token_type == Token.TYPE_CONST:
            self.token_list.append(self._get_const_token(token_word))
        elif token_type == Token.TYPE_KEYWORD:
            self.token_list.append(self._get_keyword_token(token_word))
        elif token_type == Token.TYPE_IDENTIFIER:
            self.token_list.append(self._get_identifier_token(token_word))
        elif token_type == Token.TYPE_DELIMITER:
            self.token_list.append(self._get_delimiter_token(token_word))
        else:
            raise ParseError(
                self.line, self.line_pos,
                "not a valid token for token word '{}'".format(token_word))
        if not self._has_next():
            break
    return self.token_list
def parse_input(input_string: str, delimiter: str = ' ') -> List:
    try:
        split_lines = filter(lambda x: x != '', input_string.splitlines())
        output_arr = []
        for line in split_lines:
            # each delimiter-separated token contributes a pair built from
            # its first two characters, e.g. '12' -> (1, 2)
            new_line = [(int(val[0]), int(val[1]))
                        for val in line.split(delimiter)]
            output_arr.append(new_line)
        return output_arr
    except Exception as e:
        raise ParseError(f"Can't parse data: {str(e)}") from e
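# --- Illustration (not part of the original module) ---
# Hypothetical input/output, assuming two-digit tokens as sketched above
# (blank lines are filtered out before parsing):
#   parse_input("12 34\n\n56 78")
#   -> [[(1, 2), (3, 4)], [(5, 6), (7, 8)]]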
def __init__(self, file):
    core.AVContainer.__init__(self)
    self.samplerate = 1

    self.file = file
    # Read enough that we're likely to get the full seekhead (FIXME: kludge)
    buffer = file.read(2000)
    if len(buffer) == 0:
        # Regular file end
        raise ParseError()

    # Check the Matroska header
    header = EbmlEntity(buffer)
    if header.get_id() != MATROSKA_HEADER_ID:
        raise ParseError()

    log.debug(u'HEADER ID found %08X' % header.get_id())
    self.mime = 'video/x-matroska'
    self.type = 'Matroska'
    self.has_idx = False
    self.objects_by_uid = {}

    # Now get the segment
    self.segment = segment = EbmlEntity(buffer[header.get_total_len():])
    # Record the file offset of the segment data for seekheads
    self.segment.offset = header.get_total_len() + segment.get_header_len()
    if segment.get_id() != MATROSKA_SEGMENT_ID:
        log.debug(u'SEGMENT ID not found %08X' % segment.get_id())
        return

    log.debug(u'SEGMENT ID found %08X' % segment.get_id())
    try:
        for elem in self.process_one_level(segment):
            if elem.get_id() == MATROSKA_SEEKHEAD_ID:
                self.process_elem(elem)
    except ParseError:
        pass

    if not self.has_idx:
        log.warning(u'File has no index')
        self._set('corrupt', True)
def parse_location(self, form_body: dict) -> Tuple[str, str]:
    """Parse the query and extract the location and time strings."""
    query = form_body['text']
    for regex in self.phrase_regex:
        result = regex.match(query)
        if result is not None:
            data = result.groupdict()
            if 'time' not in data:
                data['time'] = 'today'
            return data['location'], data['time']
    raise ParseError('Did not recognize phrase: {}'.format(query), data=query)
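# --- Illustration (not part of the original class) ---
# Hypothetical patterns of the kind self.phrase_regex could hold (the real
# patterns are defined elsewhere); a pattern without a 'time' group falls
# back to 'today' in the loop above:
#   import re
#   phrase_regex = [
#       re.compile(r'^weather in (?P<location>.+) (?P<time>today|tomorrow)$'),
#       re.compile(r'^weather in (?P<location>.+)$'),  # no time group
#   ]
#   "weather in Berlin tomorrow" -> ('Berlin', 'tomorrow')
#   "weather in Berlin"          -> ('Berlin', 'today')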
def __init__(self, file):
    core.AVContainer.__init__(self)
    self.mime = 'video/real'
    self.type = 'Real Video'
    h = file.read(10)
    try:
        (object_id, object_size, object_version) = struct.unpack('>4sIH', h)
    except struct.error:
        # EOF.
        raise ParseError()

    if object_id != '.RMF':
        raise ParseError()

    file_version, num_headers = struct.unpack('>II', file.read(8))
    log.debug('size: %d, ver: %d, headers: %d' %
              (object_size, file_version, num_headers))
    for _ in range(0, num_headers):
        try:
            oi = struct.unpack('>4sIH', file.read(10))
        except (struct.error, IOError):
            # Header data we expected wasn't there. File may be
            # only partially complete.
            break

        if object_id == 'DATA' and oi[0] != 'INDX':
            log.debug('INDX chunk expected after DATA but not found -- file corrupt')
            break

        (object_id, object_size, object_version) = oi
        if object_id == 'DATA':
            # Seek over the data chunk rather than reading it in.
            file.seek(object_size - 10, 1)
        else:
            self._read_header(object_id, file.read(object_size - 10))
        log.debug('%r [%d]' % (object_id, object_size - 10))
def __init__(self, inbuf):
    # Compute the EBML id
    # Set the CRC length to zero
    self.crc_len = 0
    # Now loop until we find an entity without a CRC
    try:
        self.build_entity(inbuf)
    except IndexError:
        raise ParseError()
    while self.get_id() == MATROSKA_CRC32_ID:
        self.crc_len += self.get_total_len()
        inbuf = inbuf[self.get_total_len():]
        self.build_entity(inbuf)
def _parseAVIH(self, t):
    retval = {}
    v = struct.unpack('<IIIIIIIIIIIIII', t[0:56])
    (retval['dwMicroSecPerFrame'],
     retval['dwMaxBytesPerSec'],
     retval['dwPaddingGranularity'],
     retval['dwFlags'],
     retval['dwTotalFrames'],
     retval['dwInitialFrames'],
     retval['dwStreams'],
     retval['dwSuggestedBufferSize'],
     retval['dwWidth'],
     retval['dwHeight'],
     retval['dwScale'],
     retval['dwRate'],
     retval['dwStart'],
     retval['dwLength']) = v
    if retval['dwMicroSecPerFrame'] == 0:
        log.warning(u'ERROR: Corrupt AVI')
        raise ParseError()
    return retval
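# --- Illustration (not part of the original parser) ---
# The 56-byte avih layout unpacked above, rebuilt with struct.pack; a
# hypothetical fixture assuming 25 fps (40000 us per frame), 250 frames,
# 2 streams, and 720x576 video. Feeding this to _parseAVIH would yield the
# corresponding dict.
import struct

def _make_avih_sketch():
    fields = [40000, 0, 0, 0, 250, 0, 2, 0, 720, 576, 0, 0, 0, 0]
    return struct.pack('<IIIIIIIIIIIIII', *fields)  # 14 * 4 = 56 bytes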
def _parseOGGS(self, file):
    h = file.read(27)
    if len(h) == 0:
        # Regular file end
        return None, None
    elif len(h) < 27:
        log.debug(u'%d bytes of garbage found after end.' % len(h))
        return None, None
    if h[:4] != 'OggS':
        log.debug(u'Invalid Ogg')
        raise ParseError()

    version = ord(h[4])
    if version != 0:
        log.debug(u'Unsupported OGG/OGM version %d' % version)
        return None, None

    head = struct.unpack('<BQIIIB', h[5:])
    headertype, granulepos, serial, pageseqno, checksum, pageSegCount = head

    self.mime = 'application/ogm'
    self.type = 'OGG Media'
    tab = file.read(pageSegCount)
    nextlen = 0
    for i in range(len(tab)):
        nextlen += ord(tab[i])

    h = file.read(1)
    packettype = ord(h[0]) & PACKET_TYPE_BITS
    if packettype == PACKET_TYPE_HEADER:
        h += file.read(nextlen - 1)
        self._parseHeader(h, granulepos)
    elif packettype == PACKED_TYPE_METADATA:
        h += file.read(nextlen - 1)
        self._parseMeta(h)
    else:
        file.seek(nextlen - 1, 1)

    if len(self.all_streams) > serial:
        stream = self.all_streams[serial]
        if hasattr(stream, 'samplerate') and stream.samplerate:
            stream.length = granulepos / stream.samplerate
        elif hasattr(stream, 'bitrate') and stream.bitrate:
            stream.length = granulepos / stream.bitrate

    return granulepos, nextlen + 27 + pageSegCount
def build_entity(self, inbuf):
    self.compute_id(inbuf)

    if self.id_len == 0:
        log.error('EBML entity not found, bad file format')
        raise ParseError()

    self.entity_len, self.len_size = self.compute_len(inbuf[self.id_len:])
    self.entity_data = inbuf[self.get_header_len():self.get_total_len()]
    self.ebml_length = self.entity_len
    self.entity_len = min(len(self.entity_data), self.entity_len)

    # if the data size is 8 bytes or less, it could be a numeric value
    self.value = 0
    if self.entity_len <= 8:
        for pos, shift in zip(range(self.entity_len),
                              range((self.entity_len - 1) * 8, -1, -8)):
            self.value |= ord(self.entity_data[pos]) << shift
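# --- Illustration (not part of EbmlEntity) ---
# A standalone sketch of the EBML variable-length integer scheme that
# compute_id/compute_len above presumably rely on (an assumption based on the
# EBML spec; the real methods live elsewhere in this class): the position of
# the first set bit in the leading byte gives the total length; the marker
# bit is masked off for data sizes but kept for element IDs.
def _ebml_vint_sketch(buf, keep_marker=False):
    data = bytearray(buf)
    first = data[0]
    length = 1
    while length <= 8 and not (first & (0x80 >> (length - 1))):
        length += 1
    if length > 8:
        raise ValueError('invalid EBML vint leading byte')
    value = first if keep_marker else first & (0xFF >> length)
    for b in data[1:length]:
        value = (value << 8) | b
    return value, length

# _ebml_vint_sketch(b'\x82')                              -> (2, 1)
# _ebml_vint_sketch(b'\x1a\x45\xdf\xa3', keep_marker=True) -> (0x1A45DFA3, 4)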
def _parse(self):
    for descendant in self.root.children:
        if descendant.dep_ == 'det':
            self.determiners.append(descendant)
        elif descendant.dep_ == 'amod':
            self.modifiers.append(AdjectivePhrase(descendant))
        elif descendant.dep_ == 'punct':
            pass  # TODO: handle punctuation
        elif descendant.dep_ == 'acl':
            self.modifiers.append(AdjectivePhrase(descendant))
        elif descendant.dep_ == 'nummod':
            self.cnt = get_number_from_numeral(descendant)
        elif descendant.dep_ in {'cc', 'conj'}:
            pass  # coordination should already be handled
        else:
            raise ParseError('{} is unknown dep_ for {}: {}'.format(
                descendant, self.root, descendant.dep_))
def parseFunctionLine(self, line):
    fieldnum = 0
    warnings = []
    line = line.strip()
    try:
        createdPrefix = 'created by '
        if not line.startswith(createdPrefix):
            # Formats:
            #   github.com/fsouza/go-dockerclient.func·008(0xc20808a7e0, 0xc2080b0210)
            #   github.com/control-center/serviced/cli/api.(*daemon).run(0xc2080f0180, 0x0, 0x0)
            # Verify the line ends with an argument list. We need to do this
            # because object pointers in the full function name (like
            # "(*daemon)" above) might otherwise look like args.
            rightParenIndex = line.rfind(')')
            leftParenIndex = line.rfind('(')
            if rightParenIndex != (len(line) - 1) or leftParenIndex == -1:
                raise ParseError('Not a function line (no argument list found)!')

            fieldnum = 1  # Function name (including path components)
            self.function, args = line.rsplit('(', 1)

            fieldnum = 2  # Function arguments
            args = args.rstrip(')')
            if len(args) > 0:
                for arg in args.split(','):
                    self.addArg(arg.strip())
        else:
            # Format:
            #   created by os/signal.init·1
            fieldnum = 1  # Function name (including path components)
            self.function = line[len(createdPrefix):]
            self.iscreatedby = True

        fieldnum = 0  # Done processing fields
        # No need to look for extra fields, given our initial check that the
        # line ends with "(args)"
        return warnings
    except Exception as exc:
        raiseWithModifiedMessage(
            sys.exc_info(), self.formatFunctionMessage(str(exc), fieldnum))
def _parse_while(self, predicate):
    c = self._current_char()
    word = ""
    if predicate(c):
        while predicate(c):
            word += c
            if not self._has_next():
                self.pos = self.length
                break
            c = self._next()
        return word
    else:
        raise ParseError(
            self.line, self.line_pos,
            "did not start with predicate: '{}' found".format(c))
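# --- Illustration (not part of the tokenizer) ---
# A standalone sketch of the predicate-driven scan above, assuming a simple
# index-based cursor over a string instead of the tokenizer's internal state:
def _scan_while_sketch(s, pos, predicate):
    start = pos
    while pos < len(s) and predicate(s[pos]):
        pos += 1
    return s[start:pos], pos

# _scan_while_sketch("abc123", 0, str.isalpha) -> ("abc", 3)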
def __dict_from_list(data: bytes) -> Dict[str, Any]:
    """Create a dict from a key=value list.

    Arguments:
        data {bytes} -- A byte string list provided by wpa_supplicant

    Returns:
        Dict[str, Any] -- A dict keyed by the variable names in the list
    """
    raw_lines = data.strip().split(b"\n")

    output = {}
    for line in raw_lines:
        try:
            # split only on the first '=' so values may themselves contain '='
            key, value = line.split(b"=", 1)
            output[WifiManager.__decode_escaped(key)] = \
                WifiManager.__decode_escaped(value)
        except Exception as error:
            raise ParseError("Failed parsing dictionary data from list.") from error

    return output
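# --- Illustration (not part of the original module) ---
# Hypothetical input/output for the key=value parsing above, assuming
# WifiManager.__decode_escaped decodes the byte strings to str:
#   b"bssid=aa:bb:cc:dd:ee:ff\nssid=home\n"
#   -> {'bssid': 'aa:bb:cc:dd:ee:ff', 'ssid': 'home'}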
def __init__(self, file):
    core.AVContainer.__init__(self)
    self.samplerate = 1
    self.all_streams = []  # used to add metadata to streams
    self.all_header = []

    for i in range(MAXITERATIONS):
        granule, nextlen = self._parseOGGS(file)
        if granule is None:
            if i == 0:
                # oops, bad file
                raise ParseError()
            break
        elif granule > 0:
            # ok, file started
            break

    # seek near the end of the stream, to avoid scanning the whole file
    if os.stat(file.name)[stat.ST_SIZE] > 50000:
        file.seek(os.stat(file.name)[stat.ST_SIZE] - 49000)

    # read the rest of the file into a buffer
    h = file.read()

    # find the last OggS to get length info
    if len(h) > 200:
        idx = h.find('OggS')
        pos = -49000 + idx
        if idx >= 0:  # find() returns -1 when 'OggS' is absent
            file.seek(os.stat(file.name)[stat.ST_SIZE] + pos)
            while 1:
                granule, nextlen = self._parseOGGS(file)
                if not nextlen:
                    break

    # Copy metadata to the streams
    if len(self.all_header) == len(self.all_streams):
        for i in range(len(self.all_header)):
            # get meta info
            for key in self.all_streams[i].keys():
                if key in self.all_header[i]:
                    self.all_streams[i][key] = self.all_header[i][key]
                    del self.all_header[i][key]
                if key.upper() in self.all_header[i]:
                    asi = self.all_header[i][key.upper()]
                    self.all_streams[i][key] = asi
                    del self.all_header[i][key.upper()]

            # Chapter parser
            if 'CHAPTER01' in self.all_header[i] and not self.chapters:
                while 1:
                    s = 'CHAPTER%02d' % (len(self.chapters) + 1)
                    if s in self.all_header[i] and \
                            s + 'NAME' in self.all_header[i]:
                        pos = self.all_header[i][s]
                        try:
                            pos = int(pos)
                        except ValueError:
                            new_pos = 0
                            for v in pos.split(':'):
                                new_pos = new_pos * 60 + float(v)
                            pos = int(new_pos)

                        c = self.all_header[i][s + 'NAME']
                        c = core.Chapter(c, pos)
                        del self.all_header[i][s + 'NAME']
                        del self.all_header[i][s]
                        self.chapters.append(c)
                    else:
                        break

    # If there are no video streams in this ogg container, it must be an
    # audio file. Raise an exception to cause the factory to fall back to
    # audio.ogg.
    if len(self.video) == 0:
        raise ParseError()

    # Copy metadata from tables into the main set of attributes
    for header in self.all_header:
        self._appendtable('VORBISCOMMENT', header)
def __init__(self, file):
    core.AVContainer.__init__(self)
    self.mime = 'video/flv'
    self.type = 'Flash Video'
    data = file.read(13)
    if len(data) < 13 or struct.unpack('>3sBBII', data)[0] != 'FLV':
        raise ParseError()

    for _ in range(10):
        if self.audio and self.video:
            break
        data = file.read(11)
        if len(data) < 11:
            break
        chunk = struct.unpack('>BH4BI', data)
        size = (chunk[1] << 8) + chunk[2]

        if chunk[0] == FLV_TAG_TYPE_AUDIO:
            flags = ord(file.read(1))
            if not self.audio:
                a = core.AudioStream()
                a.channels = (flags & FLV_AUDIO_CHANNEL_MASK) + 1
                srate = flags & FLV_AUDIO_SAMPLERATE_MASK
                a.samplerate = (44100 << (srate >> FLV_AUDIO_SAMPLERATE_OFFSET)) >> 3
                codec = (flags & FLV_AUDIO_CODECID_MASK) >> FLV_AUDIO_CODECID_OFFSET
                if codec < len(FLV_AUDIO_CODECID):
                    a.codec = FLV_AUDIO_CODECID[codec]
                self.audio.append(a)
            file.seek(size - 1, 1)

        elif chunk[0] == FLV_TAG_TYPE_VIDEO:
            flags = ord(file.read(1))
            if not self.video:
                v = core.VideoStream()
                codec = (flags & FLV_VIDEO_CODECID_MASK) - 2
                if codec < len(FLV_VIDEO_CODECID):
                    v.codec = FLV_VIDEO_CODECID[codec]
                # width and height are in the meta packet, but I have
                # no file with such a packet inside. So maybe we have
                # to decode some parts of the video.
                self.video.append(v)
            file.seek(size - 1, 1)

        elif chunk[0] == FLV_TAG_TYPE_META:
            log.info('metadata %r', str(chunk))
            metadata = file.read(size)
            try:
                while metadata:
                    length, value = self._parse_value(metadata)
                    if isinstance(value, dict):
                        log.info('metadata: %r', value)
                        if value.get('creator'):
                            self.copyright = value.get('creator')
                        if value.get('width'):
                            self.width = value.get('width')
                        if value.get('height'):
                            self.height = value.get('height')
                        if value.get('duration'):
                            self.length = value.get('duration')
                        self._appendtable('FLVINFO', value)
                    if not length:
                        # parse error
                        break
                    metadata = metadata[length:]
            except (IndexError, struct.error, TypeError):
                pass

        else:
            log.info('unknown %r', str(chunk))
            file.seek(size, 1)

        file.seek(4, 1)
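# --- Illustration (not part of the original parser) ---
# A worked example of the audio-flags decoding above, assuming the
# conventional FLV bit layout for the module's constants (codec mask 0xF0
# with offset 4, sample-rate mask 0x0C with offset 2, channel mask 0x01):
#   flags = 0xAF
#   codec    = (0xAF & 0xF0) >> 4 = 10  (AAC)
#   srate    = (0xAF & 0x0C) >> 2 = 3; 44100 << 3 >> 3 = 44100 Hz
#   channels = (0xAF & 0x01) + 1  = 2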
def parseLine(self, line):
    # Possible formats:
    #   goroutine 0 [idle]:
    #   goroutine 1 [chan receive, 30 minutes]:
    #   goroutine 17 [syscall, 31 minutes, locked to thread]:
    #   goroutine 34 [syscall, locked to thread]:
    fieldnum = 0
    warnings = []
    line = line.strip()
    try:
        fieldnum = 1  # 'goroutine'
        goword, line = line.split(' ', 1)
        if goword.lower() != 'goroutine':
            raise ParseError(
                'First word ({0}) is not \'goroutine\'!'.format(goword))

        fieldnum = 2  # goroutine ID
        goId, line = line.split(' ', 1)
        if not goId.isdigit():
            raise ParseError(
                'Expected integer goroutine ID, got: {0}'.format(goId))
        self.id = int(goId)

        fieldnum = 3  # State, wait time, etc.
        # Pull the state fields (the stuff between [ and ]) out from the
        # rest of the line
        leftBraceIndex = line.find('[')
        rightBraceIndex = line.find(']')
        if leftBraceIndex == -1 or rightBraceIndex == -1 or \
                rightBraceIndex < (leftBraceIndex + 2):
            raise ParseError('State info not found (or is empty)!')
        if leftBraceIndex > 0:
            warnings.append(
                self.formatMessage(
                    'Extra fields found before state info: {0}'.format(
                        line[0:leftBraceIndex]), fieldnum))
        stateFields = line[leftBraceIndex + 1:rightBraceIndex].split(',')
        line = line[rightBraceIndex + 1:]

        # Now process each field
        for i in range(len(stateFields)):
            field = stateFields[i].strip()
            if i == 0:
                # First field is always the state
                self.state = field
            elif field == 'locked to thread':
                self.lockedtothread = True
            elif re.match('^[0-9]+ minutes$', field):
                # Wait time
                waittime, minutes = field.split(' ', 1)
                if not waittime.isdigit():
                    raise ParseError(
                        'Expected integer wait time, got: {0}'.format(
                            waittime))
                self.waittime = int(waittime)
            else:
                warnings.append(
                    self.formatMessage(
                        'Unknown field found in state info: {0}'.format(
                            field), fieldnum))
            fieldnum += 1

        fieldnum = 0  # Done processing fields
        # Verify no extra fields were found on the line
        if line is not None and line != ':':
            warnings.append(
                self.formatMessage(
                    'Extra fields found: {0}'.format(line), fieldnum))
        return warnings
    except Exception as exc:
        raiseWithModifiedMessage(sys.exc_info(),
                                 self.formatMessage(str(exc), fieldnum))
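# --- Illustration (not part of the original class) ---
# Expected extraction for one of the formats above (hypothetical walkthrough):
#   "goroutine 17 [syscall, 31 minutes, locked to thread]:"
#   -> id=17, state='syscall', waittime=31, lockedtothread=True, no warnings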