def main():
    """Extract and print ARIB closed-caption packets from an MPEG2 Transport Stream file.

    Parses command-line arguments (input .ts filename and an optional PID),
    iterates the stream's caption Data Groups, and prints either the decoded
    caption text (when VERBOSE is set) or a summary line for each management
    data group.  Exits with status -1 if the input file does not exist.
    """
    global elapsed_time_s
    global VERBOSE
    global SILENT
    parser = argparse.ArgumentParser(description='Draw CC Packets from MPG2 Transport Stream file.')
    parser.add_argument('infile', help='Input filename (MPEG2 Transport Stream File)', type=str)
    parser.add_argument('-p', '--pid', help='Specify a PID of a PES known to contain closed caption info (tool will attempt to find the proper PID if not specified.).', type=int, default=-1)
    args = parser.parse_args()
    infilename = args.infile
    pid = args.pid  # NOTE(review): parsed but not yet consumed below — TODO: wire into PID selection.

    if not os.path.exists(infilename):
        # BUG FIX: was a Python-2 `print` statement and `os.exit(-1)`;
        # the os module has no exit() — sys.exit() is the correct call.
        print('Input filename :' + infilename + " does not exist.")
        sys.exit(-1)

    # ARIB caption data is packed into the PES as 'Data Group' structures;
    # iterate them via the next_data_group generator.
    for data_group in next_data_group(infilename):
        try:
            if not data_group.is_management_data():
                # We now have a Data Group that contains caption data.
                # Its payload is further divided into 'Data Unit' structures.
                caption = data_group.payload()
                # Iterate through the Data Units in this payload via another generator.
                for data_unit in next_data_unit(caption):
                    # Only "statement body" Data Units carry CC text.
                    if not isinstance(data_unit.payload(), StatementBody):
                        continue
                    # formatter dumps the basic text; second arg is a timestamp/offset of 0.
                    cc = formatter(data_unit.payload().payload(), 0)
                    if cc and VERBOSE:
                        # DECODE EARLY, ENCODE LATE: cc is unicode text.
                        # BUG FIX: printing cc.encode('utf-8') on Python 3 emits a
                        # b'...' repr; print the str and let stdout encode it.
                        print(cc)
            else:
                # Management data: report the language(s) it describes.
                management_data = data_group.payload()
                for language in range(management_data.num_languages()):
                    print("<Closed caption management data for language: "
                          + management_data.language_code(language) + ">")
        except EOFError:
            # End of stream inside a data group: stop quietly.
            pass
        except Exception:
            # BUG FIX: `except Exception, err:` is Python-2-only syntax and
            # `err` was unused; traceback.print_exc reports the details.
            print("Exception thrown while handling .es datagroup post parsing.")
            traceback.print_exc(file=sys.stdout)
def main():
    """Extract and print ARIB closed-caption packets from an MPEG2 Elementary Stream.

    Parses the input .es filename from the command line, iterates the
    stream's caption Data Groups, and prints the decoded caption text of
    every statement-body Data Unit.  Exits with status -1 if the input
    file does not exist.
    """
    parser = argparse.ArgumentParser(
        description='Draw CC Packets from MPG2 Elementary Stream.')
    # TYPO FIX in help text: "Elmentary" -> "Elementary".
    parser.add_argument('infile', help='Input filename (MPEG2 Elementary Stream)', type=str)
    args = parser.parse_args()
    infilename = args.infile

    if not os.path.exists(infilename):
        # BUG FIX: was a Python-2 `print` statement and `os.exit(-1)`;
        # the os module has no exit() — sys.exit() is the correct call.
        # Also fixes the "Elemenatry" typo in the user-facing message.
        print('Please provide input Elementary Stream file.')
        sys.exit(-1)

    # ARIB data is packed into a PES at a high level as 'Data Group' structures.
    # We iterate through the input PES file via the next_data_group generator.
    for data_group in next_data_group(infilename):
        # There are several types of Data Groups; filter out 'management data'
        # to get to those which contain basic CC text.
        if not data_group.is_management_data():
            # We now have a Data Group that contains caption data.
            # Its payload is further divided into 'Data Unit' structures.
            caption = data_group.payload()
            # Iterate through the Data Units in this payload via another generator.
            for data_unit in next_data_unit(caption):
                # Only "statement body" Data Units carry CC text.
                if not isinstance(data_unit.payload(), StatementBody):
                    continue
                # Finally a data unit with CC data: feed its payload to the
                # custom formatter, which yields the basic text.
                cc = formatter(data_unit.payload().payload())
                if cc:
                    # DECODE EARLY, ENCODE LATE: cc is unicode text.
                    # BUG FIX: printing cc.encode('utf-8') on Python 3 emits a
                    # b'...' repr; print the str and let stdout encode it.
                    print(cc)