def command_lookup(ns):
    """
    Command to look up the given descriptors from the command line.
    """
    table_group = TableGroupCacheManager.get_table_group(
        ns.tables_root_directory,
        ns.master_table_number,
        ns.originating_centre,
        ns.originating_subcentre,
        ns.master_table_version,
        ns.local_table_version)
    flat_text_render = FlatTextRenderer()
    # Load the code and flag tables for additional details
    table_group.B.load_code_and_flag()
    descriptors = table_group.descriptors_from_ids(
        *[d.strip() for d in ns.descriptors.split(',')])

    for descriptor in descriptors:
        if isinstance(descriptor, ElementDescriptor):
            print('{}, {}, {}, {}, {}'.format(
                flat_text_render.render(descriptor),
                descriptor.unit,
                descriptor.scale,
                descriptor.refval,
                descriptor.nbits))
            if ns.code_and_flag and descriptor.unit in (
                    UNITS_FLAG_TABLE, UNITS_CODE_TABLE, UNITS_COMMON_CODE_TABLE_C1):
                code_and_flag = table_group.B.code_and_flag_for_descriptor(descriptor)
                if code_and_flag:
                    for v, description in code_and_flag:
                        output = u'{:8d} {}'.format(v, description)
                        # With Python 2, some terminal utilities (e.g. more) and
                        # redirection to a file raise errors when a unicode string
                        # is printed. The fix is to encode it before printing.
                        if six.PY2:
                            output = output.encode('utf-8', 'ignore')
                        print(output)
        else:
            print(flat_text_render.render(descriptor))
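# A minimal usage sketch (ours, not part of the original module): command_lookup
# expects an argparse-style namespace whose attribute names mirror those read above.
# The concrete values below are illustrative assumptions, not pybufrkit defaults.
def _example_lookup():
    from argparse import Namespace
    command_lookup(Namespace(
        tables_root_directory=None,    # fall back to the bundled tables
        master_table_number=0,
        originating_centre=0,
        originating_subcentre=0,
        master_table_version=29,
        local_table_version=0,
        descriptors='001001, 001002',  # comma-separated ids, split above
        code_and_flag=False))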
def command_info(ns):
    """
    Command to show metadata information of the given files from the command line.
    """
    flat_text_render = FlatTextRenderer()
    decoder = Decoder(definitions_dir=ns.definitions_directory,
                      tables_root_dir=ns.tables_root_directory)

    def show_message_info(m):
        bufr_template, table_group = m.build_template(
            ns.tables_root_directory, normalize=1)
        print(flat_text_render.render(m))
        if ns.template:
            print(flat_text_render.render(bufr_template))

    for filename in ns.filenames:
        with open(filename, 'rb') as ins:
            s = ins.read()

        if ns.multiple_messages:
            for bufr_message in generate_bufr_message(
                    decoder, s, file_path=filename, info_only=True):
                show_message_info(bufr_message)
        elif ns.count_only:
            count = 0
            for _ in generate_bufr_message(decoder, s, info_only=True):
                count += 1
            print('{}: {}'.format(filename, count))
        else:
            bufr_message = decoder.process(s, file_path=filename, info_only=True)
            show_message_info(bufr_message)
def command_query(ns):
    """
    Command to query given BUFR files.
    """
    decoder = Decoder(definitions_dir=ns.definitions_directory,
                      tables_root_dir=ns.tables_root_directory,
                      compiled_template_cache_max=ns.compiled_template_cache_max)
    for filename in ns.filenames:
        with open(filename, 'rb') as ins:
            s = ins.read()

        if ns.query_string.strip()[0] == '%':
            bufr_message = decoder.process(s, file_path=filename, info_only=True)
            from pybufrkit.mdquery import MetadataExprParser, MetadataQuerent
            querent = MetadataQuerent(MetadataExprParser())
            value = querent.query(bufr_message, ns.query_string)
            print(filename)
            print(value)
        else:
            bufr_message = decoder.process(
                s, file_path=filename, wire_template_data=True,
                ignore_value_expectation=ns.ignore_value_expectation)
            from pybufrkit.dataquery import NodePathParser, DataQuerent
            querent = DataQuerent(NodePathParser())
            query_result = querent.query(bufr_message, ns.query_string)

            if ns.json:
                if ns.nested:
                    print(json.dumps(NestedJsonRenderer().render(query_result),
                                     **JSON_DUMPS_KWARGS))
                else:
                    print(json.dumps(FlatJsonRenderer().render(query_result),
                                     **JSON_DUMPS_KWARGS))
            else:
                print(filename)
                print(FlatTextRenderer().render(query_result))
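# Illustrative query strings (the branch above decides the route; the file name is
# hypothetical): a query starting with '%' goes to MetadataQuerent, e.g. '%year' or
# '%originating_centre' as used later in this document; anything else is parsed by
# NodePathParser as a descriptor path for DataQuerent.
#
#   pybufrkit query %originating_centre forecast.bufr
#   pybufrkit query 005002 forecast.bufr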
class TablesTests(unittest.TestCase):

    def setUp(self):
        self.table_group = TableGroupCacheManager.get_table_group(master_table_version=29)
        self.flat_text_renderer = FlatTextRenderer()

    def tearDown(self):
        pass

    def test_table_group_01(self):
        template = self.table_group.lookup(340009)
        assert self.flat_text_renderer.render(template) == table_group_01_cmp
        assert flat_member_ids(template) == [
            1007, 1031, 2019, 2020, 4001, 4002, 4003, 4004, 4005, 4006,
            5040, 201136, 5041, 201000, 25071, 5001, 5001, 6001, 6001,
            107064, 106032, 8012, 8013, 8065, 8072, 13039, 40015]

    def test_table_group_02(self):
        template = self.table_group.lookup(340008)
        assert self.flat_text_renderer.render(template) == table_group_02_cmp
def show_message(m):
    if ns.attributed:
        m.wire()
        if ns.json:
            print(json.dumps(NestedJsonRenderer().render(m), **JSON_DUMPS_KWARGS))
        else:
            print(NestedTextRenderer().render(m))
    else:
        if ns.json:
            print(json.dumps(FlatJsonRenderer().render(m), **JSON_DUMPS_KWARGS))
        else:
            print(FlatTextRenderer().render(m))
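# Summary of the dispatch above (the mapping is exactly what the code does; the
# --attributed/--json flag spellings are assumed from pybufrkit's usual CLI):
#   attributed + json -> NestedJsonRenderer    attributed only -> NestedTextRenderer
#   json only         -> FlatJsonRenderer      neither         -> FlatTextRenderer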
def test(self):
    output = []
    with open(os.path.join(DATA_DIR, 'prepbufr.bufr'), 'rb') as ins:
        for bufr_message in generate_bufr_message(self.decoder, ins.read()):
            output.append(FlatTextRenderer().render(bufr_message))
    lines = [
        line for line in ('\n'.join(output)).splitlines(True)
        if not line.startswith('TableGroupKey') and not line.startswith('stop_signature')
    ]
    assert ''.join(lines).endswith(compare)
def read_one_bufr_tc(self, file, id_no=None, fcast_rep=None):
    """
    Read a single BUFR TC track file.

    Parameters:
        file (str, filelike): Path object, string, or file-like object
        id_no (int): Numerical ID; optional. Else use date + random int.
        fcast_rep (int): Of the form 1xx000, indicating the delayed
            replicator containing the forecast values; optional.
    """
    decoder = pybufrkit.decoder.Decoder()
    if hasattr(file, 'read'):
        bufr = decoder.process(file.read())
    elif hasattr(file, 'read_bytes'):
        bufr = decoder.process(file.read_bytes())
    elif os.path.isfile(file):
        with open(file, 'rb') as i:
            bufr = decoder.process(i.read())
    else:
        raise FileNotFoundError('Check file argument')
    text_data = FlatTextRenderer().render(bufr)

    # Set up parsers and querents
    # npparser = pybufrkit.dataquery.NodePathParser()
    # data_query = pybufrkit.dataquery.DataQuerent(npparser).query
    meparser = pybufrkit.mdquery.MetadataExprParser()
    meta_query = pybufrkit.mdquery.MetadataQuerent(meparser).query

    timestamp_origin = dt.datetime(
        meta_query(bufr, '%year'), meta_query(bufr, '%month'),
        meta_query(bufr, '%day'), meta_query(bufr, '%hour'),
        meta_query(bufr, '%minute'))
    timestamp_origin = np.datetime64(timestamp_origin)

    orig_centre = meta_query(bufr, '%originating_centre')
    if orig_centre == 98:
        provider = 'ECMWF'
    else:
        provider = 'BUFR code ' + str(orig_centre)

    list1 = []
    with StringIO(text_data) as input_data:
        # Skip text before the beginning of the interesting block:
        for line in input_data:
            if line.startswith('<<<<<< section 4 >>>>>>'):
                break
        # Read text until the end of the block:
        for line in input_data:
            if line.startswith('<<<<<< section 5 >>>>>>'):
                break
            list1.append(line)
    list1 = [li for li in list1
             if li.startswith(" ") or li.startswith("##")]

    list2 = []
    for items in list1:
        if items.startswith("######"):
            list2.append([0, items.split()[1], items.split()[2]])
        else:
            list2.append([int(items.split()[0]), items.split()[1], items.split()[-1]])
    df_ = pd.DataFrame(list2, columns=['id', 'code', 'Data'])

    def label_en(row, co):
        if row['code'] == co:
            return int(row['Data'])
        return np.nan

    df_['subset'] = df_.apply(lambda row: label_en(row, co='subset'), axis=1)
    df_['subset'] = df_['subset'].fillna(method='ffill')
    df_['model_sgn'] = df_.apply(lambda row: label_en(row, co='008005'), axis=1)
    df_['model_sgn'] = df_['model_sgn'].fillna(method='ffill')
    df_['model_sgn'] = df_['model_sgn'].fillna(method='bfill')

    for names, group in df_.groupby("subset"):
        pcen = list(group.query('code in ["010051"]')['Data'].values)
        latc = list(group.query('code in ["005002"] and model_sgn in [1]')['Data'].values)
        lonc = list(group.query('code in ["006002"] and model_sgn in [1]')['Data'].values)
        latm = list(group.query('code in ["005002"] and model_sgn in [3]')['Data'].values)
        lonm = list(group.query('code in ["006002"] and model_sgn in [3]')['Data'].values)
        wind = list(group.query('code in ["011012"]')['Data'].values)
        vhr = list(group.query('code in ["004024"]')['Data'].values)

        wind = [np.nan if value == 'None' else float(value) for value in wind]
        pre = [np.nan if value == 'None' else float(value) / 100 for value in pcen]
        lonm = [np.nan if value == 'None' else float(value) for value in lonm]
        lonc = [np.nan if value == 'None' else float(value) for value in lonc]
        latm = [np.nan if value == 'None' else float(value) for value in latm]
        latc = [np.nan if value == 'None' else float(value) for value in latc]
        vhr = [np.nan if value == 'None' else int(value) for value in vhr]
        timestep_int = np.array(vhr).squeeze()
        # np.array(msg['timestamp'].get_values(index)).squeeze()
        timestamp = timestamp_origin + timestep_int.astype('timedelta64[h]')
        year = list(group.query('code in ["004001"]')['Data'].values)
        month = list(group.query('code in ["004002"]')['Data'].values)
        day = list(group.query('code in ["004003"]')['Data'].values)
        hour = list(group.query('code in ["004004"]')['Data'].values)
        # forecs_agency_id = list(group.query('code in ["001033"]')['Data'].values)
        storm_name = list(group.query('code in ["001027"]')['Data'].values)
        storm_id = list(group.query('code in ["001025"]')['Data'].values)
        frcst_type = list(group.query('code in ["001092"]')['Data'].values)
        # Planar centre-to-max-wind distance, scaling degrees by ~111 km each
        max_radius = np.sqrt(np.square(np.array(latc) - np.array(latm))
                             + np.square(np.array(lonc) - np.array(lonm))) * 111

        date_object = '%04d%02d%02d%02d' % (
            int(year[0]), int(month[0]), int(day[0]), int(hour[0]))
        date_object = dt.datetime.strptime(date_object, "%Y%m%d%H")
        # timestamp = [(date_object + dt.timedelta(hours=int(value))).strftime("%Y%m%d%H")
        #              for value in vhr]
        # timestamp = [dt.datetime.strptime(value, "%Y%m%d%H") for value in timestamp]

        track = xr.Dataset(
            data_vars={
                'max_sustained_wind': ('time', wind[1:]),
                'central_pressure': ('time', pre[1:]),
                'ts_int': ('time', timestep_int),
                'max_radius': ('time', max_radius[1:]),
                'lat': ('time', latc[1:]),
                'lon': ('time', lonc[1:]),
                'environmental_pressure': ('time', np.full_like(timestamp, DEF_ENV_PRESSURE, dtype=float)),
                'radius_max_wind': ('time', np.full_like(timestamp, np.nan, dtype=float)),
            },
            coords={'time': timestamp},
            attrs={
                'max_sustained_wind_unit': 'm/s',
                'central_pressure_unit': 'mb',
                'name': storm_name[0].strip("'"),
                'sid': storm_id[0].split("'")[1],
                'orig_event_flag': False,
                'data_provider': provider,
                'id_no': 'NA',
                'ensemble_number': int(names),
                'is_ensemble': 'TRUE' if frcst_type[0] != '0' else 'False',
                'forecast_time': date_object,
            })
        track = track.set_coords(['lat', 'lon'])
        track['time_step'] = track.ts_int - track.ts_int.shift({'time': 1}, fill_value=0)
        # track = track.drop('ts_int')
        track.attrs['basin'] = BASINS[storm_id[0].split("'")[1][2].upper()]
        cat_name = CAT_NAMES[set_category(
            max_sus_wind=track.max_sustained_wind.values,
            wind_unit=track.max_sustained_wind_unit,
            saffir_scale=SAFFIR_MS_CAT)]
        track.attrs['category'] = cat_name

        if track.sizes['time'] == 0:
            track = None
        if track is not None:
            self.append(track)
        else:
            LOGGER.debug('Dropping empty track, subset %s', names)
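# The max_radius expression above approximates the distance between the storm centre
# and the location of maximum wind on a flat grid, scaling degrees by ~111 km each.
# A minimal equivalent sketch (the helper name is ours; like the original expression,
# it ignores the cos(latitude) shrinkage of longitude degrees):
def _deg_distance_km(lat1, lon1, lat2, lon2):
    """Rough planar distance in km between two (lat, lon) arrays in degrees."""
    return np.sqrt(np.square(np.array(lat1) - np.array(lat2))
                   + np.square(np.array(lon1) - np.array(lon2))) * 111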
    'jaso_214.bufr',
    'mpco_217.bufr',
    'profiler_european.bufr',
    'rado_250.bufr',
    'uegabe.bufr',
)


def read_bufr_file(file_name):
    with open(os.path.join(DATA_DIR, file_name), 'rb') as ins:
        s = ins.read()
    return s


decoder = Decoder()
flat_text_renderer = FlatTextRenderer()
nested_text_renderer = NestedTextRenderer()
flat_json_renderer = FlatJsonRenderer()
nested_json_renderer = NestedJsonRenderer()


def test_nested_json_to_flat_json():
    def func(filename):
        s = read_bufr_file(filename)
        bufr_message = decoder.process(s)
        nested = nested_json_renderer.render(bufr_message)
        flat = flat_json_renderer.render(bufr_message)
        assert flat == nested_json_to_flat_json(nested)

    for filename in FILES:
        func(filename)
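# A single-file sketch of the round trip exercised above (the function name is ours):
# NestedJsonRenderer output, once flattened, must equal FlatJsonRenderer output.
def example_round_trip(filename='jaso_214.bufr'):
    bufr_message = decoder.process(read_bufr_file(filename))
    nested = nested_json_renderer.render(bufr_message)
    assert nested_json_to_flat_json(nested) == flat_json_renderer.render(bufr_message)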
    sys.exit("No named storms, exiting")

composite_storm_files = [named_storm_files[x:x + 2]
                         for x in range(0, len(named_storm_files), 2)]

for storm in composite_storm_files:
    ens_path = storm[0]
    det_path = storm[1]
    # Decode the ensemble BUFR file
    decoder = Decoder()
    with open(ens_path, 'rb') as ins:
        bufr_message = decoder.process(ins.read())
    text_data = FlatTextRenderer().render(bufr_message)
    text_array = np.array(text_data.splitlines())
    for line in text_array:
        if "WMO LONG STORM NAME" in line:
            storm_name = line.split()[-1][:-1]
    section4 = text_array[
        np.where(text_array == "<<<<<< section 4 >>>>>>")[0][0]:
        np.where(text_array == "<<<<<< section 5 >>>>>>")[0][0]]
    list = []  # NOTE: shadows the built-in list
    ens_subset = 0
    attribute = None
    tplus_hour = None
def setUp(self):
    self.table_group = TableGroupCacheManager.get_table_group(master_table_version=29)
    self.flat_text_renderer = FlatTextRenderer()
def setUp(self):
    self.table_group = get_table_group()
    self.flat_text_renderer = FlatTextRenderer()
def setUp(self):
    self.table_group = get_table_group(master_table_version=29)
    self.flat_text_renderer = FlatTextRenderer()
def ecmwf_data_process(Input_folder, filepatern):
    """
    Preprocess ECMWF forecast data downloaded above.
    """
    # ecmwf_data_download(Input_folder, filepatern)
    path_ecmwf = os.path.join(Input_folder, 'ecmwf/')
    decoder = Decoder()
    # Meteorological attribute significance (code table 008005):
    # 1 = storm centre, 3 = location of maximum wind,
    # 4 = location of the storm in the perturbed analysis,
    # 5 = location of the storm in the analysis
    ecmwf_files = [f for f in listdir(path_ecmwf) if isfile(join(path_ecmwf, f))]
    # ecmwf_files = [file_name for file_name in ecmwf_files
    #                if file_name.startswith('A_JSXX02ECEP')]
    list_df = []
    for ecmwf_file in ecmwf_files:
        f_name = 'ECMWF_' + ecmwf_file.split('_')[1] + '_' + ecmwf_file.split('_')[4]
        model_name = ecmwf_file.split('_')[1][6:10]
        typhoon_name = ecmwf_file.split('_')[-4]
        with open(os.path.join(path_ecmwf, ecmwf_file), 'rb') as bin_file:
            bufr_message = decoder.process(bin_file.read())
        text_data = FlatTextRenderer().render(bufr_message)
        STORMNAME = typhoon_name  # ecmwf_file.split('_')[8]

        list1 = []
        with StringIO(text_data) as input_data:
            # Skip text before the beginning of the interesting block:
            for line in input_data:
                if line.startswith('<<<<<< section 4 >>>>>>'):
                    break
            # Read text until the end of the block:
            for line in input_data:
                if line.startswith('<<<<<< section 5 >>>>>>'):
                    break
                list1.append(line)

        list_var = ["004024", "004001", "004002", "004003", "004004", "004005",
                    "001092", "011012", "010051", "005002", "006002", "001091",
                    "008005"]
        list2 = [[int(li.split()[0]), li.split()[1], li.split()[-1]]
                 for li in list1
                 if li.startswith(" ") and li.split()[1] in list_var]
        df = pd.DataFrame(list2, columns=['id', 'code', 'Data'])

        def label_en(row, co):
            if row['code'] == co:
                return int(row['Data'])
            return np.nan

        df['model_sgn'] = df.apply(lambda row: label_en(row, co='008005'), axis=1)
        df['ensamble_num'] = df.apply(lambda row: label_en(row, co='001091'), axis=1)
        df['frcst_type'] = df.apply(lambda row: label_en(row, co='001092'), axis=1)
        df['frcst_type'] = df['frcst_type'].fillna(method='ffill')
        df['frcst_type'] = df['frcst_type'].fillna(method='bfill')
        df['ensamble_num'] = df['ensamble_num'].fillna(method='ffill')
        df['model_sgn'] = df['model_sgn'].fillna(method='ffill')
        df['model_sgn'] = df['model_sgn'].fillna(method='bfill')

        df_time = df.query('code in ["004001","004002","004003","004004","004005"]')
        date_object = '%04d%02d%02d%02d' % (int(df_time['Data'].to_list()[0]),
                                            int(df_time['Data'].to_list()[1]),
                                            int(df_time['Data'].to_list()[2]),
                                            int(df_time['Data'].to_list()[3]))
        date_object = datetime.strptime(date_object, "%Y%m%d%H")
        # (date_object + timedelta(hours=x)).strftime("%Y%m%d%H%M")

        # 1 = storm centre and 3 = maximum wind speed; see
        # https://vocabulary-manager.eumetsat.int/vocabularies/BUFR/WMO/6/TABLE_CODE_FLAG/008005
        df_center = df.query('code in ["010051","005002","006002"] and model_sgn in [1]')
        df_center2 = df.query('code in ["010051","005002","006002"] and model_sgn in [4,5]')
        df_max = df.query('code in ["011012","005002","006002","004024"] and model_sgn in [3]')

        latc, lonc, pcen, frcst_type, ensambles = [], [], [], [], []
        for names, group in df_center.groupby("ensamble_num"):
            latc.append(list(group[group.code == "005002"]['Data'].values))
            lonc.append(list(group[group.code == "006002"]['Data'].values))
            pcen.append(list(group[group.code == "010051"]['Data'].values))
        lat, lon, vmax, vhr = [], [], [], []
        for names, group in df_max.groupby("ensamble_num"):
            lat.append(list(group[group.code == "005002"]['Data'].values))
            lon.append(list(group[group.code == "006002"]['Data'].values))
            vmax.append(list(group[group.code == "011012"]['Data'].values))
            vhr.append(list(group[group.code == "004024"]['Data'].values))
            frcst_type.append(list(np.unique(group.frcst_type.values))[0])
            ensambles.append(names)

        latc1, lonc1, pcen1 = [], [], []
        for names, group in df_center2.groupby("ensamble_num"):
            latc1.append(list(group[group.code == "005002"]['Data'].values))
            lonc1.append(list(group[group.code == "006002"]['Data'].values))
            pcen1.append(list(group[group.code == "010051"]['Data'].values))
        for i in range(len(pcen1)):
            pcen1[i].extend(pcen[i])

        # Fixed forecast steps in hours; overrides the 004024 values collected above
        vhr = ['0', '6', '12', '18', '24', '30', '36', '42', '48', '54',
               '60', '66', '72', '78', '84', '90', '96', '102', '108']

        for i in range(len(ensambles)):
            wind = [np.nan if value == 'None' else float(value) for value in vmax[i]]
            pre = [np.nan if value == 'None' else float(value) / 100 for value in pcen1[i]]
            lon_ = [np.nan if value == 'None' else float(value) for value in lon[i]]
            lat_ = [np.nan if value == 'None' else float(value) for value in lat[i]]
            lon1_ = [np.nan if value == 'None' else float(value) for value in lonc[i]]
            lat1_ = [np.nan if value == 'None' else float(value) for value in latc[i]]
            max_radius = np.sqrt(np.square(np.array(lon_) - np.array(lon1_))
                                 + np.square(np.array(lat_) - np.array(lat1_))) * 110
            timestamp = [(date_object + timedelta(hours=int(value))).strftime("%Y%m%d%H%M")
                         for value in vhr]
            timestep_int = [int(value) for value in vhr]
            track = xr.Dataset(
                data_vars={
                    'max_sustained_wind': ('time', wind),
                    'central_pressure': ('time', pre),
                    'ts_int': ('time', timestep_int),
                    'max_radius': ('time', max_radius),
                    'lat': ('time', lat_),
                    'lon': ('time', lon_),
                },
                coords={'time': timestamp},
                attrs={
                    'max_sustained_wind_unit': 'm/s',
                    'central_pressure_unit': 'mb',
                    'name': typhoon_name,
                    'sid': 'NA',
                    'orig_event_flag': False,
                    'data_provider': 'ECMWF',
                    'id_no': 'NA',
                    'ensemble_number': ensambles[i],
                    'is_ensemble': 'TRUE' if frcst_type[i] == 4 else 'False',
                    'forecast_time': date_object.strftime("%Y%m%d%H%M"),
                    'basin': 'WP',
                    'category': 'NA',
                })
            track = track.set_coords(['lat', 'lon'])
            list_df.append(track)

        # %%
        date_object = '%04d%02d%02d%02d' % (
            int([line.split()[-1] for line in StringIO(text_data)
                 if line[6:17].upper() == "004001 YEAR"][0]),
            int([line.split()[-1] for line in StringIO(text_data)
                 if line[6:18].upper() == "004002 MONTH"][0]),
            int([line.split()[-1] for line in StringIO(text_data)
                 if line[6:16].upper() == "004003 DAY"][0]),
            int([line.split()[-1] for line in StringIO(text_data)
                 if line[6:17].upper() == "004004 HOUR"][0]))
        date_object = datetime.strptime(date_object, "%Y%m%d%H")
        val_t = [int(line.split()[-1]) for line in StringIO(text_data)
                 if line[6:40].upper() == "004024 TIME PERIOD OR DISPLACEMENT"]
        val_wind = [line.split()[-1] for line in StringIO(text_data)
                    if line[6:12] == "011012"]
        val_pre = [line.split()[-1] for line in StringIO(text_data)
                   if line[6:12] == "010051"]
        val_lat = [line.split()[-1] for line in StringIO(text_data)
                   if line[6:12] == "005002"]
        val_lon = [line.split()[-1] for line in StringIO(text_data)
                   if line[6:12] == "006002"]
        val_ens = [line.split()[-1] for line in StringIO(text_data)
                   if line[6:12] == "001091"]
        val_dis = [line.split()[-1] for line in StringIO(text_data)
                   if line[6:12] == "008005"]

        if len(val_ens) > 1:
            val_t = val_t[0:int(len(val_t) / len(val_ens))]
            val_t.insert(0, 0)
            val_ensamble = duplicate(val_ens, int(len(val_wind) / len(val_ens)))
            val_time = val_t * len(val_ens)
        else:
            val_ensamble = 'NA'
            val_t.insert(0, 0)
            val_time = val_t

        ecmwf_df = pd.DataFrame({'lon': val_lon, 'lat': val_lat, 'met_dis': val_dis})
        ecmwf_center = ecmwf_df[ecmwf_df['met_dis'] == '1']
        ecmwf_df2 = pd.DataFrame({'STORMNAME': STORMNAME,
                                  'time': val_time,
                                  'lon': ecmwf_center['lon'].values,
                                  'lat': ecmwf_center['lat'].values,
                                  'windsped': val_wind,
                                  'pressure': val_pre,
                                  'ens': val_ensamble})
        ecmwf_df2['YYYYMMDDHH'] = ecmwf_df2['time'].apply(
            lambda x: (date_object + timedelta(hours=x)).strftime("%Y%m%d%H%M"))
        dict1 = []
        ecmwf_df2 = ecmwf_df2.replace(['None'], np.nan)

        typhoon_df = pd.DataFrame()
        typhoon_df[['YYYYMMDDHH', 'LAT', 'LON', 'VMAX', 'PRESSURE',
                    'STORMNAME', 'ENSAMBLE']] = ecmwf_df2[
            ['YYYYMMDDHH', 'lat', 'lon', 'windsped', 'pressure', 'STORMNAME', 'ens']]
        typhoon_df[['LAT', 'LON', 'VMAX']] = typhoon_df[['LAT', 'LON', 'VMAX']].apply(pd.to_numeric)
        typhoon_df['VMAX'] = typhoon_df['VMAX'].apply(lambda x: x * 1.94384449 * 1.05)  # convert to knots
        typhoon_df.to_csv(
            os.path.join(Input_folder, 'ECMWF_%s_%s_%s.csv' % (
                Input_folder.split('/')[-3], STORMNAME, model_name)),
            index=False)
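# `duplicate` is called above but not defined in this fragment. Judging from its use
# (expanding one ensemble id per forecast step so the column aligns with val_wind),
# a minimal sketch could look like this (an assumption, not the original helper):
def duplicate(values, n):
    """Repeat each element of `values` n times, preserving order."""
    return [v for v in values for _ in range(n)]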
# %%
Input_folder = 'C:/Users/ATeklesadik/OneDrive - Rode Kruis/Documents/documents/Typhoon-Impact-based-forecasting-model/temp/'
# date_object = '%04d%02d%02d%02d' % (int([line.split()[-1] for line in StringIO(text_data)
#                                          if line[6:17].upper() == "004001 YEAR"][0])
path_ecmwf = os.path.join(Input_folder, 'ecmwf/')
# Meteorological attribute significance (code table 008005):
# 1 = storm centre, 3 = location of maximum wind,
# 4 = location of the storm in the perturbed analysis,
# 5 = location of the storm in the analysis
ecmwf_files = [f for f in listdir(path_ecmwf) if isfile(join(path_ecmwf, f))]
# ecmwf_files = [file_name for file_name in ecmwf_files
#                if file_name.startswith('A_JSXX02ECEP')]
list_df = []
ecmwf_file = ecmwf_files[1]
with open(os.path.join(path_ecmwf, ecmwf_file), 'rb') as bin_file:
    bufr = decoder.process(bin_file.read())
text_data = FlatTextRenderer().render(bufr)  # render the BUFR message as flat text
# %%
list1 = []
with StringIO(text_data) as input_data:
    # Skip text before the beginning of the interesting block:
    for line in input_data:
        if line.startswith('<<<<<< section 4 >>>>>>'):
            break
    # Read text until the end of the block:
    for line in input_data:
        if line.startswith('<<<<<< section 5 >>>>>>'):
            break
        list1.append(line)