Exemplo n.º 1
0
def pprocess_pline(text):
    """Split parser output held in a string into per-record dictionaries.

    The text is consumed line by line. Each line is stripped; lines that
    are empty afterwards, or that ``utility.isEmpty`` rejects, are
    skipped. Every remaining line is passed through ``clean_str`` and
    then assigned to the current record in this order:

    1. one line stored as the POS-tagged sentence,
    2. one line stored as the parse tree,
    3. any number of dependency lines, ended by a line that starts
       with ``***``.

    Args:
        text: The complete parser output as a single string.

    Returns:
        A tuple ``(pos_tags_dict, parse_tree_dict, parse_dependency_dict)``.
        All three dicts are keyed by the 0-based record index; the first
        two map to a cleaned line, the third to the list of cleaned
        dependency lines. A final record that is not closed by a ``***``
        line gets no entry in ``parse_dependency_dict``.
    """
    pos_tags_dict = {}
    parse_tree_dict = {}
    parse_dependency_dict = {}

    # Position inside the current record:
    # 0 = expect POS tags, 1 = expect parse tree, 2 = reading dependencies.
    curr_loc_read = 0
    lines_processed = 0
    dependency_list = []

    for line in text.split('\n'):
        line = line.strip()
        if not line:
            continue
        if utility.isEmpty(line):
            continue
        line = clean_str(line)

        if curr_loc_read == 0:
            pos_tags_dict[lines_processed] = line
            curr_loc_read = 1
        elif curr_loc_read == 1:
            parse_tree_dict[lines_processed] = line
            curr_loc_read = 2
        elif line.startswith("***"):
            # Record terminator. The list is stored by reference; this is
            # safe because the name is rebound to a fresh list just below.
            parse_dependency_dict[lines_processed] = dependency_list
            lines_processed += 1
            curr_loc_read = 0
            dependency_list = []
        else:
            dependency_list.append(line)

    return (pos_tags_dict, parse_tree_dict, parse_dependency_dict)
Exemplo n.º 2
0
def pprocess_pfile(filename):
    """Read parser output from a file into per-record dictionaries.

    The file is read line by line. Lines that ``utility.isEmpty``
    rejects are skipped; every other line is passed through
    ``clean_str`` and then assigned to the current record in this order:

    1. one line stored as the POS-tagged sentence,
    2. one line stored as the parse tree,
    3. any number of dependency lines, ended by a line that starts
       with ``***``.

    Args:
        filename: Path of the file to read.

    Returns:
        A tuple ``(pos_tags_dict, parse_tree_dict, parse_dependency_dict)``.
        All three dicts are keyed by the 0-based record index; the first
        two map to a cleaned line, the third to the list of cleaned
        dependency lines. A final record that is not closed by a ``***``
        line gets no entry in ``parse_dependency_dict``.
    """
    pos_tags_dict = {}
    parse_tree_dict = {}
    parse_dependency_dict = {}

    # Position inside the current record:
    # 0 = expect POS tags, 1 = expect parse tree, 2 = reading dependencies.
    curr_loc_read = 0
    lines_processed = 0
    dependency_list = []

    # The context manager closes the file even if a helper raises midway.
    with open(filename) as fd:
        for line in fd:
            if utility.isEmpty(line):
                continue
            line = clean_str(line)

            if curr_loc_read == 0:
                pos_tags_dict[lines_processed] = line
                curr_loc_read = 1
            elif curr_loc_read == 1:
                parse_tree_dict[lines_processed] = line
                curr_loc_read = 2
            elif line.startswith("***"):
                # Record terminator. The list is stored by reference; this
                # is safe because the name is rebound to a fresh list below.
                parse_dependency_dict[lines_processed] = dependency_list
                lines_processed += 1
                curr_loc_read = 0
                dependency_list = []
            else:
                dependency_list.append(line)

    return (pos_tags_dict, parse_tree_dict, parse_dependency_dict)