示例#1
0
def remove_npstuff(sent):
	"""Rebuild *sent* with its bracketed ``[...]`` chunks passed through the NP cleaner.

	The sentence is split on non-greedy ``[...]`` groups. Each resulting piece is
	handled as follows:

	* pieces shorter than four characters are kept verbatim (even if bracketed);
	* pieces that start with ``[`` and end with ``]`` are replaced by the result of
	  ``utility.np_cleaner`` and dropped when that result is the empty string;
	* every other piece is kept verbatim.

	The kept pieces are joined with a single space, so whitespace that was already
	present around a bracketed chunk is preserved in addition to the joining space.

	:param sent: string to process.
	:returns: the re-joined string.
	"""
	kept = []
	# The capture group makes re.split return the bracketed chunks as well as
	# the text between them. Raw string: ``\[`` is a regex escape, not a
	# string escape.
	for piece in re.split(r'(\[.*?\])', sent):
		is_np_chunk = len(piece) >= 4 and piece[0] == '[' and piece[-1] == ']'
		if not is_np_chunk:
			kept.append(piece)
			continue
		cleaned = utility.np_cleaner(piece)
		if cleaned != '':
			kept.append(cleaned)
	return ' '.join(kept)
示例#2
0
def _clean_np_matches(pattern, text):
	"""Return the truthy ``utility.np_cleaner`` results for every match of *pattern* in *text*."""
	cleaned = []
	for match in re.findall(pattern, text):
		np_clean = utility.np_cleaner(match)
		if np_clean:
			cleaned.append(np_clean)
	return cleaned


def get_np(word,parsed_sent,is_front,meta):
	"""Collect cleaned NP matches found before or after *word* in *parsed_sent*.

	Both *word* and *parsed_sent* are stripped, then *parsed_sent* is split on
	*word*. Which pieces of the split are searched depends on *is_front*:

	* truthy (e.g. "ABC was assassinated"): every piece that precedes an
	  occurrence of *word* is searched with ``PATT_LHALF_NP`` and then
	  ``PATT_NP``. If *word* does not occur at all, the whole sentence is
	  searched.
	* falsy (e.g. "murder of DEF"): every piece that follows an occurrence of
	  *word* is searched with ``PATT_RHALF_NP`` and then ``PATT_NP``. If *word*
	  does not occur, nothing is searched.

	Each match is passed through ``utility.np_cleaner`` and falsy results are
	discarded. The survivors are de-duplicated via a set (so their order is not
	guaranteed), passed to ``utility.common_cleaner`` and then, depending on
	*meta*, to one more cleaner.

	:param word: separator string; must be non-empty after stripping, since
		``str.split`` raises ``ValueError`` on an empty separator.
	:param parsed_sent: string that is split and searched.
	:param is_front: truthy to search before *word*, falsy to search after it.
	:param meta: ``'victim'``, ``'target'`` or ``'perpi'`` selects the matching
		``utility.*_cleaner``; any other value applies only ``common_cleaner``.
	:returns: the value returned by the last cleaner applied
		(presumably a list of strings -- confirm against ``utility``).
	"""
	word = word.strip()
	parsed_sent = parsed_sent.strip()

	temp_arr = parsed_sent.split(word)
	repeated = len(temp_arr) > 2

	if is_front:
		half_pattern = PATT_LHALF_NP
		# Everything before each occurrence of word. str.split always returns
		# at least one piece, so with no occurrence this is the whole sentence.
		segments = temp_arr[:-1] if len(temp_arr) > 1 else temp_arr[:1]
		label = "first part" if repeated else "first half"
	else:
		half_pattern = PATT_RHALF_NP
		# Everything after each occurrence of word; empty when word is absent.
		segments = temp_arr[1:]
		label = "second part" if repeated else "second half"

	if DEBUG and repeated:
		print("sent has more than two "+word+" parsed sent = "+parsed_sent)

	out_list = []
	for segment in segments:
		if DEBUG:
			print(label + " " + segment)
		# Half-split NP pattern first, then the full NP pattern.
		out_list.extend(_clean_np_matches(half_pattern, segment))
		out_list.extend(_clean_np_matches(PATT_NP, segment))

	# De-duplicate before the list-level cleaners run.
	out_list = list(set(out_list))
	new_list = utility.common_cleaner(out_list)
	if meta == 'victim':
		new_list = utility.victim_cleaner(new_list)
		if DEBUG:
			print("####victim removal list")
	elif meta == 'target':
		new_list = utility.target_cleaner(new_list)
		if DEBUG:
			print("####target removal list")
	elif meta == 'perpi':
		new_list = utility.perpi_cleaner(new_list)
		if DEBUG:
			print("####perp removal list")

	if DEBUG:
		print(new_list)

	return new_list