Example #1
0
def word_tokenize(data):
	"""Tokenize *data* into a list of words with NLTK.

	Returns ret_success with the token list, or ret_failure(701) when the
	input is missing or tokenization raises.
	"""
	if data is None:  # identity check, not `== None`
		return ret_failure(701)
	try:
		return ret_success(nltk.word_tokenize(data))
	except Exception:
		# Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt
		# still propagate; any tokenizer failure maps to error 701.
		return ret_failure(701)
Example #2
0
def stemmer(method, data):
	"""
	Takes an array of words in JSON format and stems each word.

	method: one of "lancaster", "porter" or "snowball"; any other value
	aborts with 404.
	Returns ret_success([[word, stem], ...]), ret_failure(703) on bad
	input, or ret_failure(702) when a stemmer call raises.
	"""
	data = parse_input(data)
	if data is False:  # parse_input signals bad JSON with False
		return ret_failure(703)
	# Dispatch table replaces three duplicated per-method loops.
	stemmers = {
		"lancaster": LancasterSt,
		"porter": PorterSt,
		"snowball": SnowballSt,
	}
	st = stemmers.get(method)
	if st is None:
		abort(404)
	res = []
	for word in data:
		try:
			res.append([word, st.stem(word)])
		except Exception:
			# Narrowed from a bare `except:`.
			return ret_failure(702)
	return ret_success(res)
Example #3
0
def lemmatize(method, data):
    """
    Takes an array of words, or an array of [word, pos_tag] pairs.
    Both Penn and Wordnet tags are supported (Penn tags are converted
    via penn_to_wn).

    Only method == "wordnet" is supported; anything else aborts with 404.
    Returns ret_success([[word, lemma], ...]), ret_failure(703) on bad
    input, ret_failure(704) when the Wordnet corpus is missing, or
    ret_failure(702) on any other lemmatizer error.
    """
    data = parse_input(data)
    if data is False:  # parse_input signals bad JSON with False
        return ret_failure(703)
    if method != "wordnet":
        abort(404)
    res = []
    for word in data:
        try:
            if isinstance(word, list):  # [word, tag] pair
                res.append([
                    word[0],
                    WordnetLm.lemmatize(word[0], penn_to_wn(word[1]))
                ])
            else:
                res.append([word, WordnetLm.lemmatize(word)])
        except LookupError:
            # Raised when the Wordnet data is not downloaded.
            return ret_failure(704)
        except Exception:
            # Narrowed from a bare `except:`.
            return ret_failure(702)
    return ret_success(res)
Example #4
0
def stemmer(method, data):
    """
    Takes an array of words in JSON format and stems each word.

    method: one of "lancaster", "porter" or "snowball"; any other value
    aborts with 404.
    Returns ret_success([[word, stem], ...]), ret_failure(703) on bad
    input, or ret_failure(702) when a stemmer call raises.
    """
    data = parse_input(data)
    if data is False:  # parse_input signals bad JSON with False
        return ret_failure(703)
    # Dispatch table replaces three duplicated per-method loops.
    stemmers = {
        "lancaster": LancasterSt,
        "porter": PorterSt,
        "snowball": SnowballSt,
    }
    st = stemmers.get(method)
    if st is None:
        abort(404)
    res = []
    for word in data:
        try:
            res.append([word, st.stem(word)])
        except Exception:
            # Narrowed from a bare `except:`.
            return ret_failure(702)
    return ret_success(res)
Example #5
0
def sent_tokenize(data):
	"""Split *data* into sentences with NLTK's default sentence tokenizer.

	Returns ret_failure(701) for missing input, ret_failure(702) when
	tokenization raises, otherwise ret_success with the sentence list.
	"""
	if data is None:  # identity check, not `== None`
		return ret_failure(701)
	try:
		return ret_success(nltk.sent_tokenize(data))
	except Exception:
		# Narrowed from a bare `except:`.
		return ret_failure(702)
Example #6
0
def sent_tokenize(data):
	"""Split *data* into sentences using the English Punkt model.

	Returns ret_failure(701) for missing input, ret_failure(702) when
	loading or tokenizing fails, otherwise ret_success with the sentences.
	"""
	if data is None:  # identity check, not `== None`
		return ret_failure(701)
	try:
		# NOTE(review): the Punkt model is reloaded from disk on every
		# call; consider caching it at module level — confirm callers
		# are English-only first.
		tok = nltk.data.load('tokenizers/punkt/english.pickle')
		return ret_success(tok.tokenize(data))
	except Exception:
		# Narrowed from a bare `except:`.
		return ret_failure(702)
Example #7
0
def tagger(data):
	"""Run the Stanford 3-class NER tagger over whitespace-split *data*.

	Returns ret_failure(705) when the tagger cannot be constructed
	(missing model/jar); errors raised while tagging propagate to the
	caller.
	"""
	try:
		st = NERTagger(
			'./nltk-data/StanfordNER/english.all.3class.distsim.crf.ser.gz',
			'./nltk-data/StanfordNER/stanford-ner.jar')
	except Exception:
		# Narrowed from a bare `except:`.
		return ret_failure(705)
	# Deliberately NOT wrapped in try/except (was commented out in the
	# original): tagging failures should surface rather than be masked
	# as error 702.
	return ret_success(st.tag(data.split()))
Example #8
0
def pos_tag(data):
	"""POS-tag a JSON array of words with NLTK's default tagger.

	Returns ret_success with the (word, tag) pairs, ret_failure(703) on
	bad input, ret_failure(704) when the tagger model is missing, or
	ret_failure(702) on any other tagging error.
	"""
	data = parse_input(data)
	if data is False:  # parse_input signals bad JSON with False
		return ret_failure(703)
	try:
		return ret_success(nltk.pos_tag(data))
	except LookupError:
		# Raised when the tagger model is not downloaded.
		return ret_failure(704)
	except Exception:
		# Narrowed from a bare `except:`.
		return ret_failure(702)
Example #9
0
def tagger(data):
    """Run the Stanford 3-class NER tagger over whitespace-split *data*.

    Returns ret_failure(705) when the tagger cannot be constructed
    (missing model/jar); errors raised while tagging propagate to the
    caller.
    """
    try:
        st = NERTagger(
            './nltk-data/StanfordNER/english.all.3class.distsim.crf.ser.gz',
            './nltk-data/StanfordNER/stanford-ner.jar')
    except Exception:
        # Narrowed from a bare `except:`.
        return ret_failure(705)
    # Deliberately NOT wrapped in try/except (was commented out in the
    # original): tagging failures should surface rather than be masked
    # as error 702.
    return ret_success(st.tag(data.split()))
Example #10
0
def lemmatize(method, data):
	"""
	Takes an array of words, or an array of [word, pos_tag] pairs.
	Both Penn and Wordnet tags are supported (Penn tags are converted
	via penn_to_wn).

	Only method == "wordnet" is supported; anything else aborts with 404.
	Returns ret_success([[word, lemma], ...]), ret_failure(703) on bad
	input, ret_failure(704) when the Wordnet corpus is missing, or
	ret_failure(702) on any other lemmatizer error.
	"""
	data = parse_input(data)
	if data is False:  # parse_input signals bad JSON with False
		return ret_failure(703)
	if method != "wordnet":
		abort(404)
	res = []
	for word in data:
		try:
			if isinstance(word, list):  # [word, tag] pair
				res.append([word[0], WordnetLm.lemmatize(word[0], penn_to_wn(word[1]))])
			else:
				res.append([word, WordnetLm.lemmatize(word)])
		except LookupError:
			# Raised when the Wordnet data is not downloaded.
			return ret_failure(704)
		except Exception:
			# Narrowed from a bare `except:`.
			return ret_failure(702)
	return ret_success(res)