Пример #1
0
def __cut_DAG(sentence):
    """Yield the pieces of *sentence* along the route filled in by ``calc``.

    ``route[x][1]`` is used as the inclusive end index of the piece that
    starts at ``x``.  Pieces longer than one character are yielded as they
    are.  Consecutive one-character pieces are collected in a buffer; when
    the run ends, a one-character buffer is yielded unchanged and a longer
    one is handed to ``finalseg.__cut``, whose results are yielded in turn.
    """
    dag = get_DAG(sentence)
    route = {}
    calc(sentence, dag, 0, route=route)

    pending = u''  # run of adjacent one-character pieces not yet emitted
    start = 0
    total = len(sentence)
    while start < total:
        stop = route[start][1] + 1
        piece = sentence[start:stop]

        if stop - start == 1:
            # Single character: postpone it until the run is complete.
            pending += piece
            start = stop
            continue

        # A multi-character piece ends the current run; emit the run first.
        if pending:
            if len(pending) == 1:
                yield pending
            else:
                for token in finalseg.__cut(pending):
                    yield token
            pending = u''
        yield piece
        start = stop

    # Emit whatever run is still buffered at the end of the sentence.
    if len(pending) == 1:
        yield pending
    elif pending:
        for token in finalseg.__cut(pending):
            yield token
Пример #2
0
def __cut_DAG(sentence):
    """Yield ``(piece, start_index)`` pairs for *sentence*.

    The route filled in by ``calc`` gives, for every start index ``x``, the
    inclusive end index ``route[x][1]`` of the piece beginning there.
    Multi-character pieces are yielded with their start index.  Adjacent
    one-character pieces are buffered; a buffer holding one character is
    yielded with its own index, a longer buffer is passed to
    ``finalseg.__cut`` and every ``(token, offset)`` pair it produces is
    yielded with the offset shifted by the index where the buffer began.
    """
    DAG = get_DAG(sentence)
    route = {}
    calc(sentence, DAG, 0, route=route)

    N = len(sentence)
    pos = 0
    singles = u''  # buffered run of one-character pieces ending at ``pos``
    while pos < N:
        nxt = route[pos][1] + 1
        word = sentence[pos:nxt]

        if nxt - pos == 1:
            singles += word
            pos = nxt
            continue

        # The buffered run (if any) ends right before ``pos``.
        run_len = len(singles)
        if run_len == 1:
            yield (singles, pos - 1)
        elif run_len > 1:
            run_start = pos - run_len
            for tok, offset in finalseg.__cut(singles):
                yield (tok, run_start + offset)
        singles = u''

        yield (word, pos)
        pos = nxt

    # Trailing run: ``pos`` now points just past the buffered characters.
    run_len = len(singles)
    if run_len == 1:
        yield (singles, pos - 1)
    elif run_len > 1:
        run_start = pos - run_len
        for tok, offset in finalseg.__cut(singles):
            yield (tok, run_start + offset)
Пример #3
0
def __cut_DAG(sentence):
    """Generate ``(piece, start_index)`` tuples covering *sentence*.

    Pieces follow the route computed by ``calc``: the piece starting at
    index ``x`` ends (inclusively) at ``route[x][1]``.  Pieces of more than
    one character are emitted directly.  One-character pieces are gathered
    into a run that is emitted once a longer piece or the end of the
    sentence is reached.
    """

    def located(run, end):
        """Return the ``(token, index)`` pairs for a non-empty *run*.

        *end* is the index just past the last character of the run.  A
        one-character run is returned as is; a longer run is split by
        ``finalseg.__cut`` and the offsets it reports are shifted by the
        index at which the run starts.
        """
        size = len(run)
        if size == 1:
            return [(run, end - 1)]
        base = end - size
        return ((tok, base + off) for tok, off in finalseg.__cut(run))

    DAG = get_DAG(sentence)
    route = {}
    calc(sentence, DAG, 0, route=route)

    run = u''
    begin = 0
    limit = len(sentence)
    while begin < limit:
        end = route[begin][1] + 1
        word = sentence[begin:end]
        if end - begin == 1:
            run += word
        else:
            if run:
                # The run stops right before this multi-character piece.
                for item in located(run, begin):
                    yield item
                run = u''
            yield (word, begin)
        begin = end

    if run:
        for item in located(run, begin):
            yield item
Пример #4
0
def __cut_DAG(sentence):
    """Yield the pieces of *sentence* chosen by ``calc`` over a trie-built DAG.

    Step 1 walks the module-level ``trie`` from every start index of the
    sentence; each time the node reached contains the key ``''`` the
    current index is recorded in ``DAG[start]`` as a possible end index.
    Start indices with no recorded end get themselves as the only end.

    Step 2 lets ``calc`` fill ``route`` and follows it: ``route[x][1]`` is
    the inclusive end of the piece starting at ``x``.  Multi-character
    pieces are yielded directly.  Runs of one-character pieces are
    buffered; a one-character run is yielded unchanged, a longer one is
    passed to ``finalseg.__cut`` and its results are yielded.
    """
    N = len(sentence)

    # Step 1: collect candidate end indices for every start index.
    DAG = {}
    start, cursor, node = 0, 0, trie
    while start < N:
        ch = sentence[cursor]
        if ch in node:
            node = node[ch]
            if '' in node:
                DAG.setdefault(start, []).append(cursor)
            cursor += 1
            if cursor < N:
                continue  # keep extending the match from the same start
        # Either the trie has no branch for ``ch`` or the sentence ended:
        # restart the walk from the next start index.
        start += 1
        cursor = start
        node = trie

    for idx in xrange(N):
        if idx not in DAG:
            DAG[idx] = [idx]

    # Step 2: follow the selected route and emit pieces.
    route = {}
    calc(sentence, DAG, 0, route=route)

    pending = u''  # run of one-character pieces not yet emitted
    pos = 0
    while pos < N:
        stop = route[pos][1] + 1
        piece = sentence[pos:stop]
        if stop - pos == 1:
            pending += piece
        else:
            if pending:
                if len(pending) == 1:
                    yield pending
                else:
                    for token in finalseg.__cut(pending):
                        yield token
                pending = u''
            yield piece
        pos = stop

    if len(pending) == 1:
        yield pending
    elif pending:
        for token in finalseg.__cut(pending):
            yield token
Пример #5
0
def __cut_DAG(sentence):
    """Split *sentence* into pieces, yielding them in order.

    A DAG is first built by matching the sentence against the module-level
    ``trie``: for each start index the trie is descended character by
    character, and every index at which the current node contains the key
    ``''`` is stored as a candidate end for that start.  A start index
    without any candidate receives itself as its single end.

    ``calc`` then fills ``route``; the piece starting at ``x`` runs up to
    and including ``route[x][1]``.  Pieces longer than one character are
    yielded unchanged.  Neighbouring one-character pieces are joined into a
    run, which is yielded as is when it holds one character and is
    otherwise split by ``finalseg.__cut``.
    """

    def split_run(run):
        """Return the pieces to emit for a non-empty run of single characters."""
        if len(run) == 1:
            return [run]
        return finalseg.__cut(run)

    N = len(sentence)
    DAG = {}

    # Descend the trie once per start index, recording every end index
    # whose node carries the '' key.
    for head in xrange(N):
        node = trie
        tail = head
        while tail < N and sentence[tail] in node:
            node = node[sentence[tail]]
            if '' in node:
                if head not in DAG:
                    DAG[head] = []
                DAG[head].append(tail)
            tail += 1

    # Separate pass so that defaults are added only after all matches.
    for head in xrange(N):
        DAG.setdefault(head, [head])

    route = {}
    calc(sentence, DAG, 0, route=route)

    run = u''
    begin = 0
    while begin < N:
        end = route[begin][1] + 1
        chunk = sentence[begin:end]
        if end - begin == 1:
            run += chunk
        else:
            if run:
                # A multi-character piece closes the buffered run.
                for part in split_run(run):
                    yield part
                run = u''
            yield chunk
        begin = end

    if run:
        for part in split_run(run):
            yield part