Пример #1
0
def _normalize_symbol(symbol):
    """Return symbol with ( ) { } spelled as -LRB- -RRB- -LCB- -RCB-."""
    return (symbol.replace('(', '-LRB-').replace(')', '-RRB-')
            .replace('{', '-LCB-').replace('}', '-RCB-'))


def getbasenps(pospath, pennpath):
    """Print the base-NP spans of each sentence in a POS file.

    The sentences from ``posio.posread(pospath)`` are walked in step with
    the symbol stream from ``reader.readsymbols(pennpath)``.  A symbol
    matches the current token when the text before its last ``'/'`` equals
    the token's first field.  Symbols that do not match are echoed to
    stdout (preceded, once per token, by the sentence and the sentence
    counter); among them ``'['`` opens a span at the current token index
    and ``']'`` closes the open span at the current token index, so the
    end index is exclusive.

    For every sentence a list of ``[start, end]`` pairs is printed.
    Nothing is returned.

    Args:
        pospath: passed unchanged to ``posio.posread``.
        pennpath: passed unchanged to ``reader.readsymbols``.

    Raises:
        AssertionError: if a ``'['`` is seen while a span is still open.
        StopIteration: if the symbol stream ends before the sentences do.
    """
    symbols = reader.readsymbols(pennpath)
    # Counter echoed in the diagnostics only; the initial value 9 is carried
    # over unchanged (its origin is not visible in this file).
    n = 9
    for possent in posio.posread(pospath):
        base_nps = []
        for index, token in enumerate(possent):
            word = token[0]
            # next() instead of .next() keeps this working on Python 2.6+
            # and Python 3 alike.
            symbol = _normalize_symbol(next(symbols))
            header_printed = False
            while symbol[:symbol.rfind('/')] != word:
                if not header_printed:
                    # Show the sentence and its number once per token that
                    # is preceded by non-matching symbols.
                    posio.posprint(possent)
                    print(n)
                    header_printed = True
                print(symbol)
                if symbol == '[':
                    assert not base_nps or base_nps[-1][1] is not None
                    base_nps.append([index, None])
                elif symbol == ']' and base_nps:
                    # A ']' with no spans recorded belongs to the previous
                    # sentence, whose last span was already closed below.
                    assert base_nps[-1][1] is None
                    base_nps[-1][1] = index
                symbol = _normalize_symbol(next(symbols))
        if base_nps and base_nps[-1][1] is None:
            # The closing ']' of a sentence-final span is only read while
            # matching the next sentence, so close it here.  Spans closed
            # inside the loop use an exclusive end (index of the following
            # token); len(possent) keeps this one consistent with them.
            base_nps[-1][1] = len(possent)
        print(base_nps)
        n += 1
Пример #2
0
def randompos(path, cutoff):
    """Print each sentence of a POS file with some second fields blanked.

    Every token of every sentence from ``posio.posread(path)`` gets its
    second field overwritten with ``'_NONE-'`` whenever a fresh
    ``random.random()`` draw falls below ``cutoff``.  Each sentence is
    then written out with ``posio.posprint``.

    Args:
        path: passed unchanged to ``posio.posread``.
        cutoff: threshold compared against ``random.random()``.
    """
    for sentence in posio.posread(path):
        for position, (_word, _tag) in enumerate(sentence):
            if random.random() < cutoff:
                # NOTE(review): the literal is '_NONE-', not '-NONE-';
                # kept verbatim -- confirm that this spelling is intended.
                sentence[position][1] = '_NONE-'
        posio.posprint(sentence)
Пример #3
0
def batch_penn_to_pos(base, sections):
    """Run ``penn2pos`` over every file of the selected sections and print it.

    Each section lives in ``base/NN`` where ``NN`` is the section number
    zero-padded to two digits.  Every sentence produced by ``penn2pos`` for
    every file in those directories is written with ``posio.posprint``.
    Files within a section are processed in sorted name order.

    Args:
        base: directory containing the per-section sub-directories.
        sections: either a single section number (``"7"``) or a
            ``"first-last"`` pair.  The pair is a half-open range: ``last``
            itself is NOT processed.
            NOTE(review): confirm callers expect the end to be excluded.

    Raises:
        AssertionError: if ``sections`` has more than one ``'-'`` or a
            directory entry does not end in ``.pos``.
        ValueError: if a section bound is not an integer.
        OSError: if a section directory does not exist.
    """
    bounds = sections.split('-')
    assert len(bounds) in (1, 2)
    if len(bounds) == 1:
        section_numbers = [int(bounds[0])]
    else:
        section_numbers = range(int(bounds[0]), int(bounds[1]))
    for sec in section_numbers:
        path = os.path.join(base, '%02d' % (sec,))
        # os.listdir returns names in arbitrary order; sorting makes the
        # output order reproducible across runs and filesystems.
        for filename in sorted(os.listdir(path)):
            assert filename.endswith('.pos')
            for sent in penn2pos(os.path.join(path, filename)):
                posio.posprint(sent)
Пример #4
0
import sys
import posio
import replace

# Only the first N sentences have their first field rewritten; every
# sentence, rewritten or not, is printed.
N = 5000

f = sys.argv[1]
r = replace.CReplace(sys.argv[2])
for i, sent in enumerate(posio.posread(f, '_')):
    if i < N:
        for token in sent:
            token[0] = r.replace(token[0])
    posio.posprint(sent, '_')
Пример #5
0
import sys
import posio
import replace

# Number of leading sentences whose first field is passed through
# r.replace(); later sentences are printed unchanged.
N = 5000

f = sys.argv[1]
r = replace.CReplace(sys.argv[2])
# Both reading and printing use '_' as the second posio argument.
for i, sent in enumerate(posio.posread(f, '_')):
    if i < N:
        for token in sent:
            token[0] = r.replace(token[0])
    posio.posprint(sent, '_')