Exemplo n.º 1
0
def compute_mod_stats1(num_evens, mod):
    """Collect, per slot, a bit mask of residue sums produced by prime pairs.

    Each entry ``p1`` of the module-level ``primes`` sequence is combined with
    itself and with every later entry ``p2``.  The pair is mapped to the slot
    ``k = ((p1 + p2) >> 1) - 3`` and one bit, derived from
    ``(p1 % mod) + (p2 % mod)``, is set in that slot's mask.

    Parameters
    ----------
    num_evens : int
        Number of slots in the returned list.  The inner scan for a given
        ``p1`` stops at the first pair whose slot index is ``>= num_evens``.
    mod : int
        Modulus applied to every prime.

    Returns
    -------
    list of int
        ``num_evens`` bit masks, one per slot.

    Notes
    -----
    A histogram of the residues is reported through ``print_mod_count`` and a
    progress line is written to stdout while the pairs are scanned.
    """
    masks = [0] * num_evens
    even_modulus = (mod % 2 == 0)

    # Histogram of prime residues, handed to print_mod_count below.
    histogram = [0] * mod
    residues = [prime % mod for prime in primes]
    for residue in residues:
        histogram[residue] += 1

    print_mod_count(histogram)

    for i, (p1, m1) in enumerate(zip(primes, residues)):
        # Refresh the progress line once every 128 outer primes.
        if i & 127 == 0:
            print("\rComputing sums p1 + p2 where p1 =",
                  p1,
                  "and p2 >= p1 ...",
                  end="")
            sys.stdout.flush()

        for j, (p2, m2) in enumerate(zip(primes[i:], residues[i:])):
            k = ((p1 + p2) >> 1) - 3
            if k >= num_evens:
                if j != 0:
                    # Only the remaining partners of this p1 are out of range.
                    break
                # Even p1 + p1 is out of range: stop the whole scan.
                print("\nDone @ prime[{}] = {}".format(i, p1))
                return masks

            residue_sum = m1 + m2
            if even_modulus:
                # When mod is 4, m1 + m2 can be 2 (1+1), 4 (1+3 or 3+1),
                # or 6 (3+3); halving and subtracting one packs these
                # into bits 0, 1 and 2.
                shift = (residue_sum >> 1) - 1
            else:
                shift = residue_sum
            masks[k] |= 1 << shift
    return masks
Exemplo n.º 2
0
 def __call__(self):
     """print out current history information with line number"""
     length = get_current_history_length()
     if length > 1:
         kount = icount(1).next
         for command in imap(get_history_item, xrange(1, length)):
             print '%s\t%s' % (kount(), command)
Exemplo n.º 3
0
Arquivo: city.py Projeto: kimch2/x8623
 def get_events(self, p):
     """Return the player's city-contend events, generating them if absent.

     When ``p.city_contend_events`` is empty, a new list is built from the
     ``CityEventConfig`` table: config ids are visited cyclically, starting
     at ``p.city_contend_total_step + 1`` and wrapped into
     ``1..len(configs)``.  Ids with no config entry are skipped.  For each
     config, the defend event is used when the player's faction is the top
     faction, otherwise the attack event.  Generation stops right after an
     ``End``-type event has been appended, or once more than
     ``len(configs)`` events have been collected.  The list is then stored
     on ``p`` and ``p.save()`` is called.

     Parameters
     ----------
     p :
         Player-like object exposing ``city_contend_events``,
         ``city_contend_total_step``, ``factionID`` and ``save()``.

     Returns
     -------
     list
         A shallow copy of ``p.city_contend_events``.
     """
     if not p.city_contend_events:
         configs = get_config(CityEventConfig)
         total = len(configs)
         # Decide the role once, always from the faction id.  The original
         # code queried is_top_faction(p.factionID) when choosing the event
         # but is_top_faction(p) when checking for the End event, so the two
         # decisions could disagree.
         is_top = self.is_top_faction(p.factionID)
         start = p.city_contend_total_step + 1
         events = []
         c = 0
         for i in icount(start):
             # Wrap into 1..total (a remainder of 0 maps to the last id).
             i = i % total or total
             config = configs.get(i)
             if not config:
                 continue
             if is_top:
                 event_type = config.defend_event_type
                 event_argv = config.defend_event_argv
             else:
                 event_type = config.attack_event_type
                 event_argv = config.attack_event_argv
             events.append({
                 "type": event_type,
                 "argv": event_argv
             })
             # The End event closes the sequence for the role just used.
             if event_type == CityContendEventType.End:
                 break
             c += 1
             if c > total:
                 break
         p.city_contend_events = events
         p.save()
     return list(p.city_contend_events)
Exemplo n.º 4
0
def continuation(func, x, mu, delta, **opt):
    r""" Generator stepping the parameter of :math:`F(x, \mu) = 0`

    On every iteration ``delta`` is added to ``mu``, ``x`` is re-solved with
    ``newton.newton_krylov_hook`` starting from the previous solution, the
    step is logged, and the new pair is yielded.  The generator has no
    stopping condition of its own.

    Parameters
    -----------
    func : (numpy.array, float) -> numpy.array
        :math:`F(x, \mu)`
        :code:`func(x, mu)` must have same dimension of :code:`x`
    x : numpy.array
        Initial point of continuation, and satisfies :math:`F(x, \mu) = 0`
    mu : float
        Initial parameter of continuation, and satisfies :math:`F(x, \mu) = 0`
    delta : float
        step length of continuation.
        To decrease the parameter, you should set negative value.
    opt :
        Keyword options passed unchanged to ``newton.newton_krylov_hook``.

    Yields
    -------
    x : numpy.array
        Solution returned by the Newton solver for the current ``mu``
    mu : float
        Parameter value after the step

    """
    logger = Logger(__name__, "Continuation")
    step = 0
    while True:
        mu += delta

        def residual(point):
            # Residual of F at the current (already advanced) parameter.
            return func(point, mu)

        x = newton.newton_krylov_hook(residual, x, **opt)
        logger.info({"count": step, "mu": mu})
        yield x, mu
        step += 1
Exemplo n.º 5
0
    def trusted_region(self, v, eps, r0=None, p=2):
        """ Estimate the trusted region in which the deviation is smaller than `eps`.

        The deviation is obtained by calling ``self(r * v)`` with ``v``
        normalised to unit length.  Until the deviation falls strictly
        inside ``(eps/p, eps*p)`` the radius is rescaled by
        ``sqrt(eps / deviation)``; there is no iteration limit.

        Parameters
        ------------
        v : numpy.array
            Direction along which the radius is estimated (normalised here).
        eps : float
            Destination value of deviation
        r0 : float, optional (default=None)
            Initial radius.  When omitted, ``100 * self.alpha`` is used.
        p : float, optional (default=2)
            Iteration will end if the deviation is in `[eps/p, eps*p]`.
            Values below 1 are replaced by their reciprocal.

        Returns
        --------
        r : float
            radius of the trusted region

        """
        # Honour every explicitly supplied radius.  The former
        # ``type(r0) is float`` test silently ignored ints and NumPy scalars
        # (numpy.float64 is a subclass of float, not float itself).
        if r0 is None:
            r = 100*self.alpha
        else:
            r = r0
        v = v / np.linalg.norm(v)
        p = max(p, 1.0/p)
        c = 0
        while True:
            e = self(r*v)
            self.logger.info({
                "count": c,
                "deviation": e,
            })
            if (e > eps/p) and (e < eps*p):
                return r
            r = r * np.sqrt(eps / e)
            c += 1
Exemplo n.º 6
0
 def __call__(self):
     """print out current history information with line number"""
     length = get_current_history_length()
     if length > 1:
         kount = icount(1).next
         for command in imap(get_history_item, xrange(1, length)):
             print '%s\t%s' % (kount(), command)
Exemplo n.º 7
0
def continuation(func, x, mu, delta, **opt):
    r""" Generator stepping the parameter of :math:`F(x, \mu) = 0`

    On every iteration ``delta`` is added to ``mu``, ``x`` is re-solved with
    ``newton.newton_krylov_hook`` starting from the previous solution, the
    step is logged, and the new pair is yielded.  The generator has no
    stopping condition of its own.

    Parameters
    -----------
    func : (numpy.array, float) -> numpy.array
        :math:`F(x, \mu)`
        :code:`func(x, mu)` must have same dimension of :code:`x`
    x : numpy.array
        Initial point of continuation, and satisfies :math:`F(x, \mu) = 0`
    mu : float
        Initial parameter of continuation, and satisfies :math:`F(x, \mu) = 0`
    delta : float
        step length of continuation.
        To decrease the parameter, you should set negative value.
    opt :
        Keyword options passed unchanged to ``newton.newton_krylov_hook``.

    Yields
    -------
    x : numpy.array
        Solution returned by the Newton solver for the current ``mu``
    mu : float
        Parameter value after the step

    """
    logger = Logger(__name__, "Continuation")
    step = 0
    while True:
        mu += delta

        def residual(point):
            # Residual of F at the current (already advanced) parameter.
            return func(point, mu)

        x = newton.newton_krylov_hook(residual, x, **opt)
        logger.info({"count": step, "mu": mu})
        yield x, mu
        step += 1
Exemplo n.º 8
0
def gen(input_string=None, dictionary='./en', debug=False):
    """
    Print a 'my first is in x but not in y' type riddle for a string.

    The word list is read from ``dictionary`` (one word per line, blank
    lines dropped, every word case-folded).  For each alphabetic character
    of ``input_string`` a pair of words is drawn at random from the
    candidates returned by ``wlists``; any other character is printed as is.

    Parameters
    ----------
    input_string : str, optional
        Text to build the riddle from; requested via ``input()`` when None.
    dictionary : str
        Path of the word-list file.
    debug : bool
        When true, print intermediate values.

    Raises
    ------
    ValueError
        If ``input_string`` is not a ``str`` or no candidate word pair is
        left for one of its letters.
    """
    with open(dictionary, 'r') as handle:
        word_list = [entry.casefold()
                     for entry in handle.read().split('\n')
                     if entry]

    if input_string is None:
        input_string = input('please enter string to generate riddle from')
    if not isinstance(input_string, str):
        raise ValueError('Input is not string')

    alphabet = 'abcdefghijklmnopqrstuvwxyz'
    firstword, secondword = [], []
    for character in input_string:
        if not character.isalpha():
            # Non-letters get placeholders and are echoed verbatim later.
            firstword.append(None)
            secondword.append(None)
            continue

        wwl, wwol = wlists(character, word_list)
        # A pair is kept once for every other letter that is absent from
        # the "with" word or present in the "without" word, so the same
        # pair may appear several times.
        kept = [(with_word, without_word)
                for otheralpha in alphabet.replace(character, '')
                for with_word, without_word in zip(wwl, wwol)
                if otheralpha not in with_word or otheralpha in without_word]
        candidates_with = [pair[0] for pair in kept]
        candidates_without = [pair[1] for pair in kept]

        if not kept:
            if debug:
                print('words with = ', candidates_with)
                print('words without = ', candidates_without)
            raise ValueError(
                'No words in the dictionary work for letter {0}'.format(
                    character))

        # The two words are drawn independently of each other.
        first_pick = np.random.randint(0, len(candidates_with))
        second_pick = np.random.randint(0, len(candidates_without))
        firstword.append(candidates_with[first_pick])
        secondword.append(candidates_without[second_pick])

    triples = zip(firstword, secondword, input_string)
    for count, (fw, sw, ch) in enumerate(triples):
        if debug:
            print(count, fw, sw, ch)
        if fw is not None and sw is not None:
            rstring = 'My {0} is in {1} but not in {2}\n'.format(
                ordinal(count + 1), fw, sw)
        else:
            rstring = ch
        print(rstring)

    print('What am I?')
Exemplo n.º 9
0
def continuation(func, x, mu, delta, **opt):
    r""" Generator for continuation of a vector function :math:`F(x, \mu)`

    State and parameter are handled as one concatenated vector ``xi``.
    Each step first yields the current point, then obtains a direction
    from ``tangent_vector``, predicts ``xi0 = xi + |delta| * dxi`` and
    corrects it with ``newton.newton_krylov_hook`` on the system made of
    :math:`F` and the extra component ``dot(z - xi0, dxi)``.

    Parameters
    -----------
    func : (numpy.array, float) -> numpy.array
        :math:`F(x, \mu)`
        :code:`func(x, mu)` must have same dimension of :code:`x`
    x : numpy.array
        Initial point of continuation, and satisfies :math:`F(x, \mu) = 0`
    mu : float
        Initial parameter of continuation, and satisfies :math:`F(x, \mu) = 0`
    delta : float
        step length of continuation.
        To decrease the parameter, you should set negative value.
    opt :
        Keyword options passed to ``tangent_vector`` and to
        ``newton.newton_krylov_hook``.

    Yields
    -------
    x : numpy.array
        :math:`x`
    mu : float
        :math:`\mu`

    """
    logger = Logger(__name__, "Continuation")
    xi = concat(x, mu)
    # The sign of delta only enters through this initial direction.
    dxi = concat(np.zeros_like(x), delta)
    t = 0
    while True:
        logger.info({"count": t, "mu": xi[-1]})
        yield xi[:-1], xi[-1]

        # Predictor: move |delta| along the direction from tangent_vector.
        dxi = concat(*tangent_vector(func, xi[:-1], xi[-1], dxi=dxi, **opt))
        xi0 = xi + abs(delta) * dxi

        def f(z):
            # F(z) extended by the projection of (z - xi0) onto dxi.
            return concat(func(z[:-1], z[-1]), np.dot(z - xi0, dxi))

        # Corrector, started from the previous point.
        xi = newton.newton_krylov_hook(f, xi, **opt)
        logger.debug({
            "count": t,
            "|f(x)|": np.linalg.norm(func(xi[:-1], xi[-1])),
            "dmu": abs(delta) * dxi[-1],
            "delta mu": xi[-1] - xi0[-1],
            "(dxi, xi-xi0)": np.dot(xi - xi0, dxi),
        })
        t += 1
Exemplo n.º 10
0
def continuation(func, x, mu, delta, **opt):
    r""" Generator for continuation of a vector function :math:`F(x, \mu)`

    State and parameter are handled as one concatenated vector ``xi``.
    Each step first yields the current point, then obtains a direction
    from ``tangent_vector``, predicts ``xi0 = xi + |delta| * dxi`` and
    corrects it with ``newton.newton_krylov_hook`` on the system made of
    :math:`F` and the extra component ``dot(z - xi0, dxi)``.

    Parameters
    -----------
    func : (numpy.array, float) -> numpy.array
        :math:`F(x, \mu)`
        :code:`func(x, mu)` must have same dimension of :code:`x`
    x : numpy.array
        Initial point of continuation, and satisfies :math:`F(x, \mu) = 0`
    mu : float
        Initial parameter of continuation, and satisfies :math:`F(x, \mu) = 0`
    delta : float
        step length of continuation.
        To decrease the parameter, you should set negative value.
    opt :
        Keyword options passed to ``tangent_vector`` and to
        ``newton.newton_krylov_hook``.

    Yields
    -------
    x : numpy.array
        :math:`x`
    mu : float
        :math:`\mu`

    """
    logger = Logger(__name__, "Continuation")
    xi = concat(x, mu)
    # The sign of delta only enters through this initial direction.
    dxi = concat(np.zeros_like(x), delta)
    t = 0
    while True:
        logger.info({"count": t, "mu": xi[-1]})
        yield xi[:-1], xi[-1]

        # Predictor: move |delta| along the direction from tangent_vector.
        dxi = concat(*tangent_vector(func, xi[:-1], xi[-1], dxi=dxi, **opt))
        xi0 = xi + abs(delta) * dxi

        def f(z):
            # F(z) extended by the projection of (z - xi0) onto dxi.
            return concat(func(z[:-1], z[-1]), np.dot(z-xi0, dxi))

        # Corrector, started from the previous point.
        xi = newton.newton_krylov_hook(f, xi, **opt)
        logger.debug({
            "count": t,
            "|f(x)|": np.linalg.norm(func(xi[:-1], xi[-1])),
            "dmu": abs(delta)*dxi[-1],
            "delta mu": xi[-1] - xi0[-1],
            "(dxi, xi-xi0)": np.dot(xi-xi0, dxi),
        })
        t += 1
Exemplo n.º 11
0
Arquivo: base.py Projeto: fcua/x8623
    def validate(cls, csvrow, lineno=None):
        """Validate one CSV row against ``cls.fields_list``.

        A non-repeated field is read from its ``column_name`` column and
        passed through the field's ``validate``.  A repeated field is read
        from the columns ``column_name + str(i)``, with ``i`` taken from
        ``f.range``; an integer ``f.range`` is treated as an open-ended
        start index that stops at the first missing column.  After all
        fields are processed, every ``validate_<name>`` hook found on
        ``cls`` is called with the field's cleaned value and the full
        cleaned mapping.

        Parameters
        ----------
        csvrow : mapping
            Column name -> raw value for the row.
        lineno : optional
            Row number, used in error messages and passed to the field
            validators.

        Returns
        -------
        dict
            Field name -> cleaned value; fields whose value is None are
            left out.

        Raises
        ------
        ValidationError
            When a required column is missing and the current table is not
            listed in the field's ``allowmiss``.
        """
        cleaned = {}
        for field in cls.fields_list:
            if field.repeated:
                collected = []
                # Whether no end point is specified (open-ended numbering).
                open_ended = isinstance(field.range, (int, long))
                suffixes = icount(field.range) if open_ended else field.range
                for suffix in suffixes:
                    column_name = field.column_name + str(suffix)
                    try:
                        raw = csvrow[column_name]
                    except KeyError:
                        if open_ended or cls._current_table in field.allowmiss:
                            break
                        raise ValidationError(u'第 %s 行没有字段 %s' %
                                              (lineno, column_name))
                    item = field.validate(raw, lineno)

                    # Filter zeros: drop None always, and falsy values
                    # when the field asks for it.
                    if item is not None and (item or not field.skipzero):
                        collected.append(item)

                value = collected
            else:
                try:
                    raw = csvrow[field.column_name]
                    value = field.validate(raw, lineno)
                except KeyError:
                    if cls._current_table not in field.allowmiss:
                        raise ValidationError(u'第 %s 行没有字段 %s' %
                                              (lineno, field.column_name))
                    value = getattr(field, 'default', None)

            if value is not None:
                cleaned[field.name] = value

        # Per-field hooks run after every field has been cleaned.
        for field in cls.fields_list:
            hook_name = 'validate_%s' % field.name
            if hasattr(cls, hook_name):
                getattr(cls, hook_name)(cleaned[field.name], cleaned)

        return cleaned
Exemplo n.º 12
0
def newton_krylov_hook_gen(func, x0, trusted_region,
                           newton_krylov_tol_ratio, **opt):
    """ Generator of Newton-Krylov-hook iteration

    Every iteration evaluates the residual, yields the current state and
    then computes an update from ``krylov.gmres_factorize``.  The plain
    update is applied when its norm is below ``trusted_region``; otherwise
    ``hook_step`` supplies the coefficients of the update.

    Parameters
    -----------
    func : callable
        Function whose residual ``func(x)`` is evaluated at each iterate.
    x0 : numpy.array
        Initial iterate.
    trusted_region : float
        Threshold on the update norm below which the plain update is used.
    newton_krylov_tol_ratio : float
        Multiplied by the residual norm to set ``opt["krylov_tol"]``.
    opt :
        Keyword options passed to ``Jacobi``, ``krylov.gmres_factorize``
        and ``hook_step``.

    Yields
    -------
    x : numpy.array
        :math:`x_n`
    residual : float
        :math:`|F(x_n)|`
    fx : numpy.array
        :math:`F(x_n)`
    """
    logger = Logger(__name__, "NewtonKrylovHook")
    nu = 0.0
    iteration = 0
    while True:
        fx = func(x0)
        res = norm(fx)
        logger.info({"count": iteration, "residual": res})
        yield x0, res, fx

        jacobian = Jacobi(func, x0, fx=fx, **opt)
        rhs = -fx
        # The Krylov tolerance follows the current residual norm.
        opt["krylov_tol"] = newton_krylov_tol_ratio * norm(rhs)
        V, R, g, Q = krylov.gmres_factorize(jacobian, rhs, **opt)
        dx = np.dot(V[:, :len(g)], np.linalg.solve(R, g))
        dx_norm = norm(dx)

        inside = dx_norm < trusted_region
        if inside:
            logger.info({"|dx|": dx_norm, "message": 'in Trusted region'})
        else:
            logger.info({"|dx|": dx_norm, "message": 'Hook step'})
            # nu is carried over to the next hook step.
            xi, nu = hook_step(R, g, trusted_region, nu=nu, **opt)
            dx = np.dot(V[:, :len(xi)], xi)
        x0 = x0 + dx
        iteration += 1
Exemplo n.º 13
0
# Demo script: three ways of printing squares built from icount(1).
# NOTE(review): the `$name(...)` prefix is not standard Python; presumably it
# is a partial-application shorthand expanded by a preprocessor, and `fcp()`
# presumably starts a composition chain joined with `*` -- confirm against
# the library that provides them.
pow2 = flip(pow)
R = fcp() * $takewhile($gt(1000)) * $ifilter(isOdd) * $imap($pow2(2)) * icount
for i in R(1):
    print i
print

# a bit more sophisticated currying example
# (same pipeline, with flip(pow) written inline instead of the pow2 name)
R = (fcp() *
     $takewhile($gt(1000)) * $ifilter(isOdd) *
     $imap($(flip(pow))(2)) * icount)
for i in R(1):
    print i
print

# generator alternative
R = (pow(i, 2) for i in icount(1) if pow(i, 2) % 2 != 0 and pow(i, 2) < 1000)
# but it won't stop: the `if` clause only filters values, it never ends the
# endless icount(1) source, so the loop below is left commented out.
# for i in R:
    # print i


def dotProduct(addOp, mulOp, *vectors):
    """Combine ``vectors`` with ``mulOp`` and fold the results with ``addOp``.

    The work is done by the pipeline ``$reduce(addOp)`` after
    ``$imap(mulOp)``, applied to ``*vectors``.
    NOTE(review): relies on the non-standard ``$`` / ``fcp()`` syntax;
    presumably partial application and function composition -- confirm.
    """
    f = fcp() * $reduce(addOp) * $imap(mulOp)
    return f(*vectors)


# Demo: the first line prints the pipeline's result, the second prints the
# same dot product written out by hand for comparison.
print dotProduct(add, mul, [1, 2, 3], [4, 5, 6])
print 1 * 4 + 2 * 5 + 3 * 6
print

Exemplo n.º 14
0
def main():

  op = OptionParser()
  op.add_option('-s', '--stop_sym', default='__stop__')
  op.add_option('-o', '--output', default=None)

  opt, args = op.parse_args()

  if not len(args): exit()

  elif args[0] == 'soft-hmm-train':
  
    BEGIN_OF_SEG, END_OF_SEG, IN_SEG = range(3)
    codings = ['B 0.5 O 0.5', 
               'I 0.5 O 0.5',
               'B 0.33333 I 0.33333 O 0.33333']

    state = BEGIN_OF_SEG
    stop_sym = opt.stop_sym

    fh = open(args[1])
    try:
      for sent in fh:
        terms = sent.split()
        last = len(terms) - 1
        for i, term in enumerate(terms):
          if i == 0 or terms[i-1] == stop_sym:
            state = BEGIN_OF_SEG
  
          elif i == last or terms[i+1] == stop_sym:
            state = END_OF_SEG
  
          else:
            state = IN_SEG
  
          if term == stop_sym:
            print '__stop__ STOP 1.0'
  
          else:
            print term, codings[state]
  
        print '__eos__ STOP 1.0'

    except IOError:
      pass

    fh.close()

  elif args[0] == 'segments2chunks':
   
    fh = open(args[1])
    for line in fh:
      is_open = False
      try:
        items = line.split()[:-1]
        for item in items:
          if item[0] == item[-1] == '"':
            print item[1:-1], 
          elif item[0] == '"':
            print '( ' + item[1:],
            is_open = True
          elif item[-1] == '"':
            print item[:-1] + ' )',
            assert is_open
            is_open = False
          else:
            print item,
        if is_open:
          print ')',
        print 
      except IOError:
        fh.close()


  elif args[0] == 'bio2productions':
    
    fh = len(args) > 1 and open(args[1]) or sys.stdin
    prev_wrd, prev_tag = fh.next().split()

    no_rule = ['EOS']

    try:
      for wrd, tag in imap(methodcaller('split'), fh):
      
        if prev_tag in no_rule:
          print '%s|%s' % (prev_tag, prev_wrd)

        else:
          print '%s|%s %s' % (prev_tag, prev_wrd, tag)

        prev_tag, prev_wrd = tag, wrd

      print '%s|%s' % (prev_tag, prev_wrd)

    except IOError:
      pass

  elif args[0] == 'spl2wpl':
    fh = len(args) > 1 and open(args[1]) or sys.stdin

    print '__start__'
    for line in fh:
      wrds = line.split()
      while len(wrds) > 0 and wrds[0] == '__stop__':
        del wrds[0]
      
      while len(wrds) > 0 and wrds[-1] == '__stop__':
        del wrds[-1]

      for wrd in wrds:
        print wrd
      print '__eos__'

  elif args[0] == 'tags2doubletags':

    prev_tag = 'BOS'
    no_double = ['BOS','EOS','STOP']
   
    fh = len(args) > 1 and open(args[1]) or sys.stdin
    for wrd, tag in imap(methodcaller('split'), fh):

      if tag in no_double:
        print wrd, tag
      else:
        print wrd, tag + '^' + prev_tag

      prev_tag = tag

  elif args[0] == 'doubletags2tags':
    
    fh = len(args) > 1 and open(args[1]) or sys.stdin
    for wrd, tag in imap(methodcaller('split'), fh):
      print wrd, tag.split('^')[0]

  elif args[0] == 'bio2chunk':

    fh = len(args) > 1 and open(args[1]) or sys.stdin

    open_parens = False

    try:
      assert fh.next().strip() == '__start__ STOP'
      for line in fh:
        word, tag = line.split()

        if tag != 'I' and open_parens:
          print ')',
          open_parens = False

        if tag == 'B':
          print '(',
          open_parens = True

        if tag in ['B','I','O'] and word != '__stop__':
          print word,
      
        if word == '__eos__':
          print

    except IOError:
      pass

  elif args[0] == 'chunk2bio':

    fh = open(args[1])
    stop_sym = opt.stop_sym or '__stop__'

    txt_fh = open(args[2])

    try:
      print '__start__ STOP'

      for line, txt_line in izip(fh, txt_fh):
        beg_chunk = in_chunk = False

        chunk_items = line.split()
        txt_items = txt_line.split()

        txt_item_ind = 0

        is_bos = True

        for chunk_item in chunk_items:
          
          if chunk_item in ['(',')']:
            
            if chunk_item == '(':
              beg_chunk = True

            elif chunk_item == ')':
              beg_chunk = in_chunk = False

            else:
              raise RuntimeError

          else:
            
            while txt_items[txt_item_ind] == stop_sym:
              if in_chunk:
                print '__stop__ I'
              elif not is_bos:
                print '__stop__ STOP'
              txt_item_ind += 1
              is_stop = True

            assert txt_items[txt_item_ind] == chunk_item, \
              'AssertionError: txt and chunk do not match: %s %s' % \
              (txt_items[txt_item_ind], chunk_item)

            if beg_chunk:
              print '%s B' % chunk_item
              beg_chunk = False
              in_chunk = True

            elif in_chunk:
              print '%s I' % chunk_item

            else:
              print '%s O' % chunk_item

            txt_item_ind += 1

            is_bos = False

        if len(txt_items) <= txt_item_ind:
          assert len(txt_items) == txt_item_ind + 1
          assert txt_items[txt_item_ind] == stop_sym

        print '__eos__ STOP'

    except IOError:
      pass

  elif args[0] == 'wsj2spl':

    files = args[1:]
    tree_iter = wsj_tree_iter('./', files)
    filt = lambda w,p:p in WSJ_RM_POS
    punc = lambda w,p:p in WSJ_PUNC_POS

    corpus = Corpus(tree_iter = tree_iter,
                    filt = filt,
                    punc = punc,
                    stop_sym = opt.stop_sym)

    try:
      for s in corpus: print s

    except IOError:
      pass

  elif args[0] == 'wsj2csv':

    if opt.output:
      fh = open(opt.output, 'wb')
    else:
      fh = sys.stdout

    files = args[1:]
    tree_iter = wsj_tree_iter('./', files)
    filt = lambda w,p:p in WSJ_RM_POS

    header = ['Sentence', 'StartIndex', 'EndIndex', 'Category', 'CategoryShort']
    data = [header]
    for n, tree in enumerate(tree_iter):
      clean_tree(tree, filt)
      v = len(tree.leaves())
      for x in xrange(v):
        tree[tree.leaf_treeposition(x)] = x
      for subtr in tree.subtrees():
        leaves = subtr.leaves()
        cat = subtr.node
        cat_short = cat.split('-')[0]
        data.append([n,leaves[0],leaves[-1]+1,cat,cat_short])

    writer = csv.writer(fh)
    writer.writerows(data)

    if opt.output:
      fh.close()

  elif args[0] == 'wsj2posspl':

    files = args[1:]
    tree_iter = wsj_tree_iter('./', files)
    filt = lambda w,p:p in WSJ_RM_POS
    punc = lambda w,p:p in WSJ_PUNC_POS

    corpus = Corpus(tree_iter = tree_iter,
                    filt = filt,
                    punc = punc,
                    use_pos=True,
                    stop_sym = opt.stop_sym)

    try:
      for s in corpus: print s

    except IOError:
      pass

  elif args[0] == 'mkposmerged':
    
    fh1, fh2 = map(open, args[1:3])

    for l1, l2 in izip(fh1, fh2):
      
      l1 = l1.split()
      l2 = l2.split()

      assert len(l1) == len(l2)

      for w1, w2 in izip(l1, l2):
        
        if w1 == opt.stop_sym:
          print w1,

        else:
          print w1 + '-' + w2,

      print

  elif args[0] == 'pos2txt':
    
    pos_fh = open(args[1])
    txt_fh = open(args[2])

    for pos_line, txt_line in izip(pos_fh, txt_fh):
      
      pos = pos_line.split()
      txt = [k for k in txt_line.split() if k != opt.stop_sym]

      i = 0
      for w in pos:
        if w in ['(',')']:
          print w,
        
        else:
          print txt[i],
          i += 1

      print

  elif args[0] == 'subset':
   
    n = int(args[2])
    try:
      for line in open(args[1]):
        if len(str_remove_ignore(line, opt.stop_sym).split()) <= n:
          print line.rstrip()

    except IOError:
      pass

  elif args[0] == 'wsj-nps-gold-standard':
    files = args[1:]
    tree_iter = wsj_tree_iter('./', files)
    filt = lambda w,p:p in WSJ_RM_POS

    try:
      keeper = phrase_paren_keeper(['NP','QP','WHNP'])
      for tree in tree_iter:
        clean_tree(tree, filt)
        assert len(tree.leaves())
        for x in cat_phrases(tree, keeper):
          print x,
        print

    except IOError:
      pass

  elif args[0] == 'wsj-chunk-gold-standard':
    files = args[1:]
    tree_iter = wsj_tree_iter('./', files)
    filt = lambda w,p:p in WSJ_RM_POS

    try:
      for tree in tree_iter:
        clean_tree(tree, filt)
        assert len(tree.leaves())
        for x in just_phrases(tree):
          print x,
        print

    except IOError:
      pass

  elif args[0] == 'wsj-tree-gold-standard':
    files = args[1:]
    tree_iter = wsj_tree_iter('./', files)
    filt = lambda w,p:p in WSJ_RM_POS 

    try:
      for tree in tree_iter:
        clean_tree(tree, filt)
        assert len(tree.leaves())
        pprint_nonodes(tree, sys.stdout)
        print

    except IOError:
      pass

  elif args[0] == 'wsj-corpus-study':

    files = args[1:]
    tree_iter = wsj_tree_iter('./', files)
    filt = lambda w,p:p in WSJ_RM_POS

    d = defaultdict(lambda:0)
    for tree in tree_iter:
      clean_tree(tree, filt)
      assert len(tree.leaves())
      phrases = just_phrases(tree)
      i = 0
      chunks = []
      for item in phrases:
        if item == '(':
          openb = i
        elif item == ')':
          label = tree[tree.treeposition_spanning_leaves(openb,i)].node
          d[label] += 1
        else:
          i += 1

    total = float(sum(d.values()))
    print 'fine-grained:'
    for label, val in d.iteritems(): 
      print '%9s %5d   %2.1f %%' % (label, val, 100.0 * val / total)
    print

    print 'coarse-grained:'
    d1 = defaultdict(lambda:0)
    for label, val in d.iteritems():
      d1[label.split('-')[0].split('=')[0]] += val
    for label, val in d1.iteritems(): 
      print '%9s %5d   %2.1f %%' % (label, val, 100.0 * val / total)

  elif args[0] == 'negra2spl':

    fname = args[1]
    tree_iter = negra_tree_iter(fname)
    filt = lambda w,p:p.startswith('*') or p in NEGRA_RM_POS or w in STOPPING_PUNC

    corpus = Corpus(tree_iter = tree_iter,
                    filt = filt,
                    punc = lambda w,p:False,
                    stop_sym = opt.stop_sym)

    try:
      for s in corpus: print s

    except IOError:
      pass

  elif args[0] == 'negra-tree-gold-standard':

    fname = args[1]
    tree_iter = negra_tree_iter(fname)
    filt = lambda w,p:p.startswith('*') or p in NEGRA_RM_POS or w in STOPPING_PUNC

    try:
      for tree in tree_iter:
        clean_tree(tree, filt)
        assert len(tree.leaves())
        pprint_nonodes(tree, sys.stdout)
        print

    except IOError:
      pass

  elif args[0] == 'negra-chunk-gold-standard':

    fname = args[1]
    tree_iter = negra_tree_iter(fname)
    filt = lambda w,p:p.startswith('*') or p in NEGRA_RM_POS or w in STOPPING_PUNC
    punc = lambda w,p:p in NEGRA_PUNC_POS

    try:
      for tree in tree_iter:
        clean_tree(tree, filt)
        assert len(tree.leaves())
        for x in just_phrases(tree):
          print x,
        print 

    except IOError:
      pass

  elif args[0] == 'negra-nps-gold-standard':

    fname = args[1]
    tree_iter = negra_tree_iter(fname)
    filt = lambda w,p:p.startswith('*') or p in NEGRA_RM_POS or w in STOPPING_PUNC
    punc = lambda w,p:p in NEGRA_PUNC_POS

    try:
      keeper = phrase_paren_keeper(['NP','CNP'])
      for tree in tree_iter:
        clean_tree(tree, filt)
        assert len(tree.leaves())
        for x in cat_phrases(tree, keeper):
          print x,
        print 

    except IOError:
      pass

  elif args[0] == 'negra-corpus-study':

    fname = args[1]
    tree_iter = negra_tree_iter(fname)
    filt = lambda w,p:p.startswith('*') or p in NEGRA_RM_POS or w in STOPPING_PUNC
    punc = lambda w,p:p in NEGRA_PUNC_POS

    d = defaultdict(lambda:0)
    for tree in tree_iter:
      clean_tree(tree, filt)
      assert len(tree.leaves())
      phrases = just_phrases(tree)
      i = 0
      chunks = []
      for item in phrases:
        if item == '(':
          openb = i
        elif item == ')':
          label = tree[tree.treeposition_spanning_leaves(openb,i)].node
          d[label] += 1
        else:
          i += 1

    total = float(sum(d.values()))
    print 'fine-grained:'
    for label, val in d.iteritems(): 
      print '%9s %5d   %2.1f %%' % (label, val, 100.0 * val / total)
    print

    print 'coarse-grained:'
    d1 = defaultdict(lambda:0)
    for label, val in d.iteritems():
      d1[label.split('-')[0]] += val
    for label, val in d1.iteritems(): 
      print '%9s %5d   %2.1f %%' % (label, val, 100.0 * val / total)

  elif args[0] == 'ctb2spl':

    files = args[1:]
    tree_iter = ctb_tree_iter(files)
    filt = lambda w,p:p in CTB_RM_POS or w in STOPPING_PUNC
    punc = lambda w,p:p in CTB_PUNC_POS

    corpus = Corpus(tree_iter = tree_iter,
                    filt = filt,
                    punc = punc,
                    stop_sym = opt.stop_sym)

    try:
      for s in corpus: 
        if len(s):
          print s
        else:
          print ''

    except IOError:
      pass

  elif args[0] == 'clean-up-output':
    
    for line in sys.stdin:
      parts = line.split()
      parts = [w.lower() for w in parts if w not in STOPPING_PUNC]
      print ' '.join(parts)

  elif args[0] == 'ctb-tree-gold-standard':

    files = args[1:]
    tree_iter = ctb_tree_iter(files)
    filt = lambda w,p:p in CTB_RM_POS or w in STOPPING_PUNC
    punc = lambda w,p:p in CTB_PUNC_POS

    try:
      for tree in tree_iter:
        clean_tree(tree, filt)
        if len(tree.leaves()):
          pprint_nonodes(tree, sys.stdout)
          print
        else:
          print ''

    except IOError:
      pass

  elif args[0] == 'ctb-chunk-gold-standard':

    files = args[1:]
    tree_iter = ctb_tree_iter(files)
    filt = lambda w,p:p in CTB_RM_POS or w in STOPPING_PUNC
    punc = lambda w,p:p in CTB_PUNC_POS

    try:
      for tree in tree_iter:
        clean_tree(tree, filt)
        if len(tree.leaves()):
          for x in just_phrases(tree): print x,
          print

        else:
          print ''

    except IOError:
      pass

  elif args[0] == 'ctb-nps-gold-standard':

    files = args[1:]
    tree_iter = ctb_tree_iter(files)
    filt = lambda w,p:p in CTB_RM_POS or w in STOPPING_PUNC
    punc = lambda w,p:p in CTB_PUNC_POS

    keeper = phrase_paren_keeper(['DP','NP','DNP','QP'])

    try:
      for tree in tree_iter:
        clean_tree(tree, filt)
        if len(tree.leaves()):
          for x in cat_phrases(tree,keeper) : print x,
          print

        else:
          print ''

    except IOError:
      pass

  elif args[0] == 'ctb-corpus-study':
        
    files = args[1:]
    tree_iter = ctb_tree_iter(files)
    filt = lambda w,p:p in CTB_RM_POS or w in STOPPING_PUNC
    punc = lambda w,p:p in CTB_PUNC_POS

    d = defaultdict(lambda:0)
    for tree in tree_iter:
      clean_tree(tree, filt)
      if len(tree.leaves()):
        phrases = just_phrases(tree)
        i = 0
        chunks = []
        for item in phrases:
          if item == '(':
            openb = i
          elif item == ')':
            label = tree[tree.treeposition_spanning_leaves(openb,i)].node
            d[label] += 1
          else:
            i += 1

    total = float(sum(d.values()))
    print 'fine-grained:'
    for label, val in d.iteritems(): 
      print '%9s %5d   %2.1f %%' % (label, val, 100.0 * val / total)
    print

    print 'coarse-grained:'
    d1 = defaultdict(lambda:0)
    for label, val in d.iteritems():
      d1[label.split('-')[0]] += val
    for label, val in d1.iteritems(): 
      print '%9s %5d   %2.1f %%' % (label, val, 100.0 * val / total)

  elif args[0] == 'chunk2sp':

    try:
      alpha, corpus = load(open(args[2], 'r', -1))

    except UnpicklingError:
      alpha = Alpha()
      corpus = make_corpus(open(args[2], 'r', -1), alpha)

    try:
      for s, line in enumerate(open(args[1], 'r', -1)):
  
        toks = line.split()
        terms, chunks = chunk_index(toks)
        n = len(terms) 
  
        if opt.stop_sym == None: 
          opt.stop_sym = "__stop__"

        stop_val = alpha[opt.stop_sym]
        j = -1
        codes = corpus[s][:]
        bracks = []


        try:
          while codes[0] == stop_val:
            codes = codes[1:]
          open_b = 0
          for code in codes:
            if code == stop_val:
              if j < n:
                bracks.append((open_b,j))
                open_b = j+1
            else:
              j += 1

          bracks.extend([(i,j-1) for i,j in chunks])

          b = Bracketing(terms, map(brak_from_tup, bracks))
          print b

        except IndexError:
          print 

    except IOError:
      pass

  elif args[0] == 'chunk2lb':

    try:
      alpha, corpus = load(open(args[2], 'r', -1))

    except UnpicklingError:
      alpha = Alpha()
      corpus = make_corpus(open(args[2], 'r', -1), alpha)

    if opt.stop_sym == None: opt.stop_sym = '__stop__'
    stop_val = alpha[opt.stop_sym]
  
    fh = open(args[1], 'r', -1)
    try:
      for s, line in enumerate(fh):
        parts = chunk2parts(line, corpus[s], stop_val)
        
        for part in parts:
          if len(part) > 1:

            for _ in xrange(len(part) - 1):
              print '(',

            print part[0],

            for p in part[1:]:
              print p,
              print ')',

          else:
            print part[0],

        print

    except IOError:
      fh.close()

  elif args[0] == 'chunk2rb':

    try:
      alpha, corpus = load(open(args[2], 'r', -1))

    except UnpicklingError:
      alpha = Alpha()
      corpus = make_corpus(open(args[2], 'r', -1), alpha)
    
    if opt.stop_sym == None: opt.stop_sym = '__stop__'
    stop_val = alpha[opt.stop_sym]
   
    fh = open(args[1], 'r', -1)
    try:
      for s, line in enumerate(fh):
        parts = chunk2parts(line, corpus[s], stop_val)

        for part in parts:
          print '(',
          if len(part) > 1:
            for p in part[:-1]:
              print '(',
              print p,

            print part[-1],
            for _ in xrange(len(part) - 1):
              print ')',

          else:
            print part[0],

        print ') ' * len(parts)

    except IOError:
      fh.close()
  
  elif args[0] == 'seg2chunk':
    
    sentences = []
    curr = []
    for line in open(args[1], 'r', -1):
      if line[0] != '#':
        line = line.strip().replace('(','( ').replace(')',' )')
        if len(line) == 0:
          if len(curr) != 0:
            sentences.append(curr)
            curr = []
        
        else:
          curr.extend(line.split())

    for brak in sentences:
      for x in just_phrases_from_items(brak): print x,
      print

  elif args[0] == 'seg2tree':
    
    sentences = []
    curr = []
    for line in open(args[1], 'r', -1):
      if line[0] != '#':
        line = line.strip().replace('(','( ').replace(')',' )')
        if len(line) == 0:
          if len(curr) != 0:
            sentences.append(curr)
            curr = []
        
        else:
          curr.extend(line.split())

    for brak in sentences:
      print ' '.join(brak)

  elif args[0] == 'compare-bigrams':

    assert len(args) == 3, 'require gold-standard and output files'
    for c in filter(comment, open(args[2]).readlines()): print c.strip()

    words, gold = chunk_index_corpus(args[1])
    words1, outp = chunk_index_corpus(args[2])

    for i, s1, s2 in izip(icount(1), words, words1):
      assert s1 == s2, 'sentences do not match\n' +\
        '%d\n%s\n%s' % (i, ' '.join(s1), ' '.join(s2))

    gold = map(split_chunks, gold)
    outp = map(split_chunks, outp)

    n_true_pos = 0
    n_false_pos = 0
    n_false_neg = 0

    for i in range(len(gold)):

      gold_phrases = set(gold[i])
      outp_phrases = set(outp[i])

      true_pos = gold_phrases & outp_phrases
      false_pos = outp_phrases - gold_phrases
      false_neg = gold_phrases - outp_phrases

      # Some error analysis can be done here

      n_true_pos += len(true_pos)
      n_false_pos += len(false_pos)
      n_false_neg += len(false_neg)

    try:
      prec = 100. * n_true_pos / (n_true_pos + n_false_pos)
      rec = 100. * n_true_pos / (n_true_pos + n_false_neg)
      f1 = 2. * prec * rec / (prec + rec)
    except ZeroDivisionError:
      prec = rec = f1 = 0.

    print 'P = %.2f\tR = %.2f\tF = %.2f' % (prec, rec, f1)

    print 'TP = %d\tFP = %d\tFN = %d' % (n_true_pos, n_false_pos, n_false_neg)

  elif args[0] == 'exp2csv':

    if opt.output:
      fh = open(opt.output, 'wb')
    else:
      fh = sys.stdout

    header = ['Sentence', 'StartIndex', 'EndIndex', 'Terms', 'PosSeq', 'Category',
              'CategoryShort', 'Result', 'ErrorType']

    gold_words, exp_gold = chunk_index_corpus(args[1])
    outp_words, exp_outp = chunk_index_corpus(args[2])

    assert outp_words == gold_words

    pos_corpus = [[w for w in s.split() if w != '__stop__'] for s in open(args[3]).readlines()]
    treebank = list(iter(csv.reader(open(args[4]))))
    for i in xrange(1,len(treebank)):
        for j in xrange(3):
            treebank[i][j] = int(treebank[i][j])
    cat_dict = dict([(tuple(s[:3]), s[3]) for s in treebank])
    cat_short_dict = dict([(tuple(s[:3]), s[4]) for s in treebank])

    data = [header]

    for n, words, pos, gold_p, outp_p in \
        izip(icount(), outp_words, pos_corpus, exp_gold, exp_outp):

      gold_p, outp_p = set(gold_p), set(outp_p)
      tp, fp, fn = gold_p & outp_p, outp_p - gold_p, gold_p - outp_p

      for sta, end in tp:
        terms = ' '.join(words[sta:end])
        pos_seq = '-'.join(pos[sta:end])
        cat = cat_dict[n,sta,end]
        cat_short = cat_short_dict[n,sta,end]
        data.append([n,sta,end,terms,pos_seq,cat,cat_short,'TP','NA'])

      for sta, end in fp:
        terms = ' '.join(words[sta:end])
        pos_seq = '-'.join(pos[sta:end])

        error_type = 'NoOverlap'
        for sta1, end1 in fn:
          if sta1 <= sta and end <= end1:
            error_type = 'Sub'
            break
          elif (sta < sta1 and end < end1) or (sta1 < sta or end1 < end):
            error_type = 'Crossing'
            break
          elif sta <= sta1 and end1 <= end:
            error_type = 'Super'
            break
    
        data.append([n,sta,end,terms,pos_seq,'NA','NA','FP',error_type])

      for sta, end in fn:
        terms = ' '.join(words[sta:end])
        pos_seq = '-'.join(pos[sta:end])
        cat = cat_dict[n,sta,end]
        cat_short = cat_short_dict[n,sta,end]

        error_type = 'unset'
        for sta1, end1 in fn:
          if sta1 <= sta and end <= end1:
            error_type = 'Sub'
            break
          elif (sta < sta1 and end < end1) or (sta1 < sta or end1 < end):
            error_type = 'Crossing'
            break
          elif sta <= sta1 and end1 <= end:
            error_type = 'Super'
            break

        assert error_type != 'unset'

        data.append([n,sta,end,terms,pos_seq,cat,cat_short,'FN',error_type])

    writer = csv.writer(fh)
    writer.writerows(data)

    if opt.output:
      fh.close()

  elif args[0] == 'compare':

    notstop = lambda x:x != '__stop__'

    if opt.output:
      output = open(opt.output, 'w')
    else:
      output = None

    assert len(args) >= 3, 'require gold-standard and output files'
    for c in filter(comment, open(args[2]).readlines()): print c.strip()

    words, gold = chunk_index_corpus(args[1])
    words1, outp = chunk_index_corpus(args[2])

    if len(args) > 3:
      pos_fh = open(args[3])
      pos_tp = defaultdict(lambda:0)
      pos_fp = defaultdict(lambda:0)
      pos_fn = defaultdict(lambda:0)
    else:
      pos_fh = False

    for i, s1, s2 in izip(icount(1), words, words1):
      assert [w.lower() for w in s1] == [w.lower() for w in s2], 'sentences do not match\n' +\
        '%d\n%s\n%s' % (i, ' '.join(s1), ' '.join(s2))

    count = [0] * 10
    lens = [0] * 5

    count_by_len = [[0] * 5 for i in xrange(5)]

    by_pos = [defaultdict(lambda:0) for i in xrange(5)]

    tp_id, fp_id, fn_id, all_gold, all_pred, \
    tp_big_id, fp_big_id, fn_big_id, all_gold_big, all_pred_big = range(10)

    substring_count = 0
    supstring_count = 0

    alt_count = [0] * 5

    for i in range(len(gold)):

      gold_phrases = set(gold[i])
      outp_phrases = set(outp[i])

      true_pos = gold_phrases & outp_phrases
      false_pos = outp_phrases - gold_phrases
      false_neg = gold_phrases - outp_phrases

      for x in false_pos:
        for y in false_neg:
          if y[0] <= x[0] and x[1] <= y[1]:
            substring_count += 1
            break

      for y in false_neg:
        for x in false_pos:
          if y[0] <= x[0] and x[1] <= y[1]:
            supstring_count += 1
            break

      gold_big = set(split_chunks(gold_phrases))
      outp_big = set(split_chunks(outp_phrases))
      tp_big = gold_big & outp_big
      fp_big = outp_big - gold_big
      fn_big = gold_big - outp_big

      if pos_fh:
        pos = filter(notstop, pos_fh.next().split())

      for id, data in zip(range(5), [true_pos, false_pos, false_neg, gold_phrases, outp_phrases]):

        for x in data:
          l = x[1] - x[0]
          if l <= 1:
            print >>sys.stderr, 'len %d clump -- ignoring' % l
          else:
            lens[id] += l
            count[id] += 1

            nlen = min(l,6)
            nlen -= 2
            count_by_len[id][nlen] += 1

            if pos_fh:
              alt_count[id] += 1
              by_pos[id]['-'.join(pos[x[0]:x[1]])] += 1

      for id, data in zip(range(5,10), [tp_big, fp_big, fn_big, gold_big, outp_big]):
        count[id] += len(data)

    if pos_fh: assert alt_count == count[:5]

    prec = 100. * count[tp_id] / count[all_pred]
    rec = 100. * count[tp_id] / count[all_gold]
    f1 = 2. * prec * rec / (prec + rec)

    prec_big = 100. * count[tp_big_id] / count[all_pred_big]
    rec_big = 100. * count[tp_big_id] / count[all_gold_big]
    f1_big = 2. * prec_big * rec_big / (prec_big + rec_big)

    print 'Summary %.2f / %.2f / %.2f ( %d / %d / %d )' % ((prec, rec, f1) + tuple(count[:3]))
    print 'PerBigr %.2f / %.2f / %.2f ( %d / %d / %d )' % ((prec_big, rec_big, f1_big) + tuple(count[5:8]))
    print 'Substring prop %.2f ( %d )' % (100. * substring_count/count[fp_id], substring_count)
    print 'Supstring prop %.2f ( %d )' % (100. * supstring_count/count[fn_id], supstring_count)

    if output:
      print >>output, 'Summary,TP,FP,FN,,'
      print >>output, 'Acc,%d,%d,%d,,' % tuple(count[:3])
      print >>output, 'Per bigr,%d,%d,%d,,' % (tuple(count[5:8]))
      print >>output, ',,,,,'

    means = tuple([float(lens[id])/count[id] for id in range(5)])
    print 'Mean Len : TP %.2f / FP %.2f / FN %.2f / Gold %.2f / Pred %.2f' % means

    if output:
      print >>output, 'Mean Len,TP,FP,FN,All pred,All gold'
      print >>output, ',%.2f,%.2f,%.2f,%.2f,%.2f' % means
      print >>output, ',,,,,'
      print >>output, ',Sub FP,All FP,Sup FN,All FN,'
      print >>output, ',%d,%d,%d,%d,' % (substring_count, count[fp_id], supstring_count, count[fn_id])
      print >>output, ',,,,,'


    if output:
      print >>output, 'By clump len,Len,TP,FP,FN'
    len_names = map(str, range(2,6)) + ['>5']

    for nlen in xrange(5):
      tp_by_len, fp_by_len, fn_by_len = (count_by_len[id][nlen] for id in range(3))
      if tp_by_len == 0:
        nlen_prec = 0.
        nlen_rec = 0.
        nlen_f = 0.
      else:
        nlen_prec = 100. * tp_by_len / (tp_by_len + fp_by_len)
        nlen_rec = 100. * tp_by_len / (tp_by_len + fn_by_len)
        nlen_f = 2. * nlen_prec * nlen_rec / (nlen_prec + nlen_rec)

      print 'NLEN %s %.2f / %.2f / %.2f ( %d / %d / %d )' % \
        (len_names[nlen], nlen_prec, nlen_rec, nlen_f, tp_by_len, fp_by_len, fn_by_len)

      if output:
        print >>output, ',%s,%d,%d,%d,' % (len_names[nlen], tp_by_len, fp_by_len, fn_by_len)

    if output:
      print >>output, ',,,,,'
        

    if pos_fh:
      pos_lists = [[(y,x) for (x,y) in by_pos[id].items()] for id in range(3)]
      for ls in pos_lists: ls.sort()

      names = ['POS-TP','POS-FP','POS-FN']
      for i in xrange(3):
        assert sum(p[0] for p in pos_lists[i]) == count[i]
        assert sum(by_pos[i].values()) == count[i]

      for name, d_list in zip(names, pos_lists):
        print '==',name,'=='
        if output:
          print >>output, '%s,POS,TP,FP,FN' % name
        for i in xrange(10):
          p = d_list[-1-i][1]
          print '%s %s\t%d' % (name, p, d_list[-1-i][0]),
          tp, fp, fn = tuple(by_pos[id][p] for id in range(3))
          prec = tp and (100. * tp / (tp + fp)) or 0
          rec = tp and (100. * tp / (tp + fn)) or 0
          f = tp and (2 * prec * rec / (prec + rec)) or 0
          print '\t%.2f / %.2f / %.2f ( %d / %d / %d )' % (prec, rec, f, tp, fp, fn)
          if output:
            print >>output, ',%s,%d,%d,%d' % (p, tp, fp, fn)

        donotcount = [x[1] for x in d_list[-10:]]
        tp, fp, fn = \
          tuple([sum([x[1] for x in by_pos[id].items() if x[0] not in donotcount]) \
                 for id in range(3)])
        print '%s Others\t%d' % (name, sum(x[0] for x in d_list[:-10])),
        prec = tp and (100. * tp / (tp + fp)) or 0
        rec = tp and (100. * tp / (tp + fn)) or 0
        f = tp and (2 * prec * rec / (prec + rec)) or 0
        print '\t%.2f / %.2f / %.2f ( %d / %d / %d )' % (prec, rec, f, tp, fp, fn)
        if output:
          print >>output, ',Others,%d,%d,%d,' % (tp, fp, fn)
          print >>output, ',,,,,'

    if output:
      output.close()

  else:
    print >>sys.stderr, 'unexpected action', args[0]
    sys.exit(1)
Exemplo n.º 15
0
    p = input(" Please input a string of symbols:\n")
else:
    p = input()

prop = LogicProposition(p)
table = gen_table(prop)

from openpyxl import Workbook
from openpyxl.utils import get_column_letter
from itertools import count as icount

wb = Workbook()
sheet = wb.active
sheet.title = "Truth Table"

for row_index, line in zip(icount(1), table):
    for column_name, item in zip((get_column_letter(i) for i in icount(1)),
                                 line):
        try:
            item = int(item)
        except (ValueError, TypeError):
            item = str(item)
            if item == "T":
                item = True
            elif item == "F":
                item = False

        cell = sheet[column_name + str(row_index)]
        cell.value = item

        if row_index > 1 and column_name != 'A':
Exemplo n.º 16
0
 def _InsertItems(self, index, items):
     """Splice ``items`` into ``self.current_value`` at ``index`` and
     report each one to ``self._Insert``.

     ``items`` is first slice-assigned into ``self.current_value`` at
     position ``index``.  ``self._Insert`` is then called once per item
     with the running position (``index``, ``index + 1``, ...), the item
     itself and ``self.lst``.

     NOTE: ``items`` is iterated twice (by the slice assignment, then by
     the loop), so a one-shot iterator is already exhausted when the loop
     starts.
     """
     current, target = self.current_value, self.lst
     current[index:index] = items
     positions = icount(index)
     for position, item in zip(positions, items):
         self._Insert(position, item, target)