Exemplo n.º 1
0
def ambig_map_sample(lang_pair, k=0, r=1.0, pos=None):
    """
    Take a random sample from the original ambiguity map.

    The map is loaded from the file named by
    config["sample"][lang_pair]["ambig_fname"]. The sample size is given
    either as an absolute number (k) or as a fraction (r), and the
    candidates can be restricted to particular POS tags.

    Args:
        lang_pair: key into config["sample"] selecting the ambiguity map.
        k: absolute number of source entries to keep. 0 (the default)
            keeps all candidates. A value larger than the number of
            candidates is capped at that number. Ignored when r < 1.0.
        r: fraction of the candidates to keep. Only takes effect when it
            is below 1.0, in which case it overrides k.
        pos: optional collection of POS tags. When non-empty, only source
            entries whose text after the last "/" is in pos are
            candidates.

    Returns:
        The loaded AmbiguityMap, with its source_target_map replaced by a
        dict holding only the sampled source entries.

    Raises:
        ValueError: raised by random.sample when the resulting sample
            size is negative (negative k or r).
    """
    ambig_fname = config["sample"][lang_pair]["ambig_fname"]
    ambig_map = AmbiguityMap(ambig_fname)

    if pos:
        # presumably source labels look like "lemma/POS"; the part after
        # the last slash is compared against the requested tags
        select = [sl for sl in ambig_map.source_iter()
                  if sl.rsplit("/", 1)[1] in pos]
    else:
        select = list(ambig_map.source_iter())

    if r < 1.0:
        # a fraction takes precedence over an absolute count
        k = int(round(len(select) * r))
    elif k == 0:
        k = len(select)

    # random.sample raises ValueError when asked for more items than are
    # available (e.g. after POS filtering), so cap the request
    k = min(k, len(select))

    select = random.sample(select, k)

    # look up all targets before replacing the underlying mapping
    sampled = dict((sl, ambig_map[sl]) for sl in select)
    ambig_map.source_target_map = sampled

    return ambig_map