Exemplo n.º 1
0
def generate_inputs_from_constraints(constraint_dict,
                                     min_size,
                                     max_string_size=100):
    """Build a random string that tries to satisfy occurrence constraints.

    A length is drawn uniformly from ``min_size`` to ``max_string_size``
    (both inclusive) and filled with random characters taken from
    ``printable[:-4]``. Then, for every key of ``constraint_dict``, the
    number of occurrences still missing from the current string is computed
    and that many not-yet-overwritten positions are replaced by samples of
    ``pre.create(pattern)``, where ``pattern`` is ``preg_dict[item]`` when
    the item has an entry in ``preg_dict`` and the item itself otherwise.

    Args:
        constraint_dict: Mapping from an item (a string; it is passed to
            ``re.escape``) to the minimum number of occurrences wanted.
        min_size: Lower bound (inclusive) for the initial string length.
        max_string_size: Upper bound (inclusive) for the initial length and
            hard cap on the length of the returned string.

    Returns:
        The generated string, cut to at most ``max_string_size`` characters,
        or ``None`` when fewer untouched positions remain than insertions
        are required for some item.

    Raises:
        ValueError: From ``random.randint`` when
            ``min_size > max_string_size``.
    """
    size = random.randint(min_size, max_string_size)
    free_indices = set(range(size))
    slist = random.choices(printable[:-4], k=size)

    for item in constraint_dict:
        # Count the (non-overlapping) literal occurrences already present.
        present = len(re.findall(re.escape(item), ''.join(slist)))
        num_to_insert = max(0, constraint_dict[item] - present)
        if len(free_indices) < num_to_insert:
            return None

        # random.sample() no longer accepts a set (deprecated in 3.9,
        # TypeError since 3.11), so sample from a sorted list instead.
        indices_to_insert = set(
            random.sample(sorted(free_indices), k=num_to_insert))

        for i in indices_to_insert:
            pattern = preg_dict[item] if item in preg_dict else item
            slist[i] = pre.create(pattern).sample()

        # Positions that received a sample must not be overwritten later.
        free_indices -= indices_to_insert

    # A single slot may now hold more than one character (see the original
    # "may be too big" note), so the joined string is capped; truncation can
    # cut through an inserted sample and break a constraint.
    return ''.join(slist)[:max_string_size]
Exemplo n.º 2
0
def match_col(dataset, rstring):
    """Return the indices of the columns of ``dataset`` accepted by ``rstring``.

    ``rstring`` is turned into a matcher with ``pregex.create``. For each
    column, the ``match`` scores of all its examples are summed; the column
    index is kept when that total is not negative infinity. A column with no
    examples sums to ``0`` and is therefore kept.

    Args:
        dataset: Iterable of columns, each an iterable of examples.
        rstring: Pattern string handed to ``pregex.create``.

    Returns:
        List of integer column positions, in iteration order.
    """
    matcher = pregex.create(rstring)
    neg_inf = float('-inf')
    return [
        position
        for position, column in enumerate(dataset)
        if sum(matcher.match(sample) for sample in column) != neg_inf
    ]
Exemplo n.º 3
0
def regex_plus_bound(X):
    """Return the best length-normalised score over a fixed set of regexes.

    Six patterns of the form ``<class>+`` are built with ``pregex.create``.
    For each one, the ``match`` score of every distinct element of ``X`` is
    weighted by how often that element occurs, summed, and divided by the
    total number of characters in ``X``. The largest of the six values is
    returned.

    Args:
        X: Iterable of strings; it is traversed more than once, so it must
            be re-iterable (e.g. a list).

    Returns:
        The maximum normalised score (presumably a per-character
        log-likelihood -- confirm against the pregex ``match`` contract).

    Raises:
        ZeroDivisionError: When ``X`` contains no characters at all (empty
            input or only empty strings).
    """
    from pregex import pregex
    counts = Counter(X)

    # Raw strings keep the backslashes intact without relying on Python's
    # tolerance for invalid escape sequences such as "\d" or "\l", which
    # emit a SyntaxWarning on recent interpreters.
    patterns = (r".+", r"\d+", r"\w+", r"\s+", r"\u+", r"\l+")
    regexes = [pregex.create(pattern) for pattern in patterns]

    # The normaliser does not depend on the regex, so compute it once.
    total_length = float(sum(len(x) for x in X))

    regex_scores = []
    for r in regexes:
        weighted = sum(counts[x] * r.match(x) for x in counts)
        regex_scores.append(weighted / total_length)
    return max(regex_scores)