示例#1
0
# CSV column names: the keys of one sample person record, in their
# iteration order (passed to csv.DictWriter as fieldnames).
fieldnames = [*F.getPerson().keys()]

# Keys treated as "important": values under these keys are never
# selected for typo injection, so they stay reliably typo-free.
safeKeys = [
    "ssn",
    "sex",
    "bloodType",
]

# Chance, evaluated independently for each eligible field, that the
# field receives typos.
typoProbability = 0.025

# Running tallies, all starting from zero.
numPeopleWithTypos, totNumTypos, totYield = 0, 0, 0

# Open the output CSV, write the header row, then walk the generated people
# and pick string fields to corrupt with typos.
# NOTE(review): the csv module docs recommend opening the file with
# newline="" so the writer controls line endings; without it, extra blank
# rows can appear on platforms that translate "\n" -- consider adding it.
with open(outputCsvPath, "w") as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()

    for p in F.getPeople(numPeople):
        # Count every person yielded by F.getPeople.
        totYield += 1
        # Becomes True once any field of this person is selected for typos.
        typoGiven = False
        for k, v in p.items():
            # Only string values whose key is not in safeKeys are eligible.
            # NOTE(review): isinstance(v, str) is the usual spelling of this
            # check; type(v) == type("") also rejects str subclasses.
            if k not in safeKeys and type(v) == type(""):
                # Select this field with probability typoProbability
                # (presumably rd is the stdlib random module -- confirm).
                if rd.random() < typoProbability:
                    typoGiven = True
                    # Distinct character positions chosen for typos.
                    idxs = set()
                    # One or two typos, capped by the length of the value.
                    # NOTE(review): if v is the empty string, min(2, len(v))
                    # is 0 and randint(1, 0) would raise ValueError with the
                    # stdlib random module -- confirm empty values cannot
                    # occur here, or guard against them.
                    numTypos = rd.randint(1, min(2, len(v)))
                    totNumTypos += numTypos

                    # Draw positions until numTypos distinct ones are held;
                    # the set discards repeated draws.
                    while len(idxs) < numTypos:
                        idxs.add(rd.randint(0, len(v) - 1))

                    # print(p[k],'-->',end=' ')
                    # Copy of the value as a list of characters.
                    newVal = list(p[k])