def get_invalid_matches(manifest, input):
    """Collect the samples of a test file that were normalized incorrectly.

    A fresh ``Normalizer`` is loaded with ``manifest``, the samples are read
    from ``input`` with ``read_input`` and pushed through
    ``normalize_samples`` (with ``verbose=False``).

    Args:
        manifest: path to the manifest file given to
            ``Normalizer.read_manifest``.
        input: path to the sample file given to ``read_input``.

    Returns:
        A list with every normalized entry whose ``'expected'`` value is
        different from its ``'output'`` value, in the order produced by
        ``normalize_samples``.
    """
    norm = Normalizer()
    norm.read_manifest(manifest)

    results = normalize_samples(norm, read_input(input), verbose=False)

    # Keep only the entries where the normalizer disagreed with the
    # expectation recorded in the test file.
    mismatches = []
    for result in results:
        if result['expected'] != result['output']:
            mismatches.append(result)
    return mismatches
def check_data(manifest, sample):
    """Assert that no row of a sample file fails normalization unexpectedly.

    Loads ``manifest`` into a fresh ``Normalizer`` and runs
    ``normalize_raw`` on every data row of ``sample`` (the first line of the
    file is skipped as a header). A row counts as a bad match when the
    result is flagged ``False`` although at most one candidate matched and
    the row name is not a range (it contains no ``-``). Every bad match is
    printed, followed by a summary line with the total.

    Args:
        manifest: path to the manifest file given to
            ``Normalizer.read_manifest``.
        sample: path to a comma-separated sample file; the first field of
            each row is used as the section name and the second as the row
            name.

    Raises:
        AssertionError: if at least one bad match was found in the file.
        IndexError: if a non-blank row has fewer than two fields.
    """
    normalizer = Normalizer()
    normalizer.read_manifest(manifest)

    with open(sample, 'r') as f:
        sample_file = f.read().strip()

    # The counter must live outside the loop: resetting it per row would make
    # the final assertion reflect only the last row of the file. Initializing
    # it here also keeps the summary valid for a header-only or empty file.
    count = 0

    if len(sample_file) > 1:
        for line in sample_file.split('\n')[1:]:
            line = line.strip()
            if not line:
                # A blank line carries no sample; skip it instead of failing
                # on the field lookup below.
                continue

            # Only the first two comma-separated fields are used.
            elements = line.split(',')
            section_name = elements[0]
            row_name = elements[1]

            # Layout of the result, per the original author's note:
            # [(section_id, row_id, bool), number of matches,
            #  different section ids we have seen,
            #  different row_id we have seen]
            # data example: [(None, None, False), 3, set([170, 11, 118]), set([7])]
            data = normalizer.normalize_raw(section_name, row_name)

            # We are looking for matches that result in `False` but are not
            # caused by the database returning multiple entries or by the
            # row_name value being passed as a range.
            # The explicit `== False` is kept on purpose so that a `None`
            # flag is not counted as a failure.
            if data[0][2] == False and data[1] <= 1 and '-' not in row_name:
                count += 1
                # Single-argument print() produces the same text as the
                # Python 2 print statement and also parses on Python 3.
                print('%s %s %s %s' % (
                    data, section_name, row_name, row_name.isdigit()))

    print('BAD MATCH:  %d' % count)
    assert count == 0, '%d bad match(es) found' % count
description='grader for seatgeek SectionNormalization code test') parser.add_argument('--manifest', default=None, help='path to manifest file') parser.add_argument('--input', default=None, help='path to input file') parser.add_argument('--section', default=None, help='section input (for testing)') parser.add_argument('--row', default=None, help='row input (for testing)') args = parser.parse_args() assert args.manifest normalizer = Normalizer() normalizer.read_manifest(args.manifest) if args.section and args.row: section_id, row_id, valid = normalizer.normalize( args.section, args.row) print """ Input: [section] {}\t[row] {} Output: [section_id] {}\t[row_id] {} Valid?: {} """.format(args.section, args.row, section_id, row_id, valid) elif args.input: samples = read_input(args.input)
import csv

from normalizer import Normalizer

# Normalizer loaded with the venue manifest the samples are checked against.
normalizer = Normalizer()
normalizer.read_manifest('../../manifests/citifield_sections.csv')
# normalizer.read_manifest('../../manifests/dodgerstadium_sections.csv')

# Parallel lists: inp[i] is the raw (section, row) pair and correct[i] is the
# expected normalization result for it.
inp = []
correct = []

# with open('../../samples/dodgertest.csv') as sample_file:
with open('../../samples/metstest.csv') as sample_file:
    for record in csv.reader(sample_file):
        inp.append({'section': record[0], 'row': record[1]})
        # The fifth column holds the literal text 'True' for valid rows;
        # anything else (including the header cell) counts as not valid.
        correct.append({
            'section_id': record[2],
            'row_id': record[3],
            'valid': record[4].strip() == 'True',
        })

# The first row of the file is the header, so drop it from both lists.
inp, correct = inp[1:], correct[1:]

assert len(inp) == len(correct)