def parse_file(file_path, schema_path, mode):
    """Parse every line of a text file into a row built from a schema.

    Args:
        file_path: Path of the file to read; it is decoded as ISO-8859-1.
        schema_path: Forwarded to ``parse_schema`` to load the schema.
        mode: Forwarded to ``parse_schema`` unchanged.

    Returns:
        A list holding the ``create_row_dict`` result for each line of the
        file, in file order.
    """
    # Load the schema once, before the data file is opened.
    schema_list = parse_schema(schema_path, mode)
    with open(file_path, 'r', encoding="ISO-8859-1") as f:
        # Iterate the file object directly rather than f.readlines(): the
        # lines produced are the same (line terminators included), but the
        # file is never duplicated in memory as an intermediate list.
        return [create_row_dict(line, schema_list) for line in f]
new_question['dependencies'] = [{ 'if': dep[0], 'then': dep[1] } for dep in q_dependencies] new_topic['questions'].append(new_question) j['topics'].append(new_topic) j['text'] = document['text'] raw_tuas = document['tuas'][sample_type] for tua_id, tua in raw_tuas.iteritems(): new_tua = {} new_tua['id'] = tua_id new_tua['type'] = sample_type new_tua['instructions'] = schema['instructions'] new_tua['offsets'] = [{'start': off[0], 'stop': off[1]} for off in tua] sample = copy.deepcopy(j) sample['tua'] = new_tua samples.append(sample) return samples if __name__ == '__main__': sample_schema = parse_schema() sample_doc = parse_document(SAMPLE_DOC) samples = data_to_sample_json(sample_schema, sample_doc, SAMPLE_TYPE) for i, sample in enumerate(samples): fname = '%s_%d.json' % (os.path.splitext( os.path.basename(SAMPLE_DOC))[0], i) with open(fname, 'w') as outf: json.dump(sample, outf)
from parse_schema import parse_schema


def schema2dot(schema):
    """Render the table relationships of *schema* as a DOT digraph.

    Args:
        schema: Object with a ``tables`` mapping. Each value must expose a
            ``name`` and a ``relationships`` mapping whose values expose a
            ``table`` attribute (the edge target).

    Returns:
        The digraph source as one string: an opening ``digraph Schema {``
        line, one tab-indented ``source -> target;`` line per relationship,
        and a closing ``}``, joined with newlines (no trailing newline).
    """
    dot = ["digraph Schema {"]
    for table in schema.tables.values():
        # One directed edge from this table to every table it relates to.
        dot.extend(
            "\t%s -> %s;" % (table.name, r.table)
            for r in table.relationships.values()
        )
    dot.append("}")
    return "\n".join(dot)


if __name__ == '__main__':
    import sys

    # Script mode: parse the schema from standard input and write the graph
    # to standard output.
    schema = parse_schema(sys.stdin)
    # The parenthesised single-argument form prints the same text under the
    # Python 2 print statement and the Python 3 print() function; the bare
    # statement form is a SyntaxError on Python 3.
    print(schema2dot(schema))
new_answer['text'] = answer['text'] new_question['answers'].append(new_answer) new_question['dependencies'] = [{'if': dep[0], 'then': dep[1]} for dep in q_dependencies] new_topic['questions'].append(new_question) j['topics'].append(new_topic) j['text'] = document['text'] raw_tuas = document['tuas'][sample_type] for tua_id, tua in raw_tuas.iteritems(): new_tua = {} new_tua['id'] = tua_id new_tua['type'] = sample_type new_tua['instructions'] = schema['instructions'] new_tua['offsets'] = [{'start': off[0], 'stop': off[1]} for off in tua] sample = copy.deepcopy(j) sample['tua'] = new_tua samples.append(sample) return samples if __name__ == '__main__': sample_schema = parse_schema() sample_doc = parse_document(SAMPLE_DOC) samples = data_to_sample_json(sample_schema, sample_doc, SAMPLE_TYPE) for i, sample in enumerate(samples): fname = '%s_%d.json' % (os.path.splitext(os.path.basename(SAMPLE_DOC))[0], i) with open(fname, 'w') as outf: json.dump(sample, outf)
from parse_schema import parse_schema


def schema2dot(schema):
    """Build DOT digraph text describing how the schema's tables relate.

    Args:
        schema: Object exposing ``tables``, a mapping whose values each have
            a ``name`` attribute and a ``relationships`` mapping; every
            relationship value has a ``table`` attribute naming the target.

    Returns:
        A newline-joined string starting with ``digraph Schema {``, followed
        by one tab-indented ``name -> target;`` edge per relationship, and
        ending with ``}``.
    """
    dot = ["digraph Schema {"]
    for table in schema.tables.values():
        # Emit an edge from the current table to each related table.
        edges = [
            "\t%s -> %s;" % (table.name, r.table)
            for r in table.relationships.values()
        ]
        dot.extend(edges)
    dot.append("}")
    return "\n".join(dot)


if __name__ == '__main__':
    import sys

    # Read the schema definition from stdin and print its graph to stdout.
    schema = parse_schema(sys.stdin)
    # print(...) with a single argument is valid, and produces identical
    # output, on both Python 2 and Python 3; the original statement form
    # only parses on Python 2.
    print(schema2dot(schema))