def main(args=None):
    """Convert a CoNLL-U file into one pickled event file per document.

    Parses two positional command-line arguments, ``input_file`` and
    ``output_file``, reads the whole input file into memory, and for every
    document yielded by ``read_into_documents`` serializes that document's
    event with ``PickleSerializer.event_to_file`` to
    ``<output_file>/<event_id>.pickle``.

    Args:
        args: Argument list handed to ``ArgumentParser.parse_args``. ``None``
            (the default) lets argparse fall back to ``sys.argv``.
    """
    # Function-scope import so the block does not depend on a file-level
    # ``import os`` that may not exist.
    import os

    parser = ArgumentParser()
    parser.add_argument('input_file')
    parser.add_argument('output_file')
    conf = parser.parse_args(args)

    # The CoNLL-U format is defined as UTF-8 text, so decode explicitly
    # instead of relying on the platform's default locale encoding.
    with open(conf.input_file, 'r', encoding='utf-8') as in_file:
        conllu_document = in_file.read()

    for document in read_into_documents(conllu_document):
        event = document.event
        # Despite its name, ``output_file`` is used as a directory: each
        # event is written to its own file named after the event id.
        out_path = os.path.join(conf.output_file, event.event_id + '.pickle')
        PickleSerializer.event_to_file(event, out_path)
def test_pickle_serializer():
    """Round-trip an event through ``PickleSerializer`` via a temporary file.

    Builds an event with one metadata entry and a ``'plaintext'`` document
    carrying three label indices (``'two'`` holds labels that reference the
    labels of ``'one'``; ``'three'`` is added with ``distinct=True``), writes
    it with ``event_to_file``, reads it back with ``file_to_event``, and
    checks that the id, metadata, text and all three indices compare equal.
    """
    source_event = Event(event_id='1')
    source_event.metadata['foo'] = "bar"
    source_doc = Document('plaintext', text='Some text.')
    source_event.add_document(source_doc)

    one = label(start_index=0, end_index=5, x=10)
    two = label(start_index=6, end_index=10, x=15)

    def referencing_labels():
        # Fresh labels on every call: the instances used for the final
        # comparison are never the ones that were added to the document.
        return [
            label(start_index=0, end_index=25, a='b', b=one),
            label(start_index=26, end_index=42, a='c', b=two),
        ]

    def flag_labels():
        # Same reasoning as above: always build new instances.
        return [
            label(start_index=0, end_index=10, foo=True),
            label(start_index=11, end_index=15, foo=False),
        ]

    source_doc.add_labels('one', [one, two])
    source_doc.add_labels('two', referencing_labels())
    source_doc.add_labels('three', flag_labels(), distinct=True)

    with TemporaryFile('wb+') as tmp:
        PickleSerializer.event_to_file(source_event, tmp)
        # Rewind so the deserializer reads from the start of what was written.
        tmp.flush()
        tmp.seek(0)
        restored_event = PickleSerializer.file_to_event(tmp)

        assert restored_event.event_id == source_event.event_id
        assert restored_event.metadata['foo'] == 'bar'

        restored_doc = restored_event.documents['plaintext']
        assert restored_doc.text == source_doc.text

        restored_one = restored_doc.labels['one']
        assert restored_one == [one, two]

        restored_two = restored_doc.labels['two']
        assert restored_two == referencing_labels()

        restored_three = restored_doc.labels['three']
        assert restored_three == flag_labels()
def test_pickle_serializer():
    """Check the raw structure ``PickleSerializer.event_to_file`` pickles.

    Builds an event with one metadata entry and a ``'plaintext'`` document
    holding three label indices, writes it to the path of a named temporary
    file, then unpickles that file directly and asserts on the resulting
    plain mapping: ``event_id``, ``metadata``, document ``text`` and, per
    index under ``label_indices``, the ``json_labels`` list and the
    ``distinct`` flag.
    """
    def span(start, end, **fields):
        # Expected serialized form of a single label.
        return {'start_index': start, 'end_index': end, **fields}

    source_event = Event(event_id='1')
    source_event.metadata['foo'] = "bar"
    source_doc = Document('plaintext', text='Some text.')
    source_event.add_document(source_doc)

    source_doc.add_labels('one', [
        label(start_index=0, end_index=5, x=10),
        label(start_index=6, end_index=10, x=15),
    ])
    source_doc.add_labels('two', [
        label(start_index=0, end_index=25, a='b'),
        label(start_index=26, end_index=42, a='c'),
    ])
    source_doc.add_labels('three', [
        label(start_index=0, end_index=10, foo=True),
        label(start_index=11, end_index=15, foo=False),
    ], distinct=True)

    # NOTE(review): the serializer is given the temp file's *name* while this
    # handle is still open; whether a second open by name works is platform
    # dependent -- confirm the platforms this test must run on.
    with NamedTemporaryFile('rb') as tmp:
        PickleSerializer.event_to_file(source_event, tmp.name)
        tmp.flush()
        tmp.seek(0)
        payload = pickle.load(tmp)

    assert payload['event_id'] == '1'
    assert payload['metadata']['foo'] == 'bar'

    doc_payload = payload['documents']['plaintext']
    assert doc_payload['text'] == 'Some text.'

    indices = doc_payload['label_indices']
    assert len(indices) == 3

    expected_indices = {
        'one': {
            'json_labels': [span(0, 5, x=10), span(6, 10, x=15)],
            'distinct': False,
        },
        'two': {
            'json_labels': [span(0, 25, a='b'), span(26, 42, a='c')],
            'distinct': False,
        },
        'three': {
            'json_labels': [span(0, 10, foo=True), span(11, 15, foo=False)],
            'distinct': True,
        },
    }
    for index_name, expected in expected_indices.items():
        assert indices[index_name] == expected