def add_chunks_to_file(in_file, out_file, chunklink, fail_on_error):
    '''Read a Communication file, add chunking information, and write a
    new Communication file containing the annotated version.

    Args:
        in_file: path of the serialized input Communication.
        out_file: path the annotated Communication is written to.
        chunklink: chunking resource forwarded to add_chunks_to_comm
            (semantics defined by that helper).
        fail_on_error: forwarded to add_chunks_to_comm; presumably makes
            chunking failures fatal -- TODO confirm against its definition.
    '''
    # Deserialize
    comm = read_communication_from_file(in_file)
    # Add chunks
    num_chunked, num_sents = add_chunks_to_comm(comm, chunklink, fail_on_error)
    # Guard against ZeroDivisionError for a Communication with no sentences.
    ratio = float(num_chunked) / float(num_sents) if num_sents else 0.0
    # Lazy %-args: formatting is skipped entirely when INFO is disabled.
    logging.info("Chunked %d / %d = %f", num_chunked, num_sents, ratio)
    # Serialize
    write_communication_to_file(comm, out_file)
def add_chunks_to_file(in_file, out_file, chunklink, fail_on_error):
    '''Read a Communication from ``in_file``, annotate it with chunking
    information via ``add_chunks_to_comm``, and serialize the annotated
    Communication to ``out_file``, logging the fraction of sentences chunked.
    '''
    communication = read_communication_from_file(in_file)

    chunked_count, sentence_count = add_chunks_to_comm(
        communication, chunklink, fail_on_error)
    fraction = float(chunked_count) / float(sentence_count)
    logging.info(
        "Chunked %d / %d = %f" % (chunked_count, sentence_count, fraction))

    write_communication_to_file(communication, out_file)
This Python script is secretly a shell script. """ import os from concrete.util import write_communication_to_file from concrete.util.simple_comm import create_comm text = 'Super simple sentence .' n1 = 'simple_1.concrete' n2 = 'simple_2.concrete' n3 = 'simple_3.concrete' write_communication_to_file(create_comm('one', text), n1) write_communication_to_file(create_comm('two', text), n2) write_communication_to_file(create_comm('three', text), n3) os.system('gzip < %s > %s.gz' % (n1, n1)) os.system('bzip2 < %s > %s.bz2' % (n1, n1)) os.system('cat %s %s %s > simple_concatenated' % (n1, n2, n3)) os.system('gzip < simple_concatenated > simple_concatenated.gz') os.system('bzip2 < simple_concatenated > simple_concatenated.bz2') os.system('tar -cf simple.tar %s %s %s' % (n1, n2, n3)) os.system('tar -czf simple.tar.gz %s %s %s' % (n1, n2, n3)) os.system('tar -cjf simple.tar.bz2 %s %s %s' % (n1, n2, n3)) os.system('zip simple.zip %s %s %s' % (n1, n2, n3)) os.system('mkdir -p a/b a/c') os.system('cp %s a/b/' % n1) os.system('cp %s %s a/c/' % (n2, n3))
)] ) return comm if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument('--input-json', type=str) parser.add_argument('--aida-ontology', type=str) parser.add_argument('--output-dir', type=str) parser.add_argument('--use-dir', action='store_true') args = parser.parse_args() with open(args.aida_ontology) as f: ontology: Dict = json.load(f) json_doc_stream: Generator[Dict, None, None] = read_json() augf = AnalyticUUIDGeneratorFactory().create() for doc in tqdm(json_doc_stream): new_mentions, new_event_type = get_event(doc) new_comm = json_to_concrete(doc) add_event_to_comm(new_mentions, new_event_type, new_comm) predicted_mention_spans: List[Tuple[int, int]] = get_predicted_mentions(doc) add_additional_mentions_to_comm(mentions=predicted_mention_spans, comm=new_comm) assert validate_communication(new_comm) write_communication_to_file(new_comm, os.path.join(args.output_dir, f'{new_comm.id}.concrete'))