Пример #1
0
def test_CommunicationWriterTGZ_single_file_default_name(output_file, login_info):
    """Write one Communication with CommunicationWriterTGZ and inspect the archive.

    The test checks that the output is a tar file holding exactly one member,
    and that the member's name, type, mtime, size, mode and ownership match
    the expected values.

    Args:
        output_file: Path the writer opens and the tar file is read back from.
        login_info: Mapping with the keys ``uid``, ``username``, ``gid`` and
            ``groupname`` that the member's ownership fields are compared to.
    """
    source_path = "tests/testdata/simple_1.concrete"
    comm = read_communication_from_file(source_path)
    writer = CommunicationWriterTGZ()
    try:
        writer.open(output_file)
        writer.write(comm)
    finally:
        writer.close()

    assert tarfile.is_tarfile(output_file)

    f = tarfile.open(output_file)
    # Close the archive even when an assertion fails, so the handle is not
    # leaked into the remaining tests.
    try:
        tarinfo = f.next()
        assert tarinfo is not None

        # With no explicit name given to write(), the member is expected to be
        # named after the Communication's UUID.
        assert comm.uuid.uuidString + ".concrete" == tarinfo.name
        assert tarinfo.isreg()
        assert tarinfo.mtime > time.time() - TIME_MARGIN
        assert os.stat(source_path).st_size == tarinfo.size
        # 0o644 is accepted by Python 2.6+ and Python 3; the legacy ``0644``
        # spelling is a syntax error on Python 3.
        assert 0o644 == tarinfo.mode
        assert login_info["uid"] == tarinfo.uid
        assert login_info["username"] == tarinfo.uname
        assert login_info["gid"] == tarinfo.gid
        assert login_info["groupname"] == tarinfo.gname

        # TarFile.next() returns None once all members have been read.
        tarinfo = f.next()
        assert tarinfo is None
    finally:
        f.close()
Пример #2
0
def test_CommunicationWriterTGZ_single_file_default_name(output_file):
    """Write one Communication with CommunicationWriterTGZ and inspect the archive.

    The test checks that the output is a tar file holding exactly one member,
    and that the member's name, type, mtime, size, mode and ownership match
    the expected values. Ownership is compared against the user and group of
    the current process.

    Args:
        output_file: Path the writer opens and the tar file is read back from.
    """
    source_path = 'tests/testdata/simple_1.concrete'
    comm = read_communication_from_file(source_path)
    writer = CommunicationWriterTGZ()
    writer.open(output_file)
    # Close the writer even if write() raises, so the output file is not left
    # open for the rest of the test session.
    try:
        writer.write(comm)
    finally:
        writer.close()

    assert tarfile.is_tarfile(output_file)

    f = tarfile.open(output_file)
    # Close the archive even when an assertion fails.
    try:
        tarinfo = f.next()
        assert tarinfo is not None

        # With no explicit name given to write(), the member is expected to be
        # named after the Communication's UUID.
        assert comm.uuid.uuidString + '.concrete' == tarinfo.name
        assert tarinfo.isreg()
        assert tarinfo.mtime > time.time() - TIME_MARGIN
        assert os.stat(source_path).st_size == tarinfo.size
        # 0o644 is accepted by Python 2.6+ and Python 3; the legacy ``0644``
        # spelling is a syntax error on Python 3.
        assert 0o644 == tarinfo.mode

        uid = os.getuid()
        gid = os.getgid()
        assert uid == tarinfo.uid
        assert pwd.getpwuid(uid).pw_name == tarinfo.uname
        assert gid == tarinfo.gid
        assert grp.getgrgid(gid).gr_name == tarinfo.gname

        # TarFile.next() returns None once all members have been read.
        tarinfo = f.next()
        assert tarinfo is None
    finally:
        f.close()
Пример #3
0
    # Read a gzip-compressed, tab-separated input of (id, label, text) rows and
    # write one Communication per row through a CommunicationWriterTGZ.
    ofd = CommunicationWriterTGZ(options.output)
    # Close the writer even if reading or conversion fails part-way through.
    try:
        with reader(gzip.open(options.input)) as ifd:
            # NOTE(review): `i` is unused in the visible code; enumerate is kept
            # in case code outside this fragment relies on it.
            for i, line in enumerate(ifd):
                toks = line.strip().split("\t")
                # Rows that do not split into exactly three fields are skipped.
                if len(toks) != 3:
                    continue
                cid, label, text = toks
                g = ugf.create()
                t = int(time())

                # UUIDs are drawn in the same order as before:
                # communication, then tagging, then section.
                comm_uuid = g.next()
                tagging = CommunicationTagging(
                    uuid=g.next(),
                    metadata=AnnotationMetadata(
                        tool="Gold labeling",
                        timestamp=t,
                        kBest=1,
                    ),
                    taggingType=options.tag_type,
                    tagList=[label],
                    confidenceList=[1.0],
                )
                comm_metadata = AnnotationMetadata(
                    tool="text_to_concrete.py ingester",
                    timestamp=t,
                    kBest=1,
                )
                # A single section spans the whole text.
                section = Section(
                    uuid=g.next(),
                    textSpan=TextSpan(start=0, ending=len(text)),
                    kind="content",
                )
                comm = Communication(
                    id=cid,
                    uuid=comm_uuid,
                    type="Text document",
                    text=text,
                    communicationTaggingList=[tagging],
                    metadata=comm_metadata,
                    sectionList=[section],
                )
                ofd.write(comm)
    finally:
        ofd.close()