Exemplo n.º 1
0
def createComm(cid, ctype, txt):
    """Build a Concrete Communication from raw text, one section per line.

    The text is whitespace-normalised, stored on the communication, and
    split into "Passage" sections whose text spans cover each line
    without the newline characters that separate the lines.

    Args:
        cid: Value stored as the communication's ``id``.
        ctype: Value stored as the communication's ``type``.
        txt: Raw text of the document.

    Returns:
        The populated ``Communication``, or ``None`` when the normalised
        text is blank or ``validate_communication`` rejects the result.
    """
    # NOTE(review): '\xa0' is a non-breaking space; '\xc2' looks like the
    # stray lead byte of a UTF-8 encoded NBSP. On already-decoded text this
    # would also blank a genuine 'Â' -- confirm before changing the pattern.
    txt = re.sub('[\xa0\xc2]', ' ', txt)
    # Collapse each whitespace run that contains a newline into one '\n',
    # which also trims the whitespace around every line break.
    txt = re.sub(r'\s*\n\s*', '\n', txt)
    if not txt.strip():
        return None

    comm = Communication()
    comm.id = cid
    comm.uuid = concrete.util.generate_UUID()
    comm.type = ctype
    comm.text = txt
    comm.metadata = create_dummy_annotation()

    # One section per run of non-newline characters. Using the match
    # offsets keeps the separating '\n' (and any leading or trailing one)
    # out of every span, so no passage starts on its separator.
    sections = []
    for line in re.finditer(r'[^\n]+', txt):
        sec = concrete.Section()
        sec.uuid = concrete.util.generate_UUID()
        sec.kind = "Passage"
        sec.textSpan = concrete.TextSpan(line.start(), line.end())
        sections.append(sec)

    comm.sectionList = sections

    if not concrete.validate.validate_communication(comm):
        return None
    return comm
Exemplo n.º 2
0
def test_validate_minimal_communication_with_uuid():
    """A communication with id, metadata, type and UUID set validates."""
    now = int(time.time())
    stamp = AnnotationMetadata(tool="TEST", timestamp=now)

    minimal = Communication()
    minimal.id = "myID"
    minimal.metadata = stamp
    minimal.type = "Test Communication"
    minimal.uuid = generate_UUID()

    is_valid = validate_communication(minimal)
    assert is_valid
Exemplo n.º 3
0
def test_validate_minimal_communication_with_uuid():
    """Check that the smallest fully-identified communication validates."""
    # Build the metadata first so the communication is filled in one run.
    created_at = int(time.time())
    meta = AnnotationMetadata(tool="TEST", timestamp=created_at)

    communication = Communication()
    communication.id = "myID"
    communication.metadata = meta
    communication.type = "Test Communication"
    communication.uuid = generate_UUID()

    outcome = validate_communication(communication)
    assert outcome
Exemplo n.º 4
0
def createComm(fn):
    """Build a Concrete Communication from a UTF-8 text file.

    The file content is whitespace-normalised, stored on the
    communication, and split into "Passage" sections whose text spans
    cover each line without the newline characters separating the lines.

    Args:
        fn: Path of the file to read. It is also stored as the
            communication's ``id``, and its last '/'-separated component
            selects the type: names starting with "answer" give
            "QUORA ANSWER", everything else "QUORA QUESTION".

    Returns:
        The populated ``Communication``, or ``None`` when the normalised
        text is blank or ``validate_communication`` rejects the result.
    """
    with codecs.open(fn, 'r', 'utf-8') as f:
        txt = f.read()

    # NOTE(review): '\xa0' is a non-breaking space; '\xc2' looks like the
    # stray lead byte of a UTF-8 encoded NBSP. The text here is already
    # decoded, so this would also blank a genuine 'Â' -- confirm before
    # changing the pattern.
    txt = re.sub('[\xa0\xc2]', ' ', txt)
    # Collapse each whitespace run that contains a newline into one '\n',
    # which also trims the whitespace around every line break.
    txt = re.sub(r'\s*\n\s*', '\n', txt)
    if not txt.strip():
        return None

    comm = Communication()
    comm.id = fn
    comm.uuid = concrete.util.generate_UUID()
    is_answer = fn.split('/')[-1].startswith("answer")
    comm.type = "QUORA ANSWER" if is_answer else "QUORA QUESTION"
    comm.text = txt
    comm.metadata = create_dummy_annotation()

    # One section per run of non-newline characters. Using the match
    # offsets keeps the separating '\n' (and any leading or trailing one)
    # out of every span, so no passage starts on its separator.
    sections = []
    for line in re.finditer(r'[^\n]+', txt):
        sec = concrete.Section()
        sec.uuid = concrete.util.generate_UUID()
        sec.kind = "Passage"
        sec.textSpan = concrete.TextSpan(line.start(), line.end())
        sections.append(sec)

    comm.sectionList = sections

    if not concrete.validate.validate_communication(comm):
        return None
    return comm