Exemplo n.º 1
0
def index_v2_page(doc_type):
    """Render the paginated ``index.html`` listing for documents of ``doc_type``.

    The page number is read from the ``p`` query-string argument. A missing or
    non-integer value falls back to page 1 instead of raising ``ValueError``.

    Each listed document is dumped and augmented with:

    * ``sent_total`` -- number of ``Sent`` records attached to the document.
    * ``progress`` -- number of ``'sentence'`` annotations the current user
      (``g.user``) has made on the document.

    :param doc_type: value matched against ``Doc.type``.
    :return: the rendered ``index.html`` template.
    """
    item_per_page = 50
    # ``type=int`` makes the lookup return the default when conversion fails.
    page = request.args.get('p', 1, type=int)

    total = Doc.objects.filter(type=doc_type).count()
    # Integer ceiling division: independent of whether ``/`` is true division.
    total_page = (total + item_per_page - 1) // item_per_page
    paginator = Pagination(
        Doc.objects(type=doc_type).order_by('seq'), page, item_per_page)
    docs = paginator.items

    docs_data = []
    for doc in docs:
        item = doc.dump()
        item['sent_total'] = Sent.objects(doc=doc).count()
        item['progress'] = Annotation.objects(
            doc=doc, user=g.user, type='sentence').count()
        docs_data.append(item)

    # ``left``/``right`` bound the window of page links around the current page.
    pagination = {
        'page': page,
        'total_page': total_page,
        'left': max(1, page - 5),
        'right': min(page + 5, total_page),
    }

    return render_template('index.html', type=doc_type, docs=docs_data, g=g,
                           pagination=pagination)
Exemplo n.º 2
0
def generate_encrypted_file(seq_id):
    """Dump one document and its sentences to an XOR-obfuscated file.

    The document whose ``seq`` equals ``seq_id`` is serialised to JSON together
    with its sentences (ordered by ``index``). Each character of the JSON is
    XOR-ed with the repeating ``config.Config.ENCRYPTION_KEY`` and the
    resulting integer codes are written, comma-separated, to
    ``../data/encrypted/#<seq_id>_<title>`` relative to this module.

    Returns ``None`` without writing anything if the lookup raises.
    """
    from itertools import cycle

    def str_xor(s1, s2):
        # Pair every character of s1 with the endlessly repeated key s2.
        return ",".join(
            str(ord(left) ^ ord(right)) for left, right in zip(s1, cycle(s2))
        )

    try:
        doc = Doc.objects().get(seq=seq_id)
        sents = Sent.objects(doc=doc).order_by('index')
    except Exception:
        return

    payload = {
        'doc_id': str(doc.id),
        'title': doc.title,
        'seq': doc.seq,
        'sents': [sent.dump() for sent in sents],
    }

    encoded = str_xor(json.dumps(payload), config.Config.ENCRYPTION_KEY)

    relative_target = '/../data/encrypted/#{}_{}'.format(seq_id, doc.title)
    file_path = os.path.abspath(os.path.dirname(__file__) + relative_target)
    with open(file_path, 'w') as out:
        out.write(encoded)
Exemplo n.º 3
0
def doc_migration():
    """Set ``type`` on every document from its ``mturk`` flag and save it.

    Documents with a truthy ``mturk`` become ``'mturk'``; all others ``'v1'``.
    """
    for record in tqdm(Doc.objects().all()):
        record.type = 'mturk' if record.mturk else 'v1'
        record.save()
Exemplo n.º 4
0
def delete_doc(doc_id):
    """Delete a document together with its annotations and sentences.

    :param doc_id: ``id`` of the ``Doc`` to remove.
    :raises: whatever ``Doc.objects().get`` raises when no single document
        matches ``doc_id``.
    """
    doc = Doc.objects().get(id=doc_id)

    # Query Annotation here, not Sent: the previous code re-queried Sent, so
    # the document's annotations were never removed.
    # Annotations are removed first because they are created with a reference
    # to a sentence of the document.
    for annotation in Annotation.objects(doc=doc):
        annotation.delete()

    for sent in Sent.objects(doc=doc).order_by('index'):
        sent.delete()

    doc.delete()
Exemplo n.º 5
0
def target_migration():
    """Rewrite the ``<<TARGET>>`` marker as ``(TARGET)`` in stored texts.

    Every ``Doc`` is processed first, then every ``Sent``; each record's
    ``text`` is updated and the record saved.
    """
    def _rewrite(record):
        # Replace the marker in place and persist the record.
        record.text = record.text.replace('<<TARGET>>', '(TARGET)')
        record.save()

    for doc in tqdm(Doc.objects().all()):
        _rewrite(doc)

    for sent in tqdm(Sent.objects()):
        _rewrite(sent)
Exemplo n.º 6
0
def post_annotation():
    """Create or update the current user's annotation from the JSON request body.

    The body must provide ``doc``, ``target_text``, ``index``,
    ``anchor_offset``, ``focus_offset``, ``type`` and ``basket``.

    An existing annotation is looked up by document, sentence, index, user and
    type. For any type other than ``'sentence'`` the ``anchor_offset`` is part
    of the lookup as well. If one is found it is updated, otherwise a new one
    is created.

    :return: JSON string ``{"annotation": <dumped annotation>}``.
    :raises KeyError: if a required key is missing from the body.
    """
    data = request.get_json()

    doc_id = data['doc']
    target_text = data['target_text']
    index = data['index']
    anchor_offset = data['anchor_offset']
    focus_offset = data['focus_offset']
    annotation_type = data['type']  # renamed so the builtin ``type`` is not shadowed
    basket = data['basket']

    doc = Doc.objects().get(id=doc_id)
    # Fetched once; it is both the annotation's sentence and the source of
    # ``entire_text``.
    sent = Sent.objects().get(doc=doc, index=index)
    user = g.user

    lookup = {
        'doc': doc,
        'sent': sent,
        'index': index,
        'user': user,
        'type': annotation_type,
    }
    # Sentence annotations are matched without an offset; all other types
    # are additionally keyed by where the selection starts.
    if annotation_type != 'sentence':
        lookup['anchor_offset'] = anchor_offset

    # ``first()`` returns None when nothing matches, replacing count() + [0].
    annotation = Annotation.objects.filter(**lookup).first()
    if annotation is None:
        annotation = Annotation(doc=doc,
                                sent=sent,
                                user=user,
                                index=index,
                                type=annotation_type,
                                anchor_offset=anchor_offset)

    annotation.anchor_offset = anchor_offset
    annotation.focus_offset = focus_offset
    annotation.entire_text = sent.text
    annotation.target_text = target_text
    annotation.basket = basket
    annotation.ip = request.remote_addr

    annotation.save()

    return json.dumps({
        'annotation': annotation.dump(),
    })
Exemplo n.º 7
0
def get_annotation(doc_id):
    """Return the current user's annotations on a document as JSON.

    :param doc_id: ``id`` of the ``Doc`` whose annotations are requested.
    :return: JSON string ``{"annotations": [...]}`` of dumped annotations, or
        a 404 ``Response`` with body ``'not found'`` if the lookup raises.
    """
    try:
        doc = Doc.objects().get(id=doc_id)
        found = Annotation.objects(doc=doc, user=g.user)
    except Exception:
        return Response('not found', status=404)

    dumped = [entry.dump() for entry in found]
    return json.dumps({'annotations': dumped})
Exemplo n.º 8
0
def duplicate_doc(from_type='v2', to_type='v3'):
    """Copy every document of ``from_type`` into a new document of ``to_type``.

    The copy keeps the text and source. Its title has ``'TARGET_ONLY'``
    replaced by ``to_type``, and its ``seq`` is the current document count
    plus one. All sentences of the original are copied onto the new document.
    """
    for original in tqdm(Doc.objects(type=from_type).all()):
        copy = Doc(
            title=original.title.replace('TARGET_ONLY', to_type),
            text=original.text,
            source=original.source,
            type=to_type,
        )
        copy.seq = Doc.objects.count() + 1
        copy.save()

        for original_sent in Sent.objects(doc=original).all():
            sent_copy = Sent(index=original_sent.index,
                             text=original_sent.text,
                             doc=copy)
            sent_copy.save()
Exemplo n.º 9
0
def delete_doc_type(doc_type='v3'):
    """Delete every document whose ``type`` equals ``doc_type`` via ``delete_doc``."""
    matching = Doc.objects(type=doc_type).all()
    for record in tqdm(matching):
        delete_doc(record.id)
Exemplo n.º 10
0
def generate_encrypted_files():
    """Write an encrypted file for every document of type ``v1``, ``v2`` or ``v3``.

    Documents of any other type are skipped.
    """
    for record in tqdm(Doc.objects().all()):
        if record.type in ('v1', 'v2', 'v3'):
            generate_encrypted_file(seq_id=record.seq)