Exemplo n.º 1
0
def ml_run(dsid):
    """
    Start the ML process for a dataset that was annotated at Lexonomy.

    The annotated file is downloaded from Lexonomy, the existing ML preview
    is discarded and the ML scripts are dispatched with ``apply_async``.

    ML statuses: Starting_ML -> ML_Format -> ML_Annotated -> Lex_Format
    Error statuses: Lex2ML_Error, ML_Error, ML2Lex_Error

    :param dsid: id of the dataset to run ML on
    :return: response containing the dataset id and its new ML status
    :raises InvalidUsage: (409) when the dataset is not annotated at Lexonomy
        or when ML is already running for it
    """
    token = flask.request.headers.get('Authorization')
    uid = verify_user(token)
    dataset = Datasets.list_datasets(uid, dsid=dsid)
    if dataset.status['annotate'] != 'Ready':
        raise InvalidUsage('File is not annotated at Lexonomy.',
                           status_code=409,
                           enum='STATUS_ERROR')
    # Reject the request before anything is modified: a request answered with
    # 409 must not re-download the Lexonomy file or delete the preview while
    # an ML run is still in progress.
    if dataset.status['ml'] in ['Starting_ML', 'ML_Format', 'ML_Annotated']:
        raise InvalidUsage('ML is already running.',
                           status_code=409,
                           enum='STATUS_ERROR')

    # get annotations first, so we get lex_xml path in db
    get_lex_xml(uid, dsid)
    dataset = Datasets.list_datasets(uid, dsid=dsid)

    # deleting preview
    dataset.status['preview'] = None
    Datasets.dataset_add_ml_lexonomy_access(dsid)
    if dataset.lexonomy_ml_delete is not None:
        requests.post(dataset.lexonomy_ml_delete,
                      headers={
                          "Content-Type": 'application/json',
                          "Authorization": app.config['LEXONOMY_AUTH_KEY']
                      })

    print_log(app.name, '{} Starting ML'.format(dataset))
    dataset.status['ml'] = 'Starting_ML'
    Datasets.dataset_status(dsid, set=True, status=dataset.status)

    # Get files ready: the ML output path is derived from the Lexonomy file
    # path by replacing its last four characters with '-ML_OUT.xml'.
    xml_raw = dataset.xml_file_path
    xml_ml_out = dataset.xml_lex[:-4] + '-ML_OUT.xml'
    Datasets.dataset_add_ml_paths(dsid,
                                  xml_lex=dataset.xml_lex,
                                  xml_ml_out=xml_ml_out)

    # Run ml and remember the task id
    task = run_pdf2lex_ml_scripts.apply_async(
        args=[uid, dsid, xml_raw, dataset.xml_lex, xml_ml_out], countdown=0)
    Datasets.dataset_ml_task_id(dsid, set=True, task_id=task.id)
    return flask.make_response(
        {
            'message': 'ok',
            'dsid': dsid,
            'status': dataset.status['ml']
        }, 200)
Exemplo n.º 2
0
def delete_ml(dsid):
    """
    Delete the ML artefacts of a dataset.

    With the query argument ``local=True`` the local ML files are removed and
    the stored ML paths are reset. Otherwise the Lexonomy preview is deleted
    and the ML Lexonomy access is reset.

    :param dsid: id of the dataset
    :return: response ``{'message': 'OK'}``
    """
    token = flask.request.headers.get('Authorization')
    uid = verify_user(token)
    dataset = Datasets.list_datasets(uid, dsid=dsid)
    local = flask.request.args.get('local', default=None, type=str) == 'True'

    if local:
        print_log(app.name, 'Deleting local ML files: {}'.format(dataset))
        candidates = [
            dataset.xml_lex,
            dataset.xml_ml_out,
            '/var/www/elexifier-api/app/media/ML-IN-{}.json'.format(str(dsid)),
            '/var/www/elexifier-api/app/media/ML-OUT-{}.json'.format(str(dsid)),
        ]
        # Deletion stays best-effort, but every file is handled on its own so
        # one missing file (or an unset path) no longer skips the remaining ones.
        for path in candidates:
            if not path:
                continue
            try:
                os.remove(path)
            except OSError as err:
                print_log(app.name,
                          'Could not delete {}: {}'.format(path, err))
        Datasets.dataset_add_ml_paths(dsid)
    else:
        print_log(app.name,
                  'Deleting Lexonomy preview file: {}'.format(dataset))
        if dataset.lexonomy_ml_delete is not None:
            requests.post(dataset.lexonomy_ml_delete,
                          headers={
                              "Content-Type": 'application/json',
                              "Authorization": app.config['LEXONOMY_AUTH_KEY']
                          })
        # NOTE(review): ml_run calls dataset_add_ml_lexonomy_access(dsid)
        # without `db`; confirm which signature is correct.
        Datasets.dataset_add_ml_lexonomy_access(db, dsid)

    return flask.make_response({'message': 'OK'}, 200)
Exemplo n.º 3
0
def lexonomy_download(uid, dsid):
    """
    Serve a dataset file to Lexonomy.

    The request must carry the shared Lexonomy secret in the Authorization
    header. Depending on the query arguments ``ml`` and ``add_pages`` either
    the ML output, the first 20 pages or 20 additional pages are written to a
    temporary file, which is sent as an attachment and removed after the
    request.

    :param uid: id of the user owning the dataset
    :param dsid: id of the dataset
    :raises InvalidUsage: (401) when the shared secret does not match
    """
    if flask.request.headers.get('Authorization') != app.config['LEXONOMY_AUTH_KEY']:
        raise InvalidUsage("Shared secret is not valid!", status_code=401, enum='UNAUTHORIZED')

    query = flask.request.args
    wants_ml = query.get('ml', default="False", type=str) == "True"
    wants_more_pages = query.get('add_pages', default="False", type=str) == "True"

    # Mark the matching part of the dataset status as being processed.
    dataset = Datasets.list_datasets(uid, dsid=dsid)
    status_key = 'preview' if wants_ml else 'annotate'
    dataset.status[status_key] = 'Processing'
    Datasets.dataset_status(dsid, set=True, status=dataset.status)

    source_path = dataset.xml_file_path
    temp_fname = source_path.split(".xml")[0] + "-tmp.xml"

    @after_this_request
    def remove_file(response):
        # the temporary file is only needed for this one response
        os.remove(temp_fname)
        return response

    if wants_ml:
        # ML output
        split_preview(dataset.xml_ml_out, temp_fname, 100)
        download_name = dataset.xml_ml_out.split('/')[-1]
    elif wants_more_pages:
        # additional 20 pages
        additional_n_pages(source_path, dataset.xml_lex, temp_fname, 20)
        download_name = source_path.split('/')[-1]
    else:
        # first 20 pages
        first_n_pages(source_path, temp_fname, 20)
        download_name = source_path.split('/')[-1]

    return flask.send_file(temp_fname, attachment_filename=download_name, as_attachment=True)
Exemplo n.º 4
0
def repair_status():
    """
    Rebuild the status of every dataset with an id from 0 to 999.

    The status is derived from the stored Lexonomy access values: a dataset
    with ML Lexonomy access gets 'preview': 'Ready' and 'ml': 'Lex_Format',
    a dataset with Lexonomy access gets 'annotate': 'Ready'; everything else
    is None. Ids for which the lookup or the update fails are skipped.

    Status should be json:
        {'annotate': [None, 'Starting', 'Processing', 'Lexonomy_Error', 'Ready'],
         'ml': [None, 'Starting_ML', 'Lex2ML_Error', 'ML_Format', 'ML_Error', 'ML_Annotated', 'ML2Lex_Error', 'Lex_Format'],
         'preview': [None, 'Starting', 'Processing', 'Lexonomy_Error', 'Ready'],
         'download': [None, 'Preparing_download', 'Ready']}

    This is a one-off repair helper; when it is removed, keep the status
    description above.

    :return: response ``{'msg': 'ok'}``
    """
    for dsid in range(0, 1000):
        try:
            dataset = Datasets.list_datasets(None, dsid=dsid)
            has_ml_access = dataset.lexonomy_ml_access is not None
            has_access = dataset.lexonomy_access is not None
            status = {
                'preview': 'Ready' if has_ml_access else None,
                'ml': 'Lex_Format' if has_ml_access else None,
                'annotate': 'Ready' if has_access else None,
                'download': None
            }
            Datasets.dataset_status(dsid, set=True, status=status)
        except Exception:
            # Skip ids that cannot be processed, but let SystemExit and
            # KeyboardInterrupt propagate instead of swallowing them.
            continue
    return flask.make_response({'msg': 'ok'}, 200)
Exemplo n.º 5
0
def ml_download(dsid):
    """
    Download the ML output of a dataset as a TEI file.

    The download is a multi-step process driven by ``status['download']``:
    the first call dispatches the TEI preparation task, calls made while the
    status is 'Preparing_download' only report that state, and a call made
    when the status is 'Ready' returns the file, resets the status and
    removes the temporary file after the request.

    :param dsid: id of the dataset
    :raises InvalidUsage: (409) when there is no ML output or ML has not
        finished yet
    """
    token = flask.request.headers.get('Authorization')
    uid = verify_user(token)
    dataset = Datasets.list_datasets(uid, dsid=dsid)

    # TODO: This checks can be replaced: if preview exists (is Ready), then get it from Lexonomy and download it
    # TODO: otherwise notify user to send ml output to preview
    # check if ml output is ready for download
    # (strings are compared with ==; `is ''` tests identity, not equality)
    if dataset.xml_ml_out is None or dataset.xml_ml_out == '':
        raise InvalidUsage('No file for download. Try running ML first.',
                           status_code=409,
                           enum='STATUS_ERROR')
    elif dataset.status['ml'] in [
            None, 'Starting_ML', 'Lex2ML_Error', 'ML_Format', 'ML_Error',
            'ML_Annotated', 'ML2Lex_Error'
    ]:
        raise InvalidUsage(
            'File is not ready for download. Wait for ML to finish first.',
            status_code=409,
            enum='STATUS_ERROR')

    tmp_file = dataset.xml_ml_out.split(".xml")[0] + "_TEI.xml"

    # stop if already preparing download
    if dataset.status['download'] == 'Preparing_download':
        return flask.make_response(
            {
                'msg': 'Dataset is preparing for download',
                'status': dataset.status
            }, 200)
    # if download is ready, return file
    elif dataset.status['download'] == 'Ready':
        dataset.status['download'] = None
        Datasets.dataset_status(dsid, set=True, status=dataset.status)
        filename = dataset.name.split('.')[0] + '-transformed.xml'

        @after_this_request
        def after(response):
            response.headers['x-suggested-filename'] = filename
            response.headers.add('Access-Control-Expose-Headers', '*')
            # the prepared file is single-use
            os.remove(tmp_file)
            return response

        return flask.send_file(tmp_file,
                               attachment_filename=filename,
                               as_attachment=True,
                               conditional=True)

    # prepare download
    dataset.status['download'] = 'Preparing_download'
    Datasets.dataset_status(dsid, set=True, status=dataset.status)
    character_map = Datasets.dataset_character_map(dsid)
    prepare_TEI_download.apply_async(
        args=[dsid, dataset.xml_ml_out, tmp_file, character_map])
    return flask.make_response(
        {
            'msg': 'Dataset is preparing for download',
            'status': dataset.status['download']
        }, 200)
Exemplo n.º 6
0
def xf_list_all_transforms():
    """
    List the transformations of all datasets of the authenticated user.

    Every transformation name is prefixed with the name of its dataset
    ("<dataset name>/<transformation name>").

    :return: JSON response with a list of transformation dicts
    """
    uid = verify_user(flask.request.headers.get('Authorization'))

    collected = []
    for ds in Datasets.list_datasets(uid):
        for transform in controllers.list_transforms(ds.id):
            # the name is rewritten on the object itself before serialising
            transform.name = '/'.join((ds.name, transform.name))
            collected.append(Transformer.to_dict(transform))

    payload = flask.jsonify(collected)
    return flask.make_response(payload, 200)
Exemplo n.º 7
0
def delete_lexonomy(dsid):
    """
    Delete the Lexonomy file of a dataset and reset its Lexonomy access.

    :param dsid: id of the dataset
    :return: response ``{'message': 'OK'}``
    """
    uid = verify_user(flask.request.headers.get('Authorization'))
    dataset = Datasets.list_datasets(uid, dsid=dsid)

    delete_url = dataset.lexonomy_delete
    if delete_url is not None:
        auth_headers = {
            "Content-Type": 'application/json',
            "Authorization": app.config['LEXONOMY_AUTH_KEY'],
        }
        requests.post(delete_url, headers=auth_headers)

    # called without access values
    Datasets.dataset_add_lexonomy_access(dsid)

    body = {'message': 'OK'}
    return flask.make_response(body, 200)
Exemplo n.º 8
0
def prepare_download(uid, xfid, dsid, strip_ns, strip_header, strip_DictScrap):
    """
    Apply a transformation to a dataset and store the result as a TEI file.

    The output is written to APP_MEDIA as '<name>_<xfid>_TEI.<type>'. On
    success the download status of the transformer is set to 'Ready'; on any
    failure the traceback is printed and the download status is reset.

    :param uid: id of the user owning the dataset
    :param xfid: id of the transformation to apply
    :param dsid: id of the dataset to transform
    :param strip_ns: passed to the mapper as ``stripForValidation``
    :param strip_header: passed to the mapper as ``stripHeader``
    :param strip_DictScrap: passed to the mapper as ``stripDictScrap``
    """
    try:
        transformer = controllers.list_transforms(dsid, xfid=xfid)
        dataset = Datasets.list_datasets(uid, dsid=dsid)
        metadata = Datasets.dataset_metadata(dsid)
        xf = transformer.transform
        file_name = dataset.name
        header_Title = metadata['title']
        header_Bibl = metadata['bibliographicCitation']
        header_Publisher = metadata['publisher']

        # context manager so the source file handle is always released
        with open(dataset.file_path, 'rb') as source:
            orig_xml = source.read()

        parserLookup = lxml.etree.ElementDefaultClassLookup(
            element=DictTransformator.TMyElement)
        myParser = lxml.etree.XMLParser()
        myParser.set_element_class_lookup(parserLookup)
        entity_xml = lxml.etree.fromstring(orig_xml, parser=myParser)
        mapping = DictTransformator.TMapping(xf)
        mapper = DictTransformator.TMapper()
        out_TEI, out_aug = mapper.Transform(
            mapping, [], [lxml.etree.ElementTree(entity_xml)],
            makeAugmentedInputTrees=True,
            stripForValidation=strip_ns,
            stripHeader=strip_header,
            stripDictScrap=strip_DictScrap,
            promoteNestedEntries=True,
            headerTitle=header_Title,
            headerPublisher=header_Publisher,
            headerBibl=header_Bibl,
            metadata=metadata)
        target_xml = lxml.etree.tostring(out_TEI,
                                         pretty_print=True,
                                         encoding='unicode')

        # NOTE(review): split('.') only works for names with exactly one dot;
        # ds_download2 derives the same path the same way, so both have to be
        # changed together.
        orig_fname, file_type = file_name.split('.')
        target_fname = orig_fname + '_' + str(xfid) + '_TEI.' + file_type
        target_path = os.path.join(app.config['APP_MEDIA'], target_fname)

        # The serialised text carries no XML declaration, so it is written as
        # UTF-8 explicitly instead of relying on the locale encoding.
        with open(target_path, 'w', encoding='utf-8') as out:
            out.write(target_xml)

        # announce the file only after it has been written and closed
        controllers.transformer_download_status(xfid,
                                                set=True,
                                                download_status='Ready')

    except Exception:
        print(traceback.format_exc())
        controllers.transformer_download_status(xfid, set=True)  # reset status
Exemplo n.º 9
0
def get_lex_xml(uid, dsid):
    """
    Download the dataset file from Lexonomy and store it locally.

    The target path is the dataset's ``xml_file_path`` with its last four
    characters replaced by '-LEX.xml'. The path is saved with the dataset
    before the file is requested from ``dataset.lexonomy_access``.

    :param uid: id of the user owning the dataset
    :param dsid: id of the dataset
    """
    dataset = Datasets.list_datasets(uid, dsid=dsid)
    xml_lex = dataset.xml_file_path[:-4] + "-LEX.xml"
    Datasets.dataset_add_ml_paths(dsid, xml_lex=xml_lex, xml_ml_out=dataset.xml_ml_out)

    request_headers = { "Authorization": app.config['LEXONOMY_AUTH_KEY'], "Content-Type": 'application/json' }
    response = requests.get(dataset.lexonomy_access, headers=request_headers)

    # NOTE(review): the response status is not checked, so an error page
    # would be stored as the Lexonomy file - confirm whether that is intended.
    # The context manager closes the file even when writing fails.
    with open(xml_lex, "w") as f:
        f.write(response.text)
Exemplo n.º 10
0
def ml_preview(dsid):
    """
    Send the ML output of a dataset to Lexonomy as a preview.

    :param dsid: id of the dataset
    :return: response with the dataset id and the dataset status
    :raises InvalidUsage: (409) when there is no ML output to preview
    """
    token = flask.request.headers.get('Authorization')
    uid = verify_user(token)
    dataset = Datasets.list_datasets(uid, dsid=dsid)

    ml_output = dataset.xml_ml_out
    # Strings are compared with == (the original `is ''` tested identity).
    # NOTE(review): the grouping "(A and B) or C" is what the original
    # un-parenthesised expression evaluated to - confirm it is the intended
    # condition.
    finished_without_output = (dataset.status['ml'] == 'Lex_Format'
                               and ml_output is None)
    if finished_without_output or ml_output == '':
        raise InvalidUsage('No file for preview. Try running ML first.',
                           status_code=409,
                           enum='STATUS_ERROR')
    ds_sendML_to_lexonomy(uid, dsid)
    return flask.make_response(
        {
            'message': 'ok',
            'dsid': dsid,
            'status': dataset.status
        }, 200)
Exemplo n.º 11
0
def ds_list_datasets():
    """
    List the datasets of the authenticated user.

    Query arguments: ``order`` (upper-cased, defaults to "ASC") and
    ``mimetype`` (defaults to "text/xml").

    :return: JSON response with a list of dataset dicts
    """
    request = flask.request
    token = request.headers.get('Authorization')
    mimetype = request.args.get('mimetype')
    uid = verify_user(token)

    requested_order = request.args.get('order')
    order = requested_order.upper() if isinstance(requested_order, str) else "ASC"
    if not isinstance(mimetype, str):
        mimetype = "text/xml"

    rows = controllers.list_datasets(uid, order=order, mimetype=mimetype)
    datasets = list(map(Datasets.to_dict, rows))
    return flask.make_response(jsonify(datasets), 200)
Exemplo n.º 12
0
def ds_sendML_to_lexonomy(uid, dsid):
    """
    Send the ML output of a dataset to Lexonomy as a preview.

    An existing preview is deleted at Lexonomy first, then the Lexonomy
    request is dispatched with ``apply_async`` and the preview status of the
    dataset is set to 'Starting'.

    :param uid: id of the user owning the dataset
    :param dsid: id of the dataset
    :return: response with the dataset id, the preview status and the data
        sent to Lexonomy
    """
    user = User.query.filter_by(id=uid).first()
    dataset = Datasets.list_datasets(uid, dsid=dsid)

    old_preview_url = dataset.lexonomy_ml_delete
    if old_preview_url is not None:
        auth_headers = {
            "Content-Type": 'application/json',
            "Authorization": app.config['LEXONOMY_AUTH_KEY'],
        }
        requests.post(old_preview_url, headers=auth_headers)

    request_data = {
        'xml_file': '/api/lexonomy/{}/download/{}?ml=True'.format(uid, dsid),
        'email': user.email,
        'filename': dataset.name + ' - preview',
        'type': 'preview',
        'url': app.config['URL'],
        'return_to': "",  # remove if no longer required
        # sketch engine users are flagged
        'ske_user': user.sketch_engine_uid is not None,
    }

    print('Starting asynchronous request to Lexonomy')
    make_lexonomy_request.apply_async(args=[dsid, request_data],
                                      kwargs={"ml": True},
                                      countdown=0)

    # Update dataset status
    status = Datasets.dataset_status(dsid)
    status['preview'] = 'Starting'
    Datasets.dataset_status(dsid, set=True, status=status)

    body = {
        'message': 'OK',
        'dsid': dsid,
        'status': status['preview'],
        'test_request': request_data,
    }
    return flask.make_response(body, 200)
Exemplo n.º 13
0
def ds_download2(xfid, dsid):
    """
    Download a transformed dataset in several steps.

    The behaviour depends on the download status of the transformer:

    * query argument ``status=true``: only the current status is returned;
    * status ``None``: the transformation task is dispatched and the status
      becomes 'Processing';
    * status 'Processing': a "still processing" message is returned;
    * status 'Ready': the prepared file is sent, the status is reset and the
      file is removed after the request.

    :param xfid: id of the transformation
    :param dsid: id of the dataset
    :raises InvalidUsage: (409) when the transformer cannot be loaded
    """
    token = flask.request.headers.get('Authorization')
    uid = verify_user(token)
    status = controllers.transformer_download_status(xfid)

    get_status = flask.request.args.get('status', default='false',
                                        type=str) == 'true'

    if get_status:
        return flask.make_response({'status': status}, 200)

    elif status is None:
        print_log(
            app.name,
            'Transformed dataset download started uid: {0:s}, xfid: {1:s} , dsid: {2:s}'
            .format(str(uid), str(xfid), str(dsid)))
        strip_ns = flask.request.args.get('strip_ns',
                                          default='false',
                                          type=str) == 'true'
        strip_header = flask.request.args.get('strip_header',
                                              default='false',
                                              type=str) == 'true'
        # TODO: read the 'strip_DictScrap' query argument instead, when added
        # to FE; until then it follows strip_ns.
        strip_DictScrap = strip_ns

        # Check if transformer exists
        try:
            transform = controllers.list_transforms(dsid, xfid=xfid)
            xf = transform.transform
        except Exception:
            raise InvalidUsage('Transformer does not exist.', status_code=409)

        if xf is None:  # Not sure why this is needed here?
            return flask.make_response(
                {
                    'spec': None,
                    'entity_xml': None,
                    'output': None
                }, 200)

        # Set the status BEFORE dispatching the task: otherwise a fast worker
        # could set 'Ready' (or reset the status) first and then be
        # overwritten by 'Processing', leaving the download stuck.
        status = 'Processing'
        controllers.transformer_download_status(xfid,
                                                set=True,
                                                download_status=status)
        try:
            prepare_download.apply_async(args=[
                uid, xfid, dsid, strip_ns, strip_header, strip_DictScrap
            ],
                                         countdown=0)
        except Exception:
            # the task was never queued, so do not leave 'Processing' behind
            controllers.transformer_download_status(xfid, set=True)
            raise

    elif status == "Processing":
        return flask.make_response({'message': 'File is still processing'},
                                   200)

    elif status == "Ready":
        print_log(
            app.name,
            'Transformed dataset download finished uid: {0:s}, xfid: {1:s} , dsid: {2:s}'
            .format(str(uid), str(xfid), str(dsid)))
        # return file and delete afterwards
        # (the path must match the one built in prepare_download)
        dataset = Datasets.list_datasets(uid, dsid=dsid)
        file_name, file_type = dataset.name.split('.')
        target_file_name = file_name + '_' + str(xfid) + '_TEI.' + file_type
        target_path = os.path.join(app.config['APP_MEDIA'], target_file_name)

        @after_this_request
        def remove_file(response):
            # out_name is assigned below, before the response is produced
            response.headers['x-suggested-filename'] = out_name
            response.headers.add('Access-Control-Expose-Headers', '*')
            os.remove(target_path)
            return response

        controllers.transformer_download_status(xfid, set=True)  # reset status
        transform_name = controllers.list_transforms(dsid, xfid=xfid).name
        out_name = dataset.name[:-4] + '-' + transform_name + '.xml'
        return flask.send_file(target_path,
                               attachment_filename=out_name,
                               as_attachment=True)

    return flask.make_response({'message': 'ok', 'status': status}, 200)
Exemplo n.º 14
0
def ds_upload_new_dataset():
    """
    Receive one chunk of a chunked dataset upload.

    Each chunk is appended to a per-user temporary file in APP_MEDIA. When
    the chunk index equals the total chunk count the file size is verified,
    the file is renamed, the dataset is registered and its preparation is
    started (a PDF conversion task for pdf uploads, otherwise namespace
    clean-up plus a tag-mapping task).

    Form fields read: metadata, dztotalfilesize, dzuuid, dzchunkindex,
    dztotalchunkcount, dzchunkbyteoffset; files read: dictname, file.

    :return: a progress response for intermediate chunks, the dataset dict
        once the upload is finished
    :raises InvalidUsage: (400) when the temporary file already exists on the
        first chunk, (500) when writing fails or the final size does not match
    """
    token = flask.request.headers.get('Authorization')
    uid = verify_user(token)

    form = flask.request.form
    uploads = flask.request.files

    # file
    metadata = form.get('metadata', None)
    dictname = uploads.get('dictname', None)  # read, but not used below
    file_content = uploads.get('file', None)
    total_filesize = form.get('dztotalfilesize', None)
    dzuuid = form.get('dzuuid', None)

    current_chunk = int(form.get('dzchunkindex'))
    total_chunks = int(form.get('dztotalchunkcount', None))
    chunk_offset = int(form.get('dzchunkbyteoffset', None))

    # get file extension; without a usable filename fall back to defaults
    try:
        orig_filename = file_content.filename
        extension = '.' + file_content.filename.split('.')[-1]
    except AttributeError:
        orig_filename = 'Dictionary'
        extension = '.xml'

    media_dir = app.config['APP_MEDIA']
    filename = "tempFile_USER-{0:s}".format(str(uid)) + extension
    filepath = os.path.join(media_dir, secure_filename(filename))

    # a temporary file left over while the first chunk arrives is discarded
    # and the request rejected
    if current_chunk == 0 and os.path.exists(filepath):
        os.remove(filepath)
        raise InvalidUsage('File already exists.',
                           status_code=400,
                           enum='FILE_EXISTS')

    # write to file
    try:
        with open(filepath, 'ab') as chunk_file:
            chunk_file.seek(chunk_offset)
            chunk_file.write(file_content.stream.read())
    except OSError:
        raise InvalidUsage(
            "Not sure why, but we couldn't write the file to disk.",
            status_code=500,
            enum="FILE_ERROR")

    # more chunks expected: report progress only
    if current_chunk != total_chunks:
        progress = {
            'status': 'OK',
            'filename': filename,
            'current_chunk': current_chunk,
            'total_chunks': total_chunks
        }
        return flask.make_response(jsonify(progress), 200)

    # finish upload
    if os.path.getsize(filepath) != int(total_filesize):
        os.remove(filepath)
        raise InvalidUsage("Size mismatch.",
                           status_code=500,
                           enum="FILE_ERROR")

    new_random_name = generate_filename(filename)
    new_path = os.path.join(media_dir, secure_filename(new_random_name))
    os.rename(filepath, new_path)
    dsid = controllers.add_dataset(db, uid, total_filesize, orig_filename,
                                   new_path, dzuuid)
    controllers.dataset_metadata(dsid, set=True, metadata=metadata)

    # prepare dataset
    dataset = controllers.list_datasets(uid, dsid)
    if "pdf" in dataset.upload_mimetype:
        controllers.transform_pdf2xml.apply_async(args=[dsid])
    else:
        controllers.clean_empty_namespace(dsid)
        controllers.map_xml_tags.apply_async(args=[dsid])

    return flask.make_response(Datasets.to_dict(dataset), 200)
Exemplo n.º 15
0
def ds_dataset_info(dsid):
    """
    Return a single dataset of the authenticated user as JSON.

    :param dsid: id of the dataset
    """
    uid = verify_user(flask.request.headers.get('Authorization'))
    record = controllers.list_datasets(uid, dsid=dsid)
    as_dict = Datasets.to_dict(record)
    return flask.make_response(jsonify(as_dict), 200)
Exemplo n.º 16
0
def ds_send_to_lexonomy(dsid):
    """
    Send a dataset to Lexonomy for annotation.

    With the query argument ``add_pages=1`` the current file is fetched from
    Lexonomy first and the request asks for additional pages. Existing files
    at Lexonomy are deleted, the 'ml' and 'preview' statuses are cleared, the
    Lexonomy request is dispatched with ``apply_async`` and the 'annotate'
    status is set to 'Starting'.

    :param dsid: id of the dataset
    :return: response with the dataset id, the annotate status and the data
        sent to Lexonomy
    """
    uid = verify_user(flask.request.headers.get('Authorization'))
    user = User.query.filter_by(id=uid).first()
    db.session.close()
    dataset = Datasets.list_datasets(uid, dsid=dsid)

    add_pages_arg = flask.request.args.get('add_pages', default='0', type=str)
    additional_pages = add_pages_arg.lower() == '1'
    if additional_pages:
        # get file from lexonomy and save it
        get_lex_xml(uid, dsid)

    # Reset dataset status and delete old files @Lexonomy
    dataset.status['ml'] = None
    dataset.status['preview'] = None
    for delete_url in (dataset.lexonomy_delete, dataset.lexonomy_ml_delete):
        if delete_url is None:
            continue
        requests.post(delete_url,
                      headers={
                          "Content-Type": 'application/json',
                          "Authorization": app.config['LEXONOMY_AUTH_KEY']
                      })

    download_url = '/api/lexonomy/{}/download/{}'.format(uid, dsid)
    if additional_pages:
        download_url += '?add_pages=True'

    request_data = {
        'xml_file': download_url,
        'email': user.email,
        'filename': dataset.name + ' - annotate',
        'type': 'edit',
        'url': app.config['URL'],
        'ske_user': user.sketch_engine_uid is not None,
        'return_to': ""  # remove if no longer required
    }

    print_log(app.name,
              'Starting asynchronous request to Lexonomy {}'.format(dataset))
    make_lexonomy_request.apply_async(args=[dsid, request_data], countdown=0)

    # Update dataset status
    dataset.status['annotate'] = 'Starting'
    Datasets.dataset_status(dsid, set=True, status=dataset.status)

    body = {
        'message': 'OK',
        'dsid': dsid,
        'status': dataset.status['annotate'],
        'test_request': request_data
    }
    return flask.make_response(body, 200)
Exemplo n.º 17
0
def ds_machine_learning(dsid):
    """
    Combined ML endpoint controlled by four query arguments.

    Each of ``xml_format``, ``get_file``, ``run_ml`` and ``send_file`` is
    treated as set when its value is the string 'True':

    * ``xml_format``: prepare the TEI file and return it as a download;
    * ``get_file``: fetch the annotated file from Lexonomy;
    * ``run_ml``: dispatch the ML scripts;
    * ``send_file``: send the ML output to Lexonomy.

    When the dataset has an ML task id, only the current status is returned.

    :param dsid: id of the dataset
    :raises InvalidUsage: (422) for an invalid combination of arguments,
        (202) when a file was requested but is not ready
    """
    token = flask.request.headers.get('Authorization')
    uid = verify_user(token)

    # Every flag is a bool: the result of comparing the argument to 'True'.
    xml_format = flask.request.args.get('xml_format', default=None,
                                        type=str) == 'True'
    get_file = flask.request.args.get('get_file', default=None,
                                      type=str) == 'True'
    run_ml = flask.request.args.get('run_ml', default=None, type=str) == 'True'
    send_file = flask.request.args.get('send_file', default=None,
                                       type=str) == 'True'

    # TODO: Save paths to DB
    dataset = Datasets.list_datasets(uid, dsid=dsid)
    xml_lex = dataset.xml_lex
    xml_raw = dataset.xml_file_path
    print('xml_lex:', xml_lex, 'xml_raw:', xml_raw)

    # The ML output path replaces the last four characters of the Lexonomy
    # file path with '-ML_OUT.xml'; it stays None without a Lexonomy file.
    if xml_lex == None:
        xml_ml_out = None
    else:
        xml_ml_out = xml_lex[:-4] + "-ML_OUT.xml"
    Datasets.dataset_add_ml_paths(dsid,
                                  xml_lex=dataset.xml_lex,
                                  xml_ml_out=xml_ml_out)

    # Check if all params are None
    # NOTE(review): the flags above are always True/False, never None, so
    # this condition cannot be met as written - confirm the intended check.
    if xml_format is None and get_file is None and run_ml is None and send_file is None:
        raise InvalidUsage("Invalid API call. No params.",
                           status_code=422,
                           enum="GET_ERROR")
    # Check if too many params
    elif xml_format and (get_file or run_ml or send_file):
        raise InvalidUsage("Invalid API call. Can't work on file and send it.",
                           status_code=422,
                           enum="GET_ERROR")

    dataset.ml_task_id = Datasets.dataset_ml_task_id(dsid)
    status = dataset.status

    # Check if dataset has ml_task, then send status
    if dataset.ml_task_id:
        return flask.make_response(
            {
                "message": "File is still processing.",
                "dsid": dsid,
                "Status": status
            }, 200)

    # Check if user wants file and then return it
    # NOTE(review): other functions in this file index dataset.status like a
    # dict (e.g. dataset.status['ml']); if it is a dict here too, this
    # membership test against strings is always true - confirm.
    if xml_format and status not in [
            'Starting_ML', 'ML_Format', 'ML_Annotated', 'Lex2ML_Error',
            'ML_Error', 'ML2Lex_Error'
    ]:
        # TODO: get the latest annotated version from Lexonomy
        Datasets.update_dataset_status(dsid, 'Preparing_download')
        # NOTE(review): xml_ml_out is None when the dataset has no Lexonomy
        # file, in which case .split() raises AttributeError.
        tmp_file = xml_ml_out.split(".xml")[0] + "_TEI.xml"
        character_map = Datasets.dataset_character_map(dsid)
        # called directly here (no apply_async), i.e. within this request
        prepare_TEI_download(dsid, xml_ml_out, tmp_file, character_map)
        #tokenized2TEI(dsid, xml_ml_out, tmp_file, character_map)

        @after_this_request
        def after(response):
            # `filename` is assigned below, before the response is produced
            response.headers['x-suggested-filename'] = filename
            response.headers.add('Access-Control-Expose-Headers', '*')
            Datasets.update_dataset_status(dsid, 'Lex_Format')
            os.remove(tmp_file)
            return response

        filename = dataset.name.split('.')[0] + '-transformed.xml'
        return flask.send_file(tmp_file,
                               attachment_filename=filename,
                               as_attachment=True)
    elif xml_format:
        raise InvalidUsage("File is not ready. Try running ML again",
                           status_code=202,
                           enum="STATUS_ERROR")

    # Run ML scripts
    if get_file:  # Get file from Lexonomy
        status = "Lexonomy_Annotated"
        get_lex_xml(uid, dsid)
        Datasets.update_dataset_status(dsid, status)

    elif run_ml:
        status = "Starting_ML"
        Datasets.update_dataset_status(dsid, status)
        task = run_pdf2lex_ml_scripts.apply_async(
            args=[uid, dsid, xml_raw, xml_lex, xml_ml_out], countdown=0)
        Datasets.dataset_ml_task_id(dsid, set=True, task_id=task.id)

    elif send_file:  # Send file to Lexonomy
        # status = "ML_Annotated_@Lexonomy"
        ds_sendML_to_lexonomy(uid, dsid)

    # When no flag was set, `status` is still the unchanged dataset status.
    return flask.make_response(
        {
            "message": "OK",
            "dsid": dsid,
            "Status": status
        }, 200)
Exemplo n.º 18
0
def ml_status(dsid):
    """
    Return the status of a dataset of the authenticated user.

    :param dsid: id of the dataset
    :return: response with the dataset id and its status
    """
    uid = verify_user(flask.request.headers.get('Authorization'))
    current = Datasets.list_datasets(uid, dsid=dsid)
    body = {'dsid': dsid, 'status': current.status}
    return flask.make_response(body, 200)