def xf_list_transforms(dsid):
    """Return the transformations of dataset `dsid` as a JSON list.

    The optional `order` query parameter is upper-cased and forwarded to
    `controllers.list_transforms`. When it is absent the controller is called
    without `order`, so the controller's own default applies (previously a
    missing parameter raised AttributeError on `None.upper()`).

    Returns:
        A 200 response whose body is the JSON list of `Transformer.to_dict`
        results.
    """
    token = flask.request.headers.get('Authorization')
    # The user id itself is not needed; verify_user is still called
    # (presumably it rejects invalid tokens -- confirm).
    verify_user(token)

    order = flask.request.args.get('order')
    if isinstance(order, str):
        transforms = controllers.list_transforms(dsid, order=order.upper())
    else:
        transforms = controllers.list_transforms(dsid)

    payload = [Transformer.to_dict(xf) for xf in transforms]
    return flask.make_response(flask.jsonify(payload), 200)
def delete_ml(dsid):
    """Delete the ML artefacts of dataset `dsid`.

    With the query parameter `local=True` the local ML files (the dataset's
    `xml_lex` and `xml_ml_out` plus the two ML-IN/ML-OUT JSON files) are
    removed and `Datasets.dataset_add_ml_paths(dsid)` is called without paths.
    Otherwise the dataset's `lexonomy_ml_delete` URL is POSTed to (when set)
    and `Datasets.dataset_add_ml_lexonomy_access` is called.

    File removal is best effort: each file is removed independently, so one
    missing file no longer prevents the remaining ones from being deleted.

    Returns:
        A 200 response with body `{'message': 'OK'}`.
    """
    token = flask.request.headers.get('Authorization')
    uid = verify_user(token)
    dataset = Datasets.list_datasets(uid, dsid=dsid)

    local = flask.request.args.get('local', default=None, type=str) == 'True'
    if local:
        print_log(app.name, 'Deleting local ML files: {}'.format(dataset))
        json_ml_in = '/var/www/elexifier-api/app/media/ML-IN-{}.json'.format(
            str(dsid))
        json_ml_out = '/var/www/elexifier-api/app/media/ML-OUT-{}.json'.format(
            str(dsid))

        for path in (dataset.xml_lex, dataset.xml_ml_out, json_ml_in,
                     json_ml_out):
            # Skip paths that were never set (None or empty string).
            if not path:
                continue
            try:
                os.remove(path)
            except OSError:
                # Best effort: the file may already be gone.
                pass

        Datasets.dataset_add_ml_paths(dsid)
    else:
        print_log(app.name,
                  'Deleting Lexonomy preview file: {}'.format(dataset))
        if dataset.lexonomy_ml_delete is not None:
            requests.post(dataset.lexonomy_ml_delete,
                          headers={
                              "Content-Type": 'application/json',
                              "Authorization": app.config['LEXONOMY_AUTH_KEY']
                          })
        # NOTE(review): ml_run calls this helper as (dsid) without `db` --
        # confirm which signature is correct.
        Datasets.dataset_add_ml_lexonomy_access(db, dsid)

    return flask.make_response({'message': 'OK'}, 200)
def ml_download(dsid):
    """Prepare or serve the TEI download of the ML output of dataset `dsid`.

    Flow, driven by `dataset.status['download']`:
      * 'Preparing_download' -- report that preparation is in progress.
      * 'Ready' -- reset the download status, send the prepared file and
        remove it after the request.
      * anything else -- mark the status 'Preparing_download' and queue
        `prepare_TEI_download`.

    Raises:
        InvalidUsage: (409) when there is no ML output path or the ML status
            shows that ML has not finished.
    """
    token = flask.request.headers.get('Authorization')
    uid = verify_user(token)
    dataset = Datasets.list_datasets(uid, dsid=dsid)

    # TODO: This checks can be replaced: if preview exists (is Ready), then get it from Lexonomy and download it
    # TODO: otherwise notify user to send ml output to preview

    # check if ml output is ready for download
    # (truthiness test replaces the former `is ''` identity comparison, which
    # does not reliably detect an empty string)
    if not dataset.xml_ml_out:
        raise InvalidUsage('No file for download. Try running ML first.',
                           status_code=409,
                           enum='STATUS_ERROR')
    elif dataset.status['ml'] in [
            None, 'Starting_ML', 'Lex2ML_Error', 'ML_Format', 'ML_Error',
            'ML_Annotated', 'ML2Lex_Error'
    ]:
        raise InvalidUsage(
            'File is not ready for download. Wait for ML to finish first.',
            status_code=409,
            enum='STATUS_ERROR')

    tmp_file = dataset.xml_ml_out.split(".xml")[0] + "_TEI.xml"

    # stop if already preparing download
    if dataset.status['download'] == 'Preparing_download':
        return flask.make_response(
            {
                'msg': 'Dataset is preparing for download',
                'status': dataset.status
            }, 200)

    # if download is ready, return file
    elif dataset.status['download'] == 'Ready':
        dataset.status['download'] = None
        Datasets.dataset_status(dsid, set=True, status=dataset.status)
        filename = dataset.name.split('.')[0] + '-transformed.xml'

        @after_this_request
        def after(response):
            # Expose the suggested name and delete the temporary file once
            # the response has been built.
            response.headers['x-suggested-filename'] = filename
            response.headers.add('Access-Control-Expose-Headers', '*')
            os.remove(tmp_file)
            return response

        return flask.send_file(tmp_file,
                               attachment_filename=filename,
                               as_attachment=True,
                               conditional=True)

    # prepare download
    dataset.status['download'] = 'Preparing_download'
    Datasets.dataset_status(dsid, set=True, status=dataset.status)
    character_map = Datasets.dataset_character_map(dsid)
    prepare_TEI_download.apply_async(
        args=[dsid, dataset.xml_ml_out, tmp_file, character_map])
    return flask.make_response(
        {
            'msg': 'Dataset is preparing for download',
            'status': dataset.status['download']
        }, 200)
def ds_fetch_dataset_entry(dsid, entryid):
    """Return a single entry of dataset `dsid` as JSON.

    The entry is looked up with `controllers.list_dataset_entries` and
    serialised with `Datasets_single_entry.to_dict`. The former `headwords`
    query flag was parsed but never used, so it is no longer read.

    Returns:
        A 200 response with the serialised entry.
    """
    token = flask.request.headers.get('Authorization')
    # The user id is not needed here; only the verification call matters.
    verify_user(token)

    entry = controllers.list_dataset_entries(dsid, entry_id=entryid)
    payload = Datasets_single_entry.to_dict(entry)
    return flask.make_response(jsonify(payload), 200)
def entries_search(xfid, dsid):
    """Search dataset entries for the `pattern` query parameter.

    `pattern` defaults to the empty string. The search itself is delegated to
    `controllers.search_dataset_entries(db, dsid, xfid, pattern)`.

    Returns:
        A 200 response with body `{'result': <search result>}`.
    """
    verify_user(flask.request.headers.get('Authorization'))
    search_pattern = flask.request.args.get('pattern', default='', type=str)
    matches = controllers.search_dataset_entries(db, dsid, xfid,
                                                 search_pattern)
    return flask.make_response({'result': matches}, 200)
def user_delete(userid):
    """Delete user `userid`, allowed only for that same user.

    Raises:
        InvalidUsage: (401) when the verified user id differs from `userid`.
    """
    # THIS IS NOT USED AND IT DOESN'T WORK
    auth_token = flask.request.headers.get('Authorization')
    caller_id = verify_user(auth_token)

    ids_match = caller_id == userid
    if not ids_match:
        raise InvalidUsage("User ids don't match",
                           status_code=401,
                           enum="UNAUTHORIZED")

    controllers.delete_user(engine, userid)
    body = jsonify({'message': 'OK'})
    return flask.make_response(body, 200)
def ds_list_entries(dsid):
    """Return all entries of dataset `dsid` as a JSON list.

    Each entry from `controllers.list_dataset_entries(dsid)` is serialised
    with `Datasets_single_entry.to_dict`. The `headwords` query flag was
    parsed but never used (the original code said so itself), so it is no
    longer read.

    Returns:
        A 200 response with the JSON list of entries.
    """
    token = flask.request.headers.get('Authorization')
    # The user id is not needed here; only the verification call matters.
    verify_user(token)

    entries = [
        Datasets_single_entry.to_dict(entry)
        for entry in controllers.list_dataset_entries(dsid)
    ]
    return flask.make_response(jsonify(entries), 200)
def xf_list_all_transforms():
    """Return every transformation of every dataset of the calling user.

    Each transformation's `name` is prefixed with its dataset name and a
    slash before it is serialised with `Transformer.to_dict`.

    Returns:
        A 200 response with the JSON list of transformations.
    """
    auth_token = flask.request.headers.get('Authorization')
    uid = verify_user(auth_token)

    collected = []
    for ds in Datasets.list_datasets(uid):
        for transform in controllers.list_transforms(ds.id):
            # NOTE(review): this assigns to the attribute of the returned
            # object itself -- confirm it is never persisted.
            transform.name = ds.name + '/' + transform.name
            collected.append(Transformer.to_dict(transform))

    body = flask.jsonify(collected)
    return flask.make_response(body, 200)
def delete_lexonomy(dsid):
    """Delete the Lexonomy copy of dataset `dsid`.

    When the dataset has a `lexonomy_delete` URL, a POST is sent to it with
    the configured Lexonomy auth key. Afterwards
    `Datasets.dataset_add_lexonomy_access(dsid)` is called without access
    data.

    Returns:
        A 200 response with body `{'message': 'OK'}`.
    """
    auth_token = flask.request.headers.get('Authorization')
    uid = verify_user(auth_token)
    dataset = Datasets.list_datasets(uid, dsid=dsid)

    delete_url = dataset.lexonomy_delete
    if delete_url is not None:
        request_headers = {
            "Content-Type": 'application/json',
            "Authorization": app.config['LEXONOMY_AUTH_KEY'],
        }
        requests.post(delete_url, headers=request_headers)

    Datasets.dataset_add_lexonomy_access(dsid)
    return flask.make_response({'message': 'OK'}, 200)
def delete_error_log(e_id):
    """Delete error log `e_id`; restricted to admin users.

    The admin check now also rejects the case where no user row is found for
    the verified id. Previously a missing user skipped the check entirely.

    Raises:
        InvalidUsage: (401) when the caller is unknown or not an admin.
    """
    token = flask.request.headers.get('Authorization')
    uid = verify_user(token)
    user = User.query.filter_by(id=uid).first()
    db.session.close()
    if user is None or not user.admin:
        raise InvalidUsage('User is not admin.',
                           status_code=401,
                           enum="UNAUTHORIZED")

    # NOTE(review): inside this module the name `delete_error_log` is bound to
    # this view function, so this call appears to re-enter the view with two
    # positional arguments and fail with TypeError. Presumably a same-named
    # helper taking (db, e_id) was intended -- confirm and import it under an
    # alias.
    delete_error_log(db, e_id)
    return flask.make_response({'message': 'ok'}, 200)
def xf_update_transform(xfid):
    """Update transformation `xfid` from the JSON request body.

    Body fields: `xfspec` (required), `name` (optional) and `saved`
    (optional, default False). A missing, unparsable or non-object body is
    treated like an empty object, so it yields the 422 below instead of an
    AttributeError.

    Raises:
        InvalidUsage: (422) when `xfspec` is missing.

    Returns:
        A 200 response with body `{'updated': <controller result>}`.
    """
    token = flask.request.headers.get('Authorization')
    # The user id is not needed here; only the verification call matters.
    verify_user(token)

    payload = flask.request.get_json(silent=True)
    if not isinstance(payload, dict):
        payload = {}
    xfspec = payload.get('xfspec', None)
    saved = payload.get('saved', False)
    name = payload.get('name', None)

    print_log(app.name, 'Update transform {}'.format(xfid))
    if xfspec is None:
        raise InvalidUsage("Invalid API call.",
                           status_code=422,
                           enum="POST_ERROR")

    rv = controllers.update_transform(xfid, xfspec, name, saved)
    return flask.make_response({'updated': rv}, 200)
def xf_delete_transform(xfid):
    """Delete transformation `xfid` on behalf of the calling user.

    The result of `controllers.delete_transform` decides the outcome:
    `None` means the transformation does not exist, any other falsy value
    means the caller does not own it.

    Raises:
        InvalidUsage: (404) for a missing transformation, (401) when the
            caller is not the owner.

    Returns:
        A 200 response with body `{'deleted': xfid}`.
    """
    auth_token = flask.request.headers.get('Authorization')
    uid = verify_user(auth_token)
    outcome = controllers.delete_transform(uid, xfid)

    if outcome is None:
        raise InvalidUsage("Transformation does not exist.",
                           status_code=404,
                           enum="TRANSFORMATION_DOESNT_EXIST")
    if not outcome:
        raise InvalidUsage("You do not own this transformation",
                           status_code=401,
                           enum="UNAUTHORIZED")
    return flask.make_response({'deleted': xfid}, 200)
def ml_run(dsid):
    """
    Start the ML process for dataset `dsid`.

    Dataset should be annotated at Lexonomy so we can download it and start ML process.
    ML statuses: Starting_ML -> ML_Format -> ML_Annotated -> Lex_Format
    Error statuses: Lex2ML_Error, ML_Error, ML2Lex_Error

    Raises InvalidUsage (409) when the dataset is not annotated
    (status['annotate'] != 'Ready') or when ML is already running.
    Returns a 200 response with 'message', 'dsid' and the new ML status.
    """
    token = flask.request.headers.get('Authorization')
    uid = verify_user(token)
    # get annotations first, so we get lex_xml path in db
    dataset = Datasets.list_datasets(uid, dsid=dsid)
    if dataset.status['annotate'] != 'Ready':
        raise InvalidUsage('File is not annotated at Lexonomy.',
                           status_code=409,
                           enum='STATUS_ERROR')
    get_lex_xml(uid, dsid)
    # Re-read the dataset after get_lex_xml (per the comment above, that call
    # is what puts the lex_xml path into the db).
    dataset = Datasets.list_datasets(uid, dsid=dsid)

    # deleting preview
    # NOTE(review): the preview is cleared before the "already running" check
    # below, so a rejected request still drops the preview -- confirm intent.
    dataset.status['preview'] = None
    Datasets.dataset_add_ml_lexonomy_access(dsid)
    if dataset.lexonomy_ml_delete is not None:
        requests.post(dataset.lexonomy_ml_delete,
                      headers={
                          "Content-Type": 'application/json',
                          "Authorization": app.config['LEXONOMY_AUTH_KEY']
                      })

    if dataset.status['ml'] in ['Starting_ML', 'ML_Format', 'ML_Annotated']:
        raise InvalidUsage('ML is already running.',
                           status_code=409,
                           enum='STATUS_ERROR')
    print_log(app.name, '{} Starting ML'.format(dataset))
    dataset.status['ml'] = 'Starting_ML'
    # Stores the whole status dict, including the 'preview' reset above.
    Datasets.dataset_status(dsid, set=True, status=dataset.status)

    # Get files ready
    xml_raw = dataset.xml_file_path
    # Drops the last four characters of xml_lex (presumably the '.xml'
    # suffix -- confirm) before appending the ML output suffix.
    xml_ml_out = dataset.xml_lex[:-4] + '-ML_OUT.xml'
    Datasets.dataset_add_ml_paths(dsid,
                                  xml_lex=dataset.xml_lex,
                                  xml_ml_out=xml_ml_out)

    # Run ml
    task = run_pdf2lex_ml_scripts.apply_async(
        args=[uid, dsid, xml_raw, dataset.xml_lex, xml_ml_out], countdown=0)
    # Remember the task id of the queued ML job for this dataset.
    Datasets.dataset_ml_task_id(dsid, set=True, task_id=task.id)
    return flask.make_response(
        {
            'message': 'ok',
            'dsid': dsid,
            'status': dataset.status['ml']
        }, 200)
def user_data():
    """Return username, email and admin flag of the calling user.

    Raises:
        InvalidUsage: (409) when no user row matches the verified id.

    Returns:
        A 200 JSON response with keys `username`, `email` and `admin`.
    """
    auth_token = flask.request.headers.get('Authorization')
    uid = verify_user(auth_token)
    account = User.query.filter_by(id=uid).first()
    db.session.close()

    if account is None:
        raise InvalidUsage('Provide a valid auth token.',
                           status_code=409,
                           enum="INVALID_AUTH_TOKEN")

    profile = {
        'username': account.username,
        'email': account.email,
        'admin': account.admin,
    }
    return flask.make_response(jsonify(profile), 200)
def ml_preview(dsid):
    """Send the ML output of dataset `dsid` to Lexonomy for preview.

    Raises:
        InvalidUsage: (409) when the check below finds no ML output file.

    Returns:
        A 200 response with 'message', 'dsid' and the dataset status dict.
    """
    token = flask.request.headers.get('Authorization')
    uid = verify_user(token)
    dataset = Datasets.list_datasets(uid, dsid=dsid)

    # The parentheses spell out the precedence the original expression already
    # had (`and` binds tighter than `or`); the empty-string test now uses `==`
    # instead of the unreliable identity comparison `is ''`.
    # NOTE(review): the status test only guards the `is None` case --
    # presumably the intent was "no output file at all"; confirm.
    no_output = (
        (dataset.status['ml'] == 'Lex_Format' and dataset.xml_ml_out is None)
        or dataset.xml_ml_out == ''
    )
    if no_output:
        raise InvalidUsage('No file for preview. Try running ML first.',
                           status_code=409,
                           enum='STATUS_ERROR')

    ds_sendML_to_lexonomy(uid, dsid)
    return flask.make_response(
        {
            'message': 'ok',
            'dsid': dsid,
            'status': dataset.status
        }, 200)
def validate_path(dsid):
    """Filter the posted tag paths down to those that exist in the dataset.

    The JSON body carries `paths`, a list of tag sequences. A path is kept
    when every consecutive (parent, child) pair satisfies
    `child in xml_tags[parent]['child']`, where `xml_tags` comes from
    `controllers.get_xml_tags(dsid)`. A parent tag that is missing from
    `xml_tags` now makes the path invalid instead of raising KeyError.

    Returns:
        A 200 JSON response `{'paths': [<valid paths>]}`.
    """
    token = flask.request.headers.get('Authorization')
    # The user id is not needed here; only the verification call matters.
    verify_user(token)

    paths = flask.request.json.get('paths', [])
    xml_tags = controllers.get_xml_tags(dsid)

    def _path_exists(path):
        # Walk consecutive (parent, child) pairs of the path.
        for parent, child in zip(path, path[1:]):
            try:
                children = xml_tags[parent]['child']
            except KeyError:
                return False
            if child not in children:
                return False
        return True

    valid = [path for path in paths if _path_exists(path)]
    return flask.make_response(jsonify({'paths': valid}), 200)
def view_error_log(e_id):
    """Return error log `e_id`, or one of its dataset's files; admin only.

    Query flags (each enabled with the value 1, checked in this order):
      * `xml_raw` -- send the dataset's raw XML file.
      * `xml_lex` -- send the '-LEX.xml' file derived from the raw XML path.
      * `pdf` -- send the dataset's uploaded file.
    Without any flag the log record is returned, with newlines in the message
    replaced by '<br/>'.

    The admin check now also rejects a missing user row, and the message is
    formatted into a local variable instead of being written back onto the
    log object.

    Raises:
        InvalidUsage: (401) when the caller is unknown or not an admin.
    """
    token = flask.request.headers.get('Authorization')
    uid = verify_user(token)
    user = User.query.filter_by(id=uid).first()
    db.session.close()
    if user is None or not user.admin:
        raise InvalidUsage('User is not admin.',
                           status_code=401,
                           enum="UNAUTHORIZED")

    log = get_error_log(db, e_id=e_id)
    dataset = Datasets.query.filter_by(id=log.dsid).first()

    pdf = flask.request.args.get('pdf', default=0, type=int) == 1
    xml_lex = flask.request.args.get('xml_lex', default=0, type=int) == 1
    xml_raw = flask.request.args.get('xml_raw', default=0, type=int) == 1

    if xml_raw:
        return flask.send_file(dataset.xml_file_path,
                               attachment_filename='{0}_xml_raw.xml'.format(
                                   dataset.id),
                               as_attachment=True)
    elif xml_lex:
        file_path = dataset.xml_file_path.split('.xml')[0] + '-LEX.xml'
        return flask.send_file(file_path,
                               attachment_filename='{0}_xml_lex.xml'.format(
                                   dataset.id),
                               as_attachment=True)
    elif pdf:
        file_path = dataset.file_path
        return flask.send_file(
            file_path,
            attachment_filename='{0}_dictionary.pdf'.format(dataset.id),
            as_attachment=True)

    # If no params, return log
    message = log.message.replace('\n', '<br/>')
    return flask.make_response(
        {
            'id': log.id,
            'dsid': log.dsid,
            'tag': log.tag,
            'message': message,
            'time': log.created_ts
        }, 200)
def ds_list_datasets():
    """Return the calling user's datasets as a JSON list.

    Query parameters:
      * `order` -- upper-cased when given, otherwise "ASC".
      * `mimetype` -- used as given, otherwise "text/xml".

    Returns:
        A 200 response with the JSON list of `Datasets.to_dict` results.
    """
    auth_token = flask.request.headers.get('Authorization')
    requested_mimetype = flask.request.args.get('mimetype')
    uid = verify_user(auth_token)
    requested_order = flask.request.args.get('order')

    order = requested_order.upper() if isinstance(requested_order,
                                                  str) else "ASC"
    mimetype = requested_mimetype if isinstance(requested_mimetype,
                                                str) else "text/xml"

    found = controllers.list_datasets(uid, order=order, mimetype=mimetype)
    serialised = [Datasets.to_dict(ds) for ds in found]
    return flask.make_response(jsonify(serialised), 200)
def list_error_logs():
    """Return all error logs; restricted to admin users.

    The admin check now also rejects the case where no user row is found for
    the verified id. Previously a missing user skipped the check entirely.

    Raises:
        InvalidUsage: (401) when the caller is unknown or not an admin.

    Returns:
        A 200 response `{'logs': [...]}` with id, dsid, tag, message and time
        of each log.
    """
    token = flask.request.headers.get('Authorization')
    uid = verify_user(token)
    user = User.query.filter_by(id=uid).first()
    db.session.close()
    if user is None or not user.admin:
        raise InvalidUsage('User is not admin.',
                           status_code=401,
                           enum="UNAUTHORIZED")

    logs = [{
        'id': log.id,
        'dsid': log.dsid,
        'tag': log.tag,
        'message': log.message,
        'time': log.created_ts
    } for log in get_error_log(db)]
    return flask.make_response({'logs': logs}, 200)
def xf_new_transform():
    """Create a new transformation from the JSON body and prepare the dataset.

    Required body fields: `dsuuid`, `dsid`, `xfname`, `entry_spec`.
    Optional: `hw` (headword) and `saved` (default False). A missing,
    unparsable or non-object body is treated like an empty object, so it
    yields the 422 below instead of an AttributeError.

    Raises:
        InvalidUsage: (422) when a required field is missing or when
            `controllers.prepare_dataset` reports failure.

    Returns:
        A 200 response with body `{'xfid': <new transformation id>}`.
    """
    token = flask.request.headers.get('Authorization')
    uid = verify_user(token)

    payload = flask.request.get_json(silent=True)
    if not isinstance(payload, dict):
        payload = {}
    dsuuid = payload.get('dsuuid', None)
    dsid = payload.get('dsid', None)
    xfname = payload.get('xfname', None)
    entry_spec = payload.get('entry_spec', None)
    headword = payload.get('hw', None)
    saved = payload.get('saved', False)

    if dsuuid is None or xfname is None or dsid is None or entry_spec is None:
        raise InvalidUsage("Invalid API call.",
                           status_code=422,
                           enum="POST_ERROR")

    xfid = controllers.new_transform(xfname, dsid, entry_spec, headword,
                                     saved)
    isok, retmsg = controllers.prepare_dataset(uid, dsid, xfid, entry_spec,
                                               headword)
    if not isok:
        raise InvalidUsage(retmsg, status_code=422, enum="POST_ERROR")
    return flask.make_response({'xfid': xfid}, 200)
def ds_delete_dataset(dsid):
    """Queue the asynchronous deletion of dataset `dsid`.

    Returns:
        A 200 JSON response `{'deleted': dsid}`, sent once the task is
        queued.
    """
    auth_token = flask.request.headers.get('Authorization')
    # NOTE(review): the verified user id is not compared with the dataset's
    # owner here -- confirm whether the task checks ownership.
    verify_user(auth_token)

    delete_dataset_async.apply_async(args=[dsid])
    body = jsonify({'deleted': dsid})
    return flask.make_response(body, 200)
def ds_pos(dsid):
    """Return the POS elements of dataset `dsid` for the posted JSON body.

    The parsed JSON body is passed unchanged to
    `controllers.get_pos_elements(db, dsid, <body>)`, and its result is used
    directly as the response body.
    """
    verify_user(flask.request.headers.get('Authorization'))
    request_body = flask.request.json
    pos_nodes = controllers.get_pos_elements(db, dsid, request_body)
    return flask.make_response(pos_nodes, 200)
def xf_get_transform_spec(xfid):
    """Return the description of transformation `xfid` for one page.

    The `page_num` query parameter is parsed as an int and defaults to 1.
    The default is now the int 1: the lookup returns `default` unconverted
    when the parameter is missing or not a valid int, so the former default
    of `'1'` handed a string to the controller in those cases.

    Returns:
        A 200 response built from `controllers.describe_transform`.
    """
    token = flask.request.headers.get('Authorization')
    # The user id is not needed here; only the verification call matters.
    verify_user(token)

    page_num = flask.request.args.get('page_num', default=1, type=int)
    rv = controllers.describe_transform(xfid, page_num)
    return flask.make_response(rv, 200)
def xf_list_saved_transforms():
    """Return the saved transformations of the calling user as JSON.

    Returns:
        A 200 response with the JSON form of
        `controllers.list_saved_transforms(<user id>)`.
    """
    auth_token = flask.request.headers.get('Authorization')
    uid = verify_user(auth_token)
    saved_transforms = controllers.list_saved_transforms(uid)
    return flask.make_response(flask.jsonify(saved_transforms), 200)
def ds_save_metadata(dsid):
    """Store the posted `ds_metadata` for dataset `dsid`.

    The `ds_metadata` field of the JSON body (None when absent) is passed to
    `controllers.dataset_metadata(dsid, set=True, metadata=...)`.

    Returns:
        A 200 response with body `{'done': <controller result>}`.
    """
    verify_user(flask.request.headers.get('Authorization'))
    posted_metadata = flask.request.json.get('ds_metadata', None)
    outcome = controllers.dataset_metadata(dsid,
                                           set=True,
                                           metadata=posted_metadata)
    return flask.make_response({'done': outcome}, 200)
def ds_download2(xfid, dsid):
    """Drive the download of dataset `dsid` transformed with `xfid`.

    Behaviour depends on the transformer's download status:
      * query flag `status=true` -- only report the current status.
      * status None -- validate the transformer, queue `prepare_download`
        and set the status to 'Processing'.
      * 'Processing' -- report that the file is still processing.
      * 'Ready' -- reset the status, send the prepared file and remove it
        after the request.

    The transformer lookup now catches `Exception` rather than using a bare
    `except`, so interpreter-level exits are no longer turned into a 409.

    Raises:
        InvalidUsage: (409) when the transformer cannot be loaded.
    """
    token = flask.request.headers.get('Authorization')
    uid = verify_user(token)
    status = controllers.transformer_download_status(xfid)
    get_status = flask.request.args.get('status', default='false',
                                        type=str) == 'true'

    if get_status:
        return flask.make_response({'status': status}, 200)

    elif status is None:
        print_log(
            app.name,
            'Transformed dataset download started uid: {0:s}, xfid: {1:s} , dsid: {2:s}'
            .format(str(uid), str(xfid), str(dsid)))
        strip_ns = flask.request.args.get('strip_ns',
                                          default='false',
                                          type=str) == 'true'
        strip_header = flask.request.args.get('strip_header',
                                              default='false',
                                              type=str) == 'true'
        strip_DictScrap = flask.request.args.get('strip_DictScrap',
                                                 default='false',
                                                 type=str) == 'true'
        strip_DictScrap = strip_ns  # TODO: remove this, when added to FE

        # Check if transformer exists
        try:
            transform = controllers.list_transforms(dsid, xfid=xfid)
            xf = transform.transform
        except Exception as exc:
            raise InvalidUsage('Transformer does not exist.',
                               status_code=409) from exc

        if xf is None:  # Not sure why this is needed here?
            return flask.make_response(
                {
                    'spec': None,
                    'entity_xml': None,
                    'output': None
                }, 200)
        else:
            # start download task
            prepare_download.apply_async(args=[
                uid, xfid, dsid, strip_ns, strip_header, strip_DictScrap
            ],
                                         countdown=0)
            status = 'Processing'
            controllers.transformer_download_status(xfid,
                                                    set=True,
                                                    download_status=status)

    elif status == "Processing":
        return flask.make_response({'message': 'File is still processing'},
                                   200)

    elif status == "Ready":
        print_log(
            app.name,
            'Transformed dataset download finished uid: {0:s}, xfid: {1:s} , dsid: {2:s}'
            .format(str(uid), str(xfid), str(dsid)))
        # return file and delete afterwards
        dataset = Datasets.list_datasets(uid, dsid=dsid)
        # NOTE(review): this unpacking requires exactly one '.' in
        # dataset.name -- other names raise ValueError here.
        file_name, file_type = dataset.name.split('.')
        target_file_name = file_name + '_' + str(xfid) + '_TEI.' + file_type
        target_path = os.path.join(app.config['APP_MEDIA'], target_file_name)

        @after_this_request
        def remove_file(response):
            # `out_name` is assigned below, before this callback runs.
            response.headers['x-suggested-filename'] = out_name
            response.headers.add('Access-Control-Expose-Headers', '*')
            os.remove(target_path)
            return response

        controllers.transformer_download_status(xfid, set=True)  # reset status
        transform_name = controllers.list_transforms(dsid, xfid=xfid).name
        out_name = dataset.name[:-4] + '-' + transform_name + '.xml'
        return flask.send_file(target_path,
                               attachment_filename=out_name,
                               as_attachment=True)

    return flask.make_response({'message': 'ok', 'status': status}, 200)
def ds_get_metadata(dsid):
    """Return the stored metadata of dataset `dsid`.

    Returns:
        A 200 response with body `{'metadata': <controller result>}`.
    """
    verify_user(flask.request.headers.get('Authorization'))
    stored_metadata = controllers.dataset_metadata(dsid)
    return flask.make_response({'metadata': stored_metadata}, 200)
def xf_entity_transform(xfid, entityid):
    """Apply transformation `xfid` to entry `entityid` and return both XMLs.

    Query flags `strip_ns`, `strip_header` and `strip_dict_scrap` (each
    enabled with the value 'true') are forwarded to the mapper;
    `strip_dict_scrap` is passed on as 3 when set and 0 otherwise.

    Returns:
        A 200 response with `spec`, `entity_xml` (pretty-printed original)
        and `output` (pretty-printed result); all three are None when the
        transformer has no spec.
    """
    header_token = flask.request.headers.get('Authorization')
    query_token = flask.request.args.get('Authorization')

    # Old application is in this case sending Authorization token through query params.
    # Keep this conditional until we drop the support for the old app.
    verify_user(query_token if header_token is None else header_token)

    query = flask.request.args
    strip_ns = query.get('strip_ns', default='false', type=str) == 'true'
    strip_header = query.get('strip_header', default='false',
                             type=str) == 'true'
    if query.get('strip_dict_scrap', default='false', type=str) == 'true':
        strip_DictScrap = 3
    else:
        strip_DictScrap = 0

    entity = Datasets.list_dataset_entries(None, entry_id=entityid).contents
    transformer = controllers.list_transforms(None, xfid=xfid)
    spec = transformer.transform
    metadata = Datasets.dataset_metadata(transformer.dsid)

    if spec is None:
        empty = {'spec': None, 'entity_xml': None, 'output': None}
        return flask.make_response(empty, 200)

    # Parse the entry with a parser that builds the mapper's element class.
    lookup = lxml.etree.ElementDefaultClassLookup(
        element=DictTransformator.TMyElement)
    parser = lxml.etree.XMLParser(remove_blank_text=True)
    parser.set_element_class_lookup(lookup)
    entity_xml = lxml.etree.fromstring(entity, parser=parser)

    mapping = DictTransformator.TMapping(spec)
    mapper = DictTransformator.TMapper()
    input_trees = [lxml.etree.ElementTree(entity_xml)]
    out_TEI, out_aug = mapper.Transform(mapping, [],
                                        input_trees,
                                        makeAugmentedInputTrees=True,
                                        stripForValidation=strip_ns,
                                        stripDictScrap=strip_DictScrap,
                                        stripHeader=strip_header,
                                        promoteNestedEntries=True,
                                        returnFirstEntryOnly=True,
                                        metadata=metadata)

    rendered = lxml.etree.tostring(out_TEI,
                                   pretty_print=True,
                                   encoding='unicode')
    # Replace the namespaced <entry ...> opening tag with a plain <entry>.
    target_xml = ('\n' + rendered).replace(
        '<entry xmlns:m="http://elex.is/wp1/teiLex0Mapper/meta" xmlns:a="http://elex.is/wp1/teiLex0Mapper/legacyAttributes" xmlns="http://www.tei-c.org/ns/1.0">',
        '<entry>')

    original = '\n' + lxml.etree.tostring(
        entity_xml, pretty_print=True, encoding='unicode')

    body = {'spec': spec, 'entity_xml': original, 'output': target_xml}
    return flask.make_response(body, 200)
def ds_dataset_info(dsid):
    """Return dataset `dsid` of the calling user as JSON.

    Returns:
        A 200 response with the `Datasets.to_dict` form of the dataset.
    """
    auth_token = flask.request.headers.get('Authorization')
    uid = verify_user(auth_token)
    record = controllers.list_datasets(uid, dsid=dsid)
    body = jsonify(Datasets.to_dict(record))
    return flask.make_response(body, 200)
def ds_upload_new_dataset():
    """Receive one chunk of a chunked dataset upload and finish it when done.

    Form fields read: `metadata`, `dztotalfilesize`, `dzuuid`,
    `dzchunkindex`, `dztotalchunkcount`, `dzchunkbyteoffset`; files read:
    `dictname` and `file`.

    Each chunk is appended to a per-user temporary file in APP_MEDIA. While
    `dzchunkindex` differs from `dztotalchunkcount` a progress response is
    returned. Otherwise the file size is checked, the file is renamed, a
    dataset row is created and follow-up processing is started.

    Raises InvalidUsage: (400) when the temporary file already exists at
    chunk 0, (500) on a write error or a size mismatch.
    """
    token = flask.request.headers.get('Authorization')
    uid = verify_user(token)

    # file
    metadata = flask.request.form.get('metadata', None)
    # NOTE(review): `dictname` is read but not used in this function.
    dictname = flask.request.files.get('dictname', None)
    file_content = flask.request.files.get('file', None)
    total_filesize = flask.request.form.get('dztotalfilesize', None)
    dzuuid = flask.request.form.get('dzuuid', None)
    # The int() conversions below raise if a chunk field is missing.
    current_chunk = int(flask.request.form.get('dzchunkindex'))
    total_chunks = int(flask.request.form.get('dztotalchunkcount', None))
    chunk_offset = int(flask.request.form.get('dzchunkbyteoffset', None))

    # get file extension
    try:
        orig_filename = file_content.filename
        extension = '.' + file_content.filename.split('.')[-1]
    except AttributeError:
        # No usable file object: fall back to a generic name and '.xml'.
        orig_filename = 'Dictionary'
        extension = '.xml'

    # One temporary file per user and extension.
    filename = "tempFile_USER-{0:s}".format(str(uid)) + extension
    filepath = os.path.join(app.config['APP_MEDIA'], secure_filename(filename))

    # A leftover temporary file at the first chunk is removed and the request
    # is rejected.
    if os.path.exists(filepath) and current_chunk == 0:
        os.remove(filepath)
        raise InvalidUsage('File already exists.',
                           status_code=400,
                           enum='FILE_EXISTS')

    try:
        # write to file
        # NOTE(review): the file is opened in append mode, where writes may go
        # to the end of the file regardless of seek() -- confirm that chunks
        # always arrive in order.
        with open(filepath, 'ab') as f:
            f.seek(chunk_offset)
            f.write(file_content.stream.read())
    except OSError:
        raise InvalidUsage(
            "Not sure why, but we couldn't write the file to disk.",
            status_code=500,
            enum="FILE_ERROR")

    # NOTE(review): this compares the chunk index with the chunk count --
    # confirm against the client whether the last index equals the count.
    if current_chunk != total_chunks:
        return flask.make_response(
            jsonify({
                'status': 'OK',
                'filename': filename,
                'current_chunk': current_chunk,
                'total_chunks': total_chunks
            }), 200)
    else:
        # finish upload
        if os.path.getsize(filepath) != int(total_filesize):
            os.remove(filepath)
            raise InvalidUsage("Size mismatch.",
                               status_code=500,
                               enum="FILE_ERROR")
        else:
            # Move the temporary file to its generated final name.
            new_random_name = generate_filename(filename)
            new_path = os.path.join(app.config['APP_MEDIA'],
                                    secure_filename(new_random_name))
            os.rename(filepath, new_path)
            dsid = controllers.add_dataset(db, uid, total_filesize,
                                           orig_filename, new_path, dzuuid)
            controllers.dataset_metadata(dsid, set=True, metadata=metadata)

            # prepare dataset
            dataset = controllers.list_datasets(uid, dsid)
            # PDF uploads are queued for PDF-to-XML conversion; anything else
            # has empty namespaces cleaned and its XML tags mapped.
            if "pdf" in dataset.upload_mimetype:
                controllers.transform_pdf2xml.apply_async(args=[dsid])
            else:
                controllers.clean_empty_namespace(dsid)
                controllers.map_xml_tags.apply_async(args=[dsid])
            return flask.make_response(Datasets.to_dict(dataset), 200)