示例#1
0
def process_task(file_ids_list=None):
    """Run ``process_data`` on every stored file id and return those ids.

    The uploads selected by ``file_ids_list`` (every upload when it is empty
    or ``None``) are fetched and decoded as UTF-8 first, but the resulting
    mapping is never read afterwards.

    NOTE(review): the processing loop walks *all* stored files rather than
    the selected uploads, and nothing is saved through
    ``boilerplate.add_processed_file``. This looks like work in progress;
    confirm the intended behaviour before relying on it.
    """
    prefix = boilerplate.UPLOAD_PREFIX
    uploads = boilerplate.list_files(recursive=True, prefix=prefix)
    if file_ids_list:
        requested = [prefix + file_id for file_id in file_ids_list]
        # Keep only the requested ids that actually exist among the uploads.
        uploads = [name for name in requested if name in uploads]

    # Built only for the fetch/decode it performs; the dict is not used below.
    data_to_process = {}
    for name in uploads:
        data_to_process[name[len(prefix):]] = \
            boilerplate.get_file(name).decode('utf-8')

    handled = []
    every_file = boilerplate.list_files(recursive=True)
    print('HEYY')
    for stored_id in every_file:
        print('first')
        process_data(stored_id)
        handled.append(stored_id)

    return handled
示例#2
0
def query_endpoint(file_id=None):
    """Answer a query against the gold data or against a processed file.

    The query type is read from the ``type`` URL argument. For POST requests
    the JSON body is passed on to ``query_data`` as the required tags;
    otherwise ``None`` is passed.

    Args:
        file_id: ``"gold"`` for the gold data, otherwise the id of a
            processed file (without ``boilerplate.PROCESSED_PREFIX``).

    Returns:
        A JSON response: the query result, gold statistics/examples, or an
        ``{"error": ...}`` object.
    """
    query_type = request.args.get('type')
    # Only POST requests carry a JSON body with the required tags.
    tags_required = request.get_json() if request.method == 'POST' else None

    if file_id is None:
        if query_type is None:
            return jsonify(
                {"error": boilerplate.ERROR_NO_QUERY_TYPE_SPECIFIED})
        # No file to look up. Previously this case fell through to
        # `PROCESSED_PREFIX + None` instead of producing an error response.
        return jsonify({"error": boilerplate.ERROR_NO_SUCH_FILE})

    if file_id == "gold":
        if query_type == "statistics":
            return jsonify(boilerplate.get_gold_statistics())
        if query_type == "examples":
            try:
                # int(None) raises TypeError when `limit` is missing, and
                # ValueError when it is not a number; both are client errors.
                limit = int(request.args.get('limit'))
            except (TypeError, ValueError):
                return jsonify({"error": "wrong limit parameter passed"})
            return jsonify(boilerplate.get_gold_examples(limit))
        processed_file, file_id = boilerplate.get_gold("txt")
        text = boilerplate.read_file(processed_file)
    else:
        processed_file_id = boilerplate.PROCESSED_PREFIX + file_id
        if processed_file_id not in boilerplate.list_files(recursive=True):
            return jsonify({"error": boilerplate.ERROR_NO_SUCH_FILE})
        text = boilerplate.get_file(processed_file_id)

    return jsonify(query_data(query_type, text, tags_required))
def get_file_endpoint(file_id):
    """Return the contents of a stored file as an HTTP response.

    Ids that start with ``boilerplate.PROCESSED_PREFIX`` and end in ``.xml``
    are served as ``text/xml``; everything else as ``text/plain``. An id that
    is not in the file listing yields a JSON error object.
    """
    if file_id not in boilerplate.list_files(recursive=True):
        return jsonify({'error': boilerplate.ERROR_NO_SUCH_FILE})

    contents = boilerplate.get_file(file_id)
    is_processed_xml = (file_id.startswith(boilerplate.PROCESSED_PREFIX)
                        and file_id.endswith('.xml'))
    mimetype = 'text/xml' if is_processed_xml else 'text/plain'
    return Response(contents, mimetype=mimetype)
def get_file(file_id):
    """Return a stored file's contents, base64-encoded, for a JSON-RPC caller.

    Args:
        file_id: id of the file, as it appears in the file listing.

    Returns:
        A dict with ``file_id`` and ``file_contents_base64`` (a str).

    Raises:
        JSONRPCDispatchException: with the "no such file" code when the id is
            not listed, or with the "no file part" code when fetching or
            encoding the contents raises ``TypeError``.
    """
    known_ids = boilerplate.list_files(recursive=True)
    if file_id not in known_ids:
        raise JSONRPCDispatchException(
            code=boilerplate.ERROR_NO_SUCH_FILE_CODE,
            message=boilerplate.ERROR_NO_SUCH_FILE)

    try:
        raw = boilerplate.get_file(file_id)
        encoded = b64encode(raw).decode("utf-8")
    except TypeError:
        raise JSONRPCDispatchException(
            code=boilerplate.ERROR_NO_FILE_PART_CODE,
            message=boilerplate.ERROR_NO_FILE_PART)

    return {
        "file_id": file_id,
        "file_contents_base64": encoded,
    }
def do_query(file_id, query_type):
    """Run ``query_data`` on one processed file and return a result dict.

    Args:
        file_id: id of the file without ``boilerplate.PROCESSED_PREFIX``.
        query_type: forwarded to ``query_data``; must be truthy.

    Returns:
        ``{"result": ...}`` on success, or ``{"error": ...}`` when no query
        type was given or the processed file is not in the file listing.
    """
    if not query_type:
        return {"error": boilerplate.ERROR_NO_QUERY_TYPE_SPECIFIED}

    processed_file_id = boilerplate.PROCESSED_PREFIX + file_id
    if processed_file_id not in boilerplate.list_files(recursive=True):
        return {"error": boilerplate.ERROR_NO_SUCH_FILE}

    # query_data receives a mapping of processed file id -> file contents.
    documents = {processed_file_id: boilerplate.get_file(processed_file_id)}
    return {"result": query_data(documents, query_type=query_type)}
示例#6
0
def query_endpoint(file_id):
    """Run the query named by the ``type`` URL argument on a processed file.

    Responds with ``{"result": ...}`` on success, or with ``{"error": ...}``
    when the query type is missing or the processed file is not listed.
    """
    query_type = request.args.get('type')
    if not query_type:
        return jsonify({"error": boilerplate.ERROR_NO_QUERY_TYPE_SPECIFIED})

    processed_file_id = boilerplate.PROCESSED_PREFIX + file_id
    stored_ids = boilerplate.list_files(recursive=True)
    if processed_file_id not in stored_ids:
        return jsonify({"error": boilerplate.ERROR_NO_SUCH_FILE})

    # query_data receives a mapping of processed file id -> file contents.
    documents = {processed_file_id: boilerplate.get_file(processed_file_id)}
    outcome = query_data(documents, query_type=query_type)
    return jsonify({"result": outcome})
示例#7
0
def get_file_endpoint(file_id):
    """Serve a stored file, or the gold file, as a download.

    A listed ``file_id`` is returned with a ``Content-Disposition:
    attachment`` header. The special id ``"gold"`` (checked only when no
    stored file has that id) is resolved through ``boilerplate.get_gold``
    using the ``type`` URL argument. Anything else yields a JSON error.
    """
    if file_id in boilerplate.list_files(recursive=True):
        response = make_response(boilerplate.get_file(file_id))
        response.headers["Content-Disposition"] = \
            "attachment; filename=%s" % file_id
        return response

    if file_id != "gold":
        return jsonify({'error': boilerplate.ERROR_NO_SUCH_FILE})

    query_type = request.args.get('type')
    processed_file, file_id = boilerplate.get_gold(query_type)
    # 'txt' is not a valid MIME type; 'text/plain' is the proper spelling.
    # NOTE(review): `attachment_filename` was renamed to `download_name` in
    # Flask 2.0 -- confirm against the pinned Flask version.
    return send_file(processed_file,
                     mimetype='text/plain',
                     attachment_filename=file_id,
                     as_attachment=True)
示例#8
0
def load_file_endpoint():
    """Send the file named by the ``file`` key of a POSTed JSON body.

    Returns a JSON ``{"error": ...}`` object for non-POST requests, for a
    body without a ``file`` key, and for ids not in the file listing.
    """
    if request.method != 'POST':
        return jsonify({'error': boilerplate.ERROR_NO_SUCH_FILE})

    payload = request.json
    # A missing/non-object body or a missing key is reported as an unknown
    # file instead of escaping as KeyError/TypeError.
    file_id = payload.get('file') if isinstance(payload, dict) else None
    if file_id is None or \
            file_id not in boilerplate.list_files(recursive=True):
        return jsonify({'error': boilerplate.ERROR_NO_SUCH_FILE})

    # NOTE(review): send_file receives the storage id as a filesystem path,
    # while existence was checked through boilerplate.list_files -- confirm
    # the id really resolves to a local file.
    return send_file(file_id,
                     mimetype='text/csv',
                     attachment_filename=file_id,
                     as_attachment=True)
示例#9
0
def process_task(file_ids_list=None):
    """Process uploaded files and store the results.

    Args:
        file_ids_list: optional ids (without ``boilerplate.UPLOAD_PREFIX``)
            restricting which uploads are processed; ids that are not among
            the uploads are ignored. When empty or ``None`` every upload is
            processed.

    Returns:
        A list with the value ``boilerplate.add_processed_file`` returned for
        each ``(processed_file_id, contents)`` pair yielded by
        ``process_data``.
    """
    prefix = boilerplate.UPLOAD_PREFIX
    uploads = boilerplate.list_files(recursive=True, prefix=prefix)
    if file_ids_list:
        wanted = [prefix + file_id for file_id in file_ids_list]
        uploads = [name for name in wanted if name in uploads]

    # Map each upload's id (prefix stripped) to its contents.
    data_to_process = {}
    for name in uploads:
        data_to_process[name[len(prefix):]] = boilerplate.get_file(name)

    return [
        boilerplate.add_processed_file(processed_file_id, contents)
        for processed_file_id, contents in process_data(data_to_process)
    ]
示例#10
0
def process_task(file_ids_list=None):
    """Feed uploads through the Tomita directories and store the XML output.

    Each selected upload is written into ``TOMITA_PATH_IN``; the function
    then waits on inotify events for ``TOMITA_PATH_OUT`` and stores every
    non-hidden ``*.xml`` file that is closed after writing, until as many
    outputs were stored as inputs were written.

    Args:
        file_ids_list: optional ids (without ``boilerplate.UPLOAD_PREFIX``)
            restricting which uploads are processed; unknown ids are
            ignored. When empty or ``None`` every upload is processed.

    Returns:
        A list of the values returned by ``boilerplate.add_processed_file``
        (order not defined). Empty when there was nothing to process.
    """
    files_to_process = boilerplate.list_files(recursive=True,
                                              prefix=boilerplate.UPLOAD_PREFIX)
    if file_ids_list:
        files_to_process = [
            boilerplate.UPLOAD_PREFIX + file_id for file_id in file_ids_list
            if (boilerplate.UPLOAD_PREFIX + file_id) in files_to_process
        ]
    data_to_process = {
        file_id[len(boilerplate.UPLOAD_PREFIX):]: boilerplate.get_file(file_id)
        for file_id in files_to_process
    }
    # With no inputs no output will ever appear; waiting on the event
    # generator would block until some unrelated event arrives.
    if not data_to_process:
        return []

    # Register the watch BEFORE writing the inputs so that an output produced
    # right away cannot be missed (which would leave the loop below waiting
    # forever). Assumes TOMITA_PATH_IN and TOMITA_PATH_OUT are different
    # directories -- TODO confirm.
    watcher = inotify.adapters.Inotify()
    watcher.add_watch(TOMITA_PATH_OUT)

    for filename, file_contents in data_to_process.items():
        with open(join(TOMITA_PATH_IN, filename), 'wb') as f:
            f.write(file_contents)

    expected_outputs = len(data_to_process)
    processed_file_ids = set()

    for (_, type_names, path, out_filename) in \
            watcher.event_gen(yield_nones=False):
        print("PATH=[{}] FILENAME=[{}] EVENT_TYPES={}".format(
            path, out_filename, type_names))

        if not out_filename.startswith('.') and \
           out_filename.endswith('.xml') and \
           'IN_CLOSE_WRITE' in type_names:
            with open(join(path, out_filename), 'rb') as f:
                raw = f.read()
            # Take the length from the bytes actually read so it always
            # matches the payload handed to add_processed_file.
            generated_filename = boilerplate.add_processed_file(
                None, BytesIO(raw), "xml", len(raw))
            processed_file_ids.add(generated_filename)

        if len(processed_file_ids) >= expected_outputs:
            break

    return list(processed_file_ids)
示例#11
0
def list_files_endpoint():
    """Respond with a JSON object listing all file ids (recursive listing)."""
    file_ids = boilerplate.list_files(recursive=True)
    return jsonify({'file_ids': file_ids})
示例#12
0
def get_file_endpoint(file_id):
    """Return the stored contents for ``file_id``, or a JSON error object
    when the id is not in the file listing."""
    known_ids = boilerplate.list_files(recursive=True)
    if file_id not in known_ids:
        return jsonify({'error': boilerplate.ERROR_NO_SUCH_FILE})
    return boilerplate.get_file(file_id)
def list_files():
    """Return a dict whose ``file_ids`` entry is the recursive file listing."""
    file_ids = boilerplate.list_files(recursive=True)
    return {'file_ids': file_ids}
示例#14
0
def get_file_endpoint(file_id):
    """Serve a stored file as ``text/plain``, or a JSON error object when the
    id is not in the file listing."""
    if file_id not in boilerplate.list_files(recursive=True):
        return jsonify({'error': boilerplate.ERROR_NO_SUCH_FILE})
    body = boilerplate.get_file(file_id)
    return Response(body, mimetype='text/plain')