Exemplo n.º 1
0
async def existing(request):
    """File upload endpoint using existing model.

    Streams the first multipart part of the request to
    ``profile-data/<clientId>/<filename>`` and then queues a ``use_model``
    message that points at the stored file.

    ---
    description: Receives data for deduplication.
    tags:
    - Upload
    produces:
    - application/json
    responses:
        "200":
            description: successful operation. Return confirmation response.
        "400":
            description: Unsuccessful operation. clientId header or file part missing or invalid.
    parameters:
      - in: header
        name: clientId
        schema:
          type: string
          format: uuid
        required: true

    """
    client_id = request.headers.get("clientId")
    # The header value is used as a directory name, so it must be present and
    # must be a single path component (no separators, not "." or "..").
    if (not client_id or client_id in (".", "..")
            or os.path.basename(client_id) != client_id):
        return web.Response(status=400)

    logger.debug("Recieving data for classification")
    reader = await request.multipart()
    field = await reader.next()
    # No part at all, or a part without a filename, is not a file upload.
    if field is None or not field.filename:
        return web.Response(status=400)

    # Keep only the final path component of the client-supplied name so that
    # values such as "../../x" or "/etc/x" cannot escape the client directory.
    # Backslashes are normalised first so Windows-style paths are reduced too.
    filename = os.path.basename(field.filename.replace("\\", "/"))
    if filename in ("", ".", ".."):
        return web.Response(status=400)

    filepath = os.path.join("profile-data/", client_id, filename)
    size = 0

    os.makedirs(os.path.dirname(filepath), exist_ok=True)
    with open(filepath, "wb") as f:
        # Copy the part chunk by chunk; an empty chunk marks the end.
        while True:
            chunk = await field.read_chunk()
            if not chunk:
                break
            size += len(chunk)
            f.write(chunk)

    message = message_wrapper(client_id, {
        "use_model": True,
        "filepath": filepath
    })
    await app["message_queue"].put(message)

    return web.json_response({"recieved": filename, "size": size})
Exemplo n.º 2
0
async def results(request):
    """Results endpoint for retrieving classified data.

    Returns the client's clustered rows when a non-empty result is available.
    Otherwise it queues a ``training_complete`` message if the "training"
    status is set, re-checks the result if the "dedupe" status is set, and
    finally falls back to an empty JSON object.

    ---
    description: Supplies clustered data containing duplicates.
    tags:
    - Results
    produces:
    - application/json
    responses:
        "200":
            description: successful operation. Return labeled results.
    parameters:
      - in: header
        name: clientId
        schema:
          type: string
          format: uuid
        required: true

    """
    client_id = request.headers.get("clientId")
    datastore = get_datastore(client_id)

    if datastore.has_result and datastore.result.size > 0:
        return _clustered_response(datastore.result)

    if await datastore.get_status("training"):
        message = message_wrapper(client_id, {"training_complete": True})
        await app["message_queue"].put(message)
    if await datastore.get_status("dedupe") and datastore.result.size > 0:
        return _clustered_response(datastore.result)

    # Unconditional fallback: the datastore can change while the awaits above
    # are pending, and a handler must never fall through and return None.
    return web.json_response({})


def _clustered_response(result):
    """Serialize the rows of ``result`` with ``cluster_id > 0`` as a response.

    Rows are sorted by ``cluster_id`` and dumped with ``orient="table"``.
    ``result`` is presumably a pandas DataFrame -- confirm against the
    datastore implementation.
    """
    clustered = result[result.cluster_id > 0].sort_values("cluster_id")
    body = clustered.to_json(orient="table")
    return web.Response(body=body, content_type="application/json")
Exemplo n.º 3
0
def dedupe_message():
    """Return the wrapped ``training_complete`` message for client "123abc"."""
    payload = {"training_complete": True}
    return message_wrapper("123abc", payload)
Exemplo n.º 4
0
def train_message():
    """Return the wrapped ``labeling_complete`` message for client "123abc"."""
    payload = {"labeling_complete": True}
    return message_wrapper("123abc", payload)
Exemplo n.º 5
0
def label_message():
    """Return the wrapped ``labeled_pairs`` message for client "123abc".

    The payload holds one empty record under each of "match" and "distinct".
    """
    labeled_pairs = {"match": [{}], "distinct": [{}]}
    return message_wrapper("123abc", {"labeled_pairs": labeled_pairs})
Exemplo n.º 6
0
def sample_message():
    """Return the wrapped ``filepath`` message for client "123abc"."""
    payload = {"filepath": "some/file/path.csv"}
    return message_wrapper("123abc", payload)
Exemplo n.º 7
0
def model_message():
    """Return the wrapped ``use_model`` message for client "123abc"."""
    payload = {"use_model": True}
    return message_wrapper("123abc", payload)
Exemplo n.º 8
0
async def training_post(request):
    """Training endpoint for submitting labelled data.

    While the client's datastore reports fewer than 4 training rounds, the
    JSON body is queued as ``labeled_pairs``. After that the body is not read
    and a ``labeling_complete`` message is queued instead.

    ---
    description: Accepts labeled data for training the data matching model.
    tags:
    - Training
    produces:
    - application/json
    responses:
        "201":
            description: Successful operation. Labelled data queued for processing.
        "400":
            description: Unsuccessful operation. Labelled data not supplied or not valid JSON.
    parameters:
      - in: header
        name: clientId
        schema:
          type: string
          format: uuid
        required: true
      - in: body
        name: body
        description: Labelled training data
        required: true
        schema:
          type: object
          properties:
            match:
              type: array
              items:
                type: array
                items:
                  - $ref: '#/definitions/Person'
                  - $ref: '#/definitions/Person'
            distinct:
              type: array
              items:
                type: array
                items:
                  - $ref: '#/definitions/Person'
                  - $ref: '#/definitions/Person'

    """
    # Guard clause: without a readable body there is nothing to train on.
    if not (request.body_exists and request.can_read_body):
        return web.Response(status=400)

    logger.debug("Labelled training data recieved.")
    client_id = request.headers.get("clientId")
    datastore = get_datastore(client_id)

    if datastore.training_rounds < 4:
        logger.info("Updating traing pairs for labeling")
        try:
            labeled_pairs = await request.json()
        except ValueError:
            # A malformed or undecodable body is a client error, not a server
            # error (json.JSONDecodeError is a ValueError subclass).
            return web.Response(status=400)
        payload = {"labeled_pairs": labeled_pairs}
    else:
        payload = {"labeling_complete": True}

    await app["message_queue"].put(message_wrapper(client_id, payload))
    return web.Response(status=201)