Exemplo n.º 1
0
def getTrainingData():
    """
    Return every training record stored for the caller's model.

    Required params:\n
    -action = 'getData'\n
    -stoken\n

    :return: A JSON body plus an HTTP status code: 200 with the records
        under data.availableTraining, 400 when the body is not JSON or a
        required parameter is missing, 403 when the session token is
        rejected or the action is not 'getData'.
    """

    def reject(status, code):
        # Every failure shares the same envelope; only the status differs.
        return jsonify({'status': status, 'data': None}), code

    request.get_data()
    payload = request.json
    if payload is None:
        return reject(
            {
                'error': True,
                'message':
                "Incorrect header type. Header type should be application/json."
            }, 400)

    allPresent, params = getRequiredParameters(request, stoken='', action='')
    if not allPresent:
        # On failure the second value is the name of the missing parameter.
        return reject(
            {
                'message': "Missing required parameter '" + params + "'",
                'error': True
            }, 400)

    db = Conn()
    ownerID = db.checkToken(params["stoken"])
    if ownerID == -1:
        return jsonify(INVALID_SESSION_TOKEN), 403

    if params['action'] != 'getData':
        return reject(
            {
                'message':
                'Wrong action. This endpoint is only for retrieving training data.',
                'error': True
            }, 403)

    modelID, _ = db.getModelInfo(str(ownerID))

    # Output key -> position of that value inside each training row.
    columns = (("id", 0), ("rawText", 1), ("sentiment", 3), ("tags", 4),
               ("dateTrained", 5))
    records = [{key: row[index] for key, index in columns}
               for row in db.getAvailableTraining(modelID)]

    body = {
        'status': RETURN_SUCCESS_STATUS,
        'data': {
            'availableTraining': records
        }
    }
    return jsonify(body), 200
Exemplo n.º 2
0
def processAsk():
    """
    Gets a prediction from the model.

    Required params:\n
    -action = 'ask'\n
    -stoken\n
    -datatype => text or blob\n
    -data => raw text, a base64 blob, or a list of base64 blobs\n

    :return: A JSON body plus an HTTP status code. 200 with the predicted
        sentiment, the suggested tags and the submitted data (under 'text');
        400 for a non-JSON body, a missing parameter, a badly typed 'data'
        field, an undecodable or non pdf/txt file, or when the user has no
        model yet; 403 for an invalid session token, a wrong action or a
        wrong datatype.
    """
    request.get_data()
    if request.json is None:
        return _askError(
            "Incorrect header type. Header type should be application/json.",
            400)

    noMissingData, data = getRequiredParameters(request,
                                                stoken='',
                                                action='',
                                                datatype='',
                                                data='')
    if not noMissingData:
        # On failure the second value is the name of the missing parameter.
        return _askError('Missing required parameter \'' + data + '\'', 400)

    con = Conn()
    userID = con.checkToken(data['stoken'])
    if userID == -1:
        return jsonify(INVALID_SESSION_TOKEN), 403

    if data['action'] != "ask":
        return _askError('Wrong action. This endpoint is only for ask.', 403)

    isBlob = data['datatype'] == 'blob'
    if not isBlob and data['datatype'] != 'text':
        return _askError(
            'Wrong datatype. This endpoint only accepts raw text as \'text\' and pdf or txt files as \'blob\'.',
            403)

    if isBlob and not isinstance(data['data'], (list, str)):
        return _askError(
            'Data in the \'data\' field should be sent as a string or a list of items.',
            400)

    # Look the model up once, for every datatype: without one there is
    # nothing to load, so the blob paths must be rejected like the text path.
    modelID, modelName = con.getModelInfo(str(userID))
    if modelID is None and modelName is None:
        return _askError("No model created yet.", 400)

    if not isBlob:
        # remove escape characters, if any
        txtdata = str(request.json['data']).replace("\\", "")

        # Only predict when the text resembles something already trained on.
        knownSimilar = any(
            getCosineSimilarity(txtdata, eachTraining[1]) >=
            PREVIOUS_KNOWLEDGE_SIMILARITY_RATE
            for eachTraining in con.getAvailableTraining(modelID))
        if not knownSimilar:
            return _askSuccess("Not available", "Not available", txtdata)

        model = Classifier(makeNewModel=False, modelName=modelName)
        predSentiment, retTags = model.predict(txtdata)
        return _askSuccess(predSentiment, retTags, txtdata)

    model = Classifier(makeNewModel=False, modelName=modelName)

    if isinstance(data['data'], list):  # if multiple files
        predSentiment = []
        retTags = []
        for item in data['data']:
            error, prediction = _predictFromBlob(model, item)
            if error is not None:
                return error
            predSentiment.append(prediction[0])
            retTags.append(prediction[1])
        return _askSuccess(predSentiment, retTags, data['data'])

    # single file
    error, prediction = _predictFromBlob(model, data['data'])
    if error is not None:
        return error
    # Echo the submitted blob back, exactly as the multi-file path does.
    return _askSuccess(prediction[0], prediction[1], data['data'])


def _askError(message, code):
    """Build the (JSON body, status code) pair for a failed ask request."""
    return jsonify({
        'status': {
            'message': message,
            'error': True
        },
        'data': None
    }), code


def _askSuccess(predSentiment, retTags, submitted):
    """Build the (JSON body, 200) pair for a successful ask request."""
    return jsonify({
        'status': RETURN_SUCCESS_STATUS,
        'data': {
            'predictedSentiment': predSentiment,
            'suggested': retTags,
            'text': submitted
        }
    }), 200


def _predictFromBlob(model, blob):
    """
    Decode one base64 blob into a temporary file and predict on its text.

    :return: (errorResponse, None) when the blob cannot be used, otherwise
        (None, (sentiment, tags)). The temporary file is removed either way.
    """
    filename = str(uuid.uuid4())  # generate temp file name
    status, filepath = getFileBase64(blob, filename)

    # Check the decode result before touching filepath.
    # NOTE(review): what getFileBase64 returns as filepath on failure is not
    # visible here, so the cleanup below is best-effort.
    if not status:
        if isinstance(filepath, str) and os.path.isfile(filepath):
            os.remove(filepath)
        return _askError(ERRORFILEEXTEN, 400), None

    try:
        # in case file received is not a text-based (.pdf/.txt) file
        if not filepath.endswith(('pdf', 'txt')):
            return _askError("Wrong format for data.", 400), None

        textData = readTextFileContents(filepath)
        return None, model.predict(textData)
    finally:
        # Remove the temp file even if reading or predicting raises.
        os.remove(filepath)
Exemplo n.º 3
0
def retrain():
    """
    Used to retrain the model.

    Required params:\n
    -action = 'retrain'\n
    -stoken => the session token \n
    -data => a list of trainingID that will be deleted\n

    The listed training records are removed, a new model is trained from the
    records that remain, the database is pointed at the new model and the
    old model file under Models/ is deleted.

    :return: A JSON body plus an HTTP status code. 200 with a success
        message; 400 for a non-JSON body, a missing parameter, a 'data'
        field that is not a list, or when the user has no model yet; 403 for
        an invalid session token or a wrong action; 500 when removing the
        training records or updating the model info fails.
    """

    def fail(message, code):
        # Every failure shares the same envelope; only message/code differ.
        return jsonify({
            'status': {
                'message': message,
                'error': True
            },
            'data': None
        }), code

    request.get_data()
    if request.json is None:
        return fail(
            "Incorrect header type. Header type should be application/json.",
            400)

    chkMissingData, data = getRequiredParameters(request,
                                                 stoken='',
                                                 action='',
                                                 data='')
    if not chkMissingData:
        # On failure the second value is the name of the missing parameter.
        return fail('Missing required parameter \'' + data + '\'', 400)

    con = Conn()
    userID = con.checkToken(data["stoken"])
    if userID == -1:
        return jsonify(INVALID_SESSION_TOKEN), 403

    if data['action'] != 'retrain':
        return fail('Wrong action. This endpoint is only for retraining.',
                    403)

    if not isinstance(data['data'], list):
        return fail(
            'Bad data. Data in the \'data\' field must be sent as list.', 400)

    # get old model info
    oldModelID, oldModelName = con.getModelInfo(str(userID))

    # Stop before any destructive step when there is no model to retrain;
    # otherwise the old-model cleanup below would fail on a None name after
    # records were already removed.
    if oldModelID is None and oldModelName is None:
        return fail("No model created yet.", 400)

    if not con.removeTraining(oldModelID, data['data']):
        return fail('Internal server error encountered. Please try again.',
                    500)

    availableTraining = con.getAvailableTraining(oldModelID)

    # make a new model
    model = Classifier(makeNewModel=True)
    for eachData in availableTraining:
        model.train(text=eachData[2],
                    sentiment=eachData[3],
                    tags=eachData[4],
                    fromDB=True)

    # update db with new model info
    if not con.updateModelInfo(oldModelID, modelName=model.modelName):
        return fail('Internal server error when updating the resource.', 500)

    if not oldModelName.endswith(PICKLE_FILE_EXTENSION):
        oldModelName += PICKLE_FILE_EXTENSION

    oldModelPath = os.path.join(os.path.dirname(__file__), 'Models',
                                oldModelName)
    try:
        os.remove(oldModelPath)
    except FileNotFoundError:
        # The retrain already succeeded and the DB points at the new model;
        # a stale file that is already gone must not turn that into an error.
        pass

    return jsonify({
        'status': {
            'message': 'Model successfully retrained.',
            'error': False
        },
        'data': None
    }), 200