Пример #1
0
def taggerEntities():
    """Find entities in the JSON request body and return them as JSON.

    Query parameters (read from ``request.args``):
        stopwords: when exactly ``"true"``, the stopwords returned by
            ``StopwordManager`` for the detected language are passed to
            ``OBTManager.findEntities``; otherwise an empty list is passed.
        language: when exactly ``"true"``, the detected language is added
            under ``meta.language``.
        advanced: when present with any non-empty value, each entity is
            wrapped in a ``{"name": ..., "uri": ...}`` object.

    Returns:
        A ``Response`` with mimetype ``application/json`` whose body has the
        keys ``uri``, ``data`` and ``meta``. If the cleaned body is not a
        JSON object carrying a non-null ``"data"`` entry, a 400 response
        with an ``error`` message is returned instead.
    """
    args = request.args
    filter_stopwords = args.get('stopwords') == 'true'
    show_language = args.get('language') == 'true'
    # NOTE: unlike the two flags above, any non-empty value (even "false")
    # switches advanced output on; kept as-is for backward compatibility.
    show_advanced = bool(args.get("advanced"))

    # Cleaning the data in input
    cleaned = DataCleaner().filterCharacters(request.data)

    # Reject bodies that are not a JSON object with a "data" entry instead
    # of letting the failure surface later as an unhandled exception.
    try:
        payload = json.loads(cleaned)
    except ValueError:
        payload = None
    text = payload.get("data") if isinstance(payload, dict) else None
    if text is None:
        error = {
            "error": 'Request body must be a JSON object with a "data" entry'
        }
        return Response(
            json.dumps(error), status=400, mimetype="application/json")

    # Language Detection
    language = LanguageDetector().classify(text)[0]

    # Oslo-Bergen Tagger
    tagger = OBTManager(payload)

    # Only build the stopword list when filtering was actually requested.
    if filter_stopwords:
        stopwords = StopwordManager(language=language).getStopWords()
    else:
        stopwords = []
    entities = tagger.findEntities(stopwords=stopwords)

    if show_advanced and entities:
        # Advanced formatting for each entity
        url_root = request.url_root
        entities = [
            {
                "name": entity,
                "uri": "%sentities/%s" % (url_root, entity.replace(" ", "_")),
            }
            for entity in entities
        ]

    result = {
        "uri": "%s" % (request.base_url, ),
        "data": entities,
        "meta": {},
    }
    if show_language:
        result["meta"]["language"] = language

    return Response(json.dumps(result), mimetype="application/json")
Пример #2
0
def taggerEntities():
    """Return the entities found in the posted JSON document.

    The request body is cleaned with ``DataCleaner.filterCharacters``,
    parsed as JSON, language-detected from its ``"data"`` entry and then
    handed to ``OBTManager`` for entity extraction.

    Query parameters (read from ``request.args``):
        stopwords: ``"true"`` passes the ``StopwordManager`` stopwords for
            the detected language to ``findEntities``; anything else passes
            an empty list.
        language: ``"true"`` adds the detected language to ``meta``.
        advanced: any non-empty value turns each entity into a
            ``{"name", "uri"}`` object.

    Returns:
        A JSON ``Response`` with ``uri``, ``data`` and ``meta`` keys, or a
        400 JSON ``Response`` carrying an ``error`` message when the body is
        not a JSON object with a non-null ``"data"`` entry.
    """
    filter_stopwords = request.args.get('stopwords') == 'true'
    show_language = request.args.get('language') == 'true'
    # NOTE: this flag is truthy for ANY non-empty value (including "false"),
    # which differs from the strict "true" checks above; preserved so that
    # existing callers keep working.
    show_advanced = bool(request.args.get("advanced"))

    # Cleaning the data in input
    data = DataCleaner().filterCharacters(request.data)

    # Validate the payload up front: malformed JSON, a non-object document
    # or a missing "data" entry would otherwise raise further down.
    try:
        json_result = json.loads(data)
    except ValueError:
        json_result = None
    text = None
    if isinstance(json_result, dict):
        text = json_result.get("data")
    if text is None:
        body = json.dumps({
            "error": 'Request body must be a JSON object with a "data" entry'
        })
        return Response(body, status=400, mimetype="application/json")

    # Language Detection
    language_result = LanguageDetector().classify(text)
    language = language_result[0]

    # Oslo-Bergen Tagger
    obt_manager = OBTManager(json_result)

    # Applying the stopwords (the manager is only needed when filtering)
    stopwords = []
    if filter_stopwords:
        stopwords = StopwordManager(language=language).getStopWords()
    entities = obt_manager.findEntities(stopwords=stopwords)

    if show_advanced and entities:
        # Advanced formatting for each entity
        uri_template = "%sentities/%s"
        entities = [
            {
                "name": name,
                "uri": uri_template % (request.url_root, name.replace(" ", "_")),
            }
            for name in entities
        ]

    meta = {}
    if show_language:
        meta["language"] = language

    result = {
        "uri": "%s" % (request.base_url, ),
        "data": entities,
        "meta": meta,
    }
    return Response(json.dumps(result), mimetype="application/json")
Пример #3
0
def languageDetection():
    """Detect the language of the text posted in the JSON request body.

    The raw body is cleaned with ``DataCleaner.filterCharacters``, parsed as
    JSON, and its ``"data"`` entry is passed to ``LanguageDetector.classify``.

    Returns:
        A JSON ``Response`` with ``language`` (first element of the
        classifier result) and ``estimate`` (second element). If the cleaned
        body is not valid JSON, is not a JSON object, or has no non-null
        ``"data"`` entry, a 400 JSON ``Response`` with an ``error`` message
        is returned instead.
    """
    # Cleaning the input data
    cleaned = DataCleaner().filterCharacters(request.data)

    # Check that the body is JSON (json.loads signals bad input with
    # ValueError or a subclass of it).
    try:
        payload = json.loads(cleaned)
    except ValueError:
        error = {"error": "Request body is not valid JSON"}
        return Response(
            json.dumps(error), status=400, mimetype="application/json")

    # Check that the "data" key is present; a non-object document (list,
    # number, ...) cannot carry it either.
    text = payload.get("data") if isinstance(payload, dict) else None
    if text is None:
        error = {"error": 'Request body must contain a "data" entry'}
        return Response(
            json.dumps(error), status=400, mimetype="application/json")

    detection = LanguageDetector().classify(text)

    result = {
        "language": detection[0],
        "estimate": detection[1],
    }
    return Response(json.dumps(result), mimetype="application/json")
Пример #4
0
def taggerTags():
    """Tag the text in the JSON request body and return the tags as JSON.

    Query parameters (read from ``request.args``):
        stopwords: when exactly ``"true"``, the tags are passed through
            ``StopwordManager.filterStopWords`` for the detected language.
        language: when exactly ``"true"``, the detected language is added
            under ``meta.language``.

    Returns:
        A ``Response`` with mimetype ``application/json`` whose body has the
        keys ``uri``, ``data`` (the tags) and ``meta``. If the cleaned body
        is not a JSON object carrying a non-null ``"data"`` entry, a 400
        response with an ``error`` message is returned instead.
    """
    args = request.args
    filter_stopwords = args.get('stopwords') == 'true'
    show_language = args.get('language') == 'true'

    # Cleaning the input data
    cleaned = DataCleaner().filterCharacters(request.data)

    # Reject bodies that are not a JSON object with a "data" entry instead
    # of letting the failure surface later as an unhandled exception.
    try:
        payload = json.loads(cleaned)
    except ValueError:
        payload = None
    text = payload.get("data") if isinstance(payload, dict) else None
    if text is None:
        error = {
            "error": 'Request body must be a JSON object with a "data" entry'
        }
        return Response(
            json.dumps(error), status=400, mimetype="application/json")

    # Language Detection
    language = LanguageDetector().classify(text)[0]

    # Oslo-Bergen Tagger: find the tags
    tags = OBTManager(payload).findTags()
    if filter_stopwords:
        # Applying the stopwords
        tags = StopwordManager(language=language).filterStopWords(tags)

    result = {
        "uri": "%s" % (request.base_url, ),
        "data": tags,
        "meta": {},
    }
    if show_language:
        result["meta"]["language"] = language

    return Response(json.dumps(result), mimetype="application/json")
Пример #5
0
def taggerTags():
    """Return the tags found in the posted JSON document.

    The request body is cleaned with ``DataCleaner.filterCharacters``,
    parsed as JSON, language-detected from its ``"data"`` entry and then
    handed to ``OBTManager.findTags``.

    Query parameters (read from ``request.args``):
        stopwords: ``"true"`` filters the tags with
            ``StopwordManager.filterStopWords`` for the detected language.
        language: ``"true"`` adds the detected language to ``meta``.

    Returns:
        A JSON ``Response`` with ``uri``, ``data`` and ``meta`` keys, or a
        400 JSON ``Response`` carrying an ``error`` message when the body is
        not a JSON object with a non-null ``"data"`` entry.
    """
    filter_stopwords = request.args.get('stopwords') == 'true'
    show_language = request.args.get('language') == 'true'

    # Cleaning the input data
    data = DataCleaner().filterCharacters(request.data)

    # Validate the payload up front: malformed JSON, a non-object document
    # or a missing "data" entry would otherwise raise further down.
    try:
        json_result = json.loads(data)
    except ValueError:
        json_result = None
    text = None
    if isinstance(json_result, dict):
        text = json_result.get("data")
    if text is None:
        body = json.dumps({
            "error": 'Request body must be a JSON object with a "data" entry'
        })
        return Response(body, status=400, mimetype="application/json")

    # Language Detection
    language_result = LanguageDetector().classify(text)
    language = language_result[0]

    # Oslo-Bergen Tagger
    obt_manager = OBTManager(json_result)

    # Find the tags
    tags = obt_manager.findTags()
    if filter_stopwords:
        # Applying the stopwords
        stopword_manager = StopwordManager(language=language)
        tags = stopword_manager.filterStopWords(tags)

    meta = {}
    if show_language:
        meta["language"] = language

    result = {
        "uri": "%s" % (request.base_url, ),
        "data": tags,
        "meta": meta,
    }
    return Response(json.dumps(result), mimetype="application/json")