Example No. 1
 def calculate_extensions(self):
     return  # skip the NLU calls for now because this costs money that we don't have
     tweet_text = self.tweet.text
     response = "Language not supported"
     try:
         if self.tweet.lang == 'ar':
             response = NLU.analyze(text=tweet_text, features=Features(categories=CategoriesOptions(limit=1)),
                                    language='ar').get_result()
             if len(response['categories']) > 0:
                 self.category = response['categories'][0]['label']
             translated = NLT.translate(text=tweet_text, model_id='ar-en', source='ar', target='en').get_result()
             translated = translated['translations'][0]['translation']
             response = NLU.analyze(text=translated, features=Features(concepts=ConceptsOptions(limit=1),
                                                                       entities=EntitiesOptions(limit=1, sentiment=True),
                                                                       keywords=KeywordsOptions(limit=1, sentiment=True),
                                                                       ), language='en').get_result()
             self.extract_englishonly_catagories(response)
         elif self.tweet.lang == 'en':
             response = NLU.analyze(text=tweet_text, features=Features(concepts=ConceptsOptions(limit=1),
                                                                       entities=EntitiesOptions(limit=1, sentiment=True),
                                                                       keywords=KeywordsOptions(limit=1, sentiment=True),
                                                                       categories=CategoriesOptions(limit=1),
                                                                       ), language='en').get_result()
             if len(response['categories']) > 0:
                 self.category = response['categories'][0]['label']
             self.extract_englishonly_catagories(response)
     except ApiException as ex:
         print("error in calculate_extensions")
         print(ex)
         return
Example No. 2
def extractEntities(input_filepath, output_filepath):
    df = pd.read_csv(input_filepath)
    (rows, _) = df.shape
    for idx in range(rows):
        hotline_url = df["URL"][idx]
        nlu_categories = natural_language_understanding.analyze(
            url=hotline_url,
            features=Features(categories=CategoriesOptions())).get_result()
        nlu_keywords = natural_language_understanding.analyze(
            url=hotline_url,
            features=Features(
                keywords=KeywordsOptions(sentiment=True, emotion=True)),
        ).get_result()
        nlu_concepts = natural_language_understanding.analyze(
            url=hotline_url,
            features=Features(concepts=ConceptsOptions())).get_result()
        categories_list = list(
            map(lambda x: x["label"], nlu_categories["categories"]))
        keywords_list = list(map(lambda x: x["text"],
                                 nlu_keywords["keywords"]))
        concepts_list = list(map(lambda x: x["text"],
                                 nlu_concepts["concepts"]))
        categories_list_extracted = list(
            map(lambda x: x.split("/")[1:], categories_list))
        categories_list_flattened = list(
            set(list(itertools.chain(*categories_list_extracted))))
        # If there are not enough concepts add keywords to the list
        if len(concepts_list) < 3:
            concepts_list = concepts_list + keywords_list
        df.at[idx, "Concepts"] = concepts_list
        df.at[idx, "Subject"] = categories_list_flattened
    df.to_csv(output_filepath, index=False)
    return df
def nlp_watson(url):
    print(url)
    natural_language_understanding = NaturalLanguageUnderstandingV1(
        version='2018-11-16',
        iam_apikey='',
        url=
        'https://gateway.watsonplatform.net/natural-language-understanding/api'
    )
    try:
        response = natural_language_understanding.analyze(
            url=url,
            features=Features(categories=CategoriesOptions(limit=15),
                              concepts=ConceptsOptions(limit=10),
                              entities=EntitiesOptions(sentiment=True,
                                                       limit=20),
                              keywords=KeywordsOptions(sentiment=True,
                                                       emotion=True,
                                                       limit=5),
                              metadata=MetadataOptions())
            #relations=RelationsOptions()),
        ).get_result()

        data = json.dumps(response, indent=2)
        # new = json.loads(response)
        # print(data)
        db = client.asalvo
        news = db.news
        new_id = news.insert_one(response).inserted_id
        # print(new_id)
    except Exception as err:
        print('Error occurred:', err)

    return 0
Example No. 4
def sn(rr):
    # Response categories
    pagina = rr
    print(pagina)
    response = natural_language_understanding.analyze(
        url=pagina,
        features=Features(categories=CategoriesOptions(limit=3))).get_result()
    print(json.dumps(response, indent=2))

    # Concepts response

    response2 = natural_language_understanding.analyze(
        url=pagina,
        features=Features(concepts=ConceptsOptions(limit=3))).get_result()

    print(json.dumps(response2, indent=2))

    # Emotion
    # response3 = natural_language_understanding.analyze(
    #     url=pagina,
    #     features=Features(emotion=EmotionOptions())).get_result()

    # print(json.dumps(response3, indent=2))

    # Sentiment
    response4 = natural_language_understanding.analyze(
        url=pagina,
        features=Features(sentiment=SentimentOptions())).get_result()

    print(json.dumps(response4, indent=2))
Example No. 5
def analyze(natural_language_understanding, input_text):
    response = natural_language_understanding.analyze(
        text=input_text,
        features=Features(emotion=EmotionOptions(),
                          categories=CategoriesOptions(limit=3),
                          concepts=ConceptsOptions(limit=3),
                          keywords=KeywordsOptions(limit=2))).get_result()

    return response
def get_nlu_tone_analysis(tweet):
    """
    This function takes input as a tweet and
    returns their sentiment (Positive, Neutral or Negative),
    concepts (high level concepts or ideas),
    emotions (anger, disgust, fear, joy, or sadness),
    and tones (emotional and language tone)
    """
    ## Encode ASCII
    tweet = tweet.encode(encoding='ASCII', errors='ignore').decode('ASCII')
    ## Remove URLs
    tweet_cleaned = re.sub(r'http\S+', '', tweet)
    if tweet_cleaned:

        ## Call NLU API
        nlu_analysis = natural_language_understanding.analyze(
            text=tweet_cleaned,
            language='en',
            features=Features(concepts=ConceptsOptions(limit=2),
                              sentiment=SentimentOptions(),
                              emotion=EmotionOptions())).get_result()

        concepts = ', '.join(
            [concept['text'] for concept in nlu_analysis['concepts']])
        sentiment = nlu_analysis['sentiment']['document']['label']
        emotions = nlu_analysis['emotion']['document']['emotion']
        dominant_emotion = max(emotions, key=emotions.get)

        ## Call tone analyzer API
        tone_analysis = tone_analyzer.tone(
            {'text': tweet_cleaned},
            content_type='application/json').get_result()

        tones = ', '.join([
            tone['tone_name']
            for tone in tone_analysis['document_tone']['tones']
        ])

        ## Create result table
        result = {
            'tweet': tweet,
            'sentiment': sentiment,
            "emotion": dominant_emotion,
            'concepts': concepts,
            'tones': tones
        }
    else:
        result = {
            'tweet': tweet,
            'sentiment': '',
            "emotion": '',
            'concepts': '',
            'tones': ''
        }

    return result
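# A minimal, hypothetical usage sketch (the tweet text below is invented), assuming
# the `natural_language_understanding` and `tone_analyzer` clients used above are
# already configured:
#
#   analysis = get_nlu_tone_analysis("Loving the new rollout! https://example.com")
#   print(analysis['sentiment'], analysis['emotion'], analysis['concepts'], analysis['tones'])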
def get_tags_from_fav(user):
    try:
        auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
        auth.set_access_token(access_token, access_token_secret)
        
        api = tweepy.API(auth)
        
        favorites = api.favorites(user, 3)
        
        index = 0
        dic_tweets = dict()
        
        for tweets in favorites:
            
            text_of_tweet = tweets.text.encode("ascii", "ignore").decode("ascii", "ignore")
        
            dic_tweets[index] = text_of_tweet
            
            index += 1
            
        
        natural_language_understanding = NaturalLanguageUnderstandingV1(
            version='2018-11-16',
            iam_apikey='zq_JRbDtCInoaWml-ZAjfGFn2Vj2b9wvZzfRE1O5U_wJ',
            url='https://gateway.watsonplatform.net/natural-language-understanding/api/v1/analyze?version=2018-11-16'
        )
        
        tags = []
        
        for keys in dic_tweets.keys():
            try:
            
                response_categories = natural_language_understanding.analyze(
                    text=dic_tweets[keys],
                    features=Features(categories=CategoriesOptions(limit=5))).get_result()
                
                response_concepts = natural_language_understanding.analyze(
                    text=dic_tweets[keys],
                    features=Features(concepts=ConceptsOptions(limit=5))).get_result()
            
            
                if len(response_concepts["concepts"]) != 0:
                    for i in range(len(response_concepts["concepts"])):
                        tags.append(response_concepts["concepts"][i]["text"])
                        print('     '+str(response_concepts["concepts"][i]["text"]))
                    
                tags.append(response_categories["categories"][0]["label"].split("/")[-1])
                print('     '+str(response_categories["categories"][0]["label"].split("/")[-1]))
            except Exception:
                continue
        
        return tags
    
    except Exception:
        return None
Example No. 8
def analyzeFrame(text):
    try:
        return nlu.analyze(text=text,
                           features=Features(
                               categories=CategoriesOptions(limit=3),
                               concepts=ConceptsOptions(limit=3),
                               entities=EntitiesOptions(limit=5),
                               keywords=KeywordsOptions(limit=10),
                               relations=RelationsOptions())).get_result()
    except Exception as err:
        return {'err': True, 'errMsg': str(err)}
Example No. 9
def analyze_text(corpus_id, text, type, n_archs):
    features = Features(
        concepts=ConceptsOptions(),
        entities=EntitiesOptions(),
        keywords=KeywordsOptions(),
    )
    authenticator = IAMAuthenticator(
        current_app.config['NATURAL_LANGUAGE_UNDERSTANDING_IAM_APIKEY']
    )
    service = NaLaUn(
        version=current_app.config['NATURAL_LANGUAGE_UNDERSTANDING_VERSION'],
        authenticator=authenticator)
    service.set_service_url(
        current_app.config['NATURAL_LANGUAGE_UNDERSTANDING_URL']
    )
    response = service.analyze(
        text=text,
        features=features
    )
    results = {}
    typ_list = ['entities', 'concepts', 'keywords']
    for typ in typ_list:
        results[typ] = pd.DataFrame(response.result[typ])

    test_vec = \
        results['concepts'].set_index('text')[['relevance']].apply(norm_dot)
    archetypes = get_corpus_archetypes(corpus_id, type=type, n_archs=n_archs)

    # Select the subset of features in corpus that cover the test vector.
    in_common = list(set(test_vec.index).intersection(
        set(archetypes.fn.columns)
    ))

    similarities = (
        (archetypes.fn[in_common] @ test_vec.loc[in_common]) * 100
    ).applymap(int)
    similarities.columns = ['similarity %']

    test_vec_expanded = pd.DataFrame(
        test_vec,
        index=archetypes.f.columns
    ).apply(scale).fillna(-0.1)

    compare = archetypes.f.T.apply(scale)
    compare['DOC'] = test_vec_expanded.apply(scale)

    archetype_maps = []
    for ix in archetypes.f.index:
        cmp = compare.sort_values(by=ix, ascending=True)[[ix, 'DOC']]
        cmp = cmp[cmp[ix] > 0.1]
        archetype_maps.append(cmp.applymap(np.sqrt))

    return similarities, archetype_maps
Example No. 10
def analyze_corpus(app, name, directory):
    features = Features(
        concepts=ConceptsOptions(),
        entities=EntitiesOptions(),
        keywords=KeywordsOptions(),
    )
    with app.app_context():
        authenticator = IAMAuthenticator(
            app.config['NATURAL_LANGUAGE_UNDERSTANDING_IAM_APIKEY'])
        service = NaLaUn(
            version=app.config['NATURAL_LANGUAGE_UNDERSTANDING_VERSION'],
            authenticator=authenticator)
        service.set_service_url(
            app.config['NATURAL_LANGUAGE_UNDERSTANDING_URL'])

        filenames = os.listdir(directory)
        new_corpus = Corpus(name=name, status='processing')
        db.session.add(new_corpus)
        db.session.commit()
        db.session.flush()
        print('Analyzing corpus in thread. Corpus ID: ' + str(new_corpus.id))
        count = 0
        for file in filenames:
            path = os.path.join(directory, file)
            if not os.path.isfile(path) or not file.endswith('.txt'):
                continue
            with open(path) as f:
                # Read once so retries reuse the same text (f.read() at EOF returns '')
                text = f.read()
                for i in range(3):
                    try:
                        results = service.analyze(text=text,
                                                  features=features)
                        pickled_results = pickle.dumps(results)
                        new_results = CorpusResult(corpus_id=new_corpus.id,
                                                   name=file.replace(
                                                       '.txt', ''),
                                                   data=pickled_results)
                        db.session.add(new_results)
                        db.session.commit()
                        count += 1
                        print('Processed file #{}: {} '.format(count, file))
                    except Exception as e:
                        print(e)
                        time.sleep(0.5)
                        print('Retrying...')
                    else:
                        break
                else:
                    print('Failed to analyze a file ({}) '
                          'after multiple attempts.'.format(file))

        new_corpus.status = 'ready'
        db.session.commit()
        print('Finished analyzing corpus.')
Example No. 11
def analyze_emotion(lyrics):
    response = natural_language_understanding.analyze(
        text=lyrics,
        features=Features(entities=EntitiesOptions(emotion=True,
                                                   sentiment=True,
                                                   limit=5),
                          keywords=KeywordsOptions(emotion=True,
                                                   sentiment=True,
                                                   limit=10),
                          concepts=ConceptsOptions(limit=5),
                          relations=RelationsOptions())).get_result()

    return json.dumps(response, indent=2)
Example No. 12
    def get_concept(request):
        natural_language_understanding = NaturalLanguageUnderstandingV1(
            version='2018-11-16',
            iam_apikey='VUJWvResSognuEFqEu3GAi_mBcR1fmFvrce-5JRWLhqr',
            url=
            'https://gateway.watsonplatform.net/natural-language-understanding/api'
        )

        response = natural_language_understanding.analyze(
            url='https://docs.djangoproject.com/en/2.2/topics/db/queries/',
            features=Features(concepts=ConceptsOptions(
                limit=20))).get_result()
        return JsonResponse(response, safe=False)
Example No. 13
def send_request(texto):
    natural_language_understanding = NaturalLanguageUnderstandingV1(
        version='2018-11-16',
        iam_apikey='VUJWvResSognuEFqEu3GAi_mBcR1fmFvrce-5JRWLhqr',
        url=
        'https://gateway.watsonplatform.net/natural-language-understanding/api'
    )
    print(texto)
    response = natural_language_understanding.analyze(
        text=texto,
        features=Features(concepts=ConceptsOptions(limit=100),
                          keywords=KeywordsOptions(limit=10))).get_result()

    return response
Example No. 14
def analyze_using_NLU(analysistext):
    """ Extract results from Watson Natural Language Understanding for each news item
    """
    res = dict()
    response = natural_language_understanding.analyze(
        text=analysistext,
        features=Features(sentiment=SentimentOptions(),
                          entities=EntitiesOptions(),
                          keywords=KeywordsOptions(),
                          emotion=EmotionOptions(),
                          concepts=ConceptsOptions(),
                          categories=CategoriesOptions(),
                          semantic_roles=SemanticRolesOptions())).get_result()
    res['results'] = response
    return res
Example No. 15
    def keywords(self, text):
        API_KEY = 'b6NOltM3-zpJPqI4KiGfhfHLM6fkSkJMUKHKlBvOSn4G'
        url = 'https://api.us-east.natural-language-understanding.watson.cloud.ibm.com/instances/62ddc339-68c4-414a-b6da-bf78870c170f'
        nlu = self.authenticate(API_KEY,
                                NaturalLanguageUnderstandingV1,
                                url,
                                version='2020-08-01')

        analysis = nlu.analyze(
            text=text,
            features=Features(concepts=ConceptsOptions(limit=5),
                              keywords=KeywordsOptions(emotion=True,
                                                       sentiment=True,
                                                       limit=5))).get_result()

        return analysis
Example No. 16
def watsonNLU(versionID, apiKey, apiURL, model_id, Text):
    natural_language_understanding = NaturalLanguageUnderstandingV1(
        version=versionID, iam_apikey=apiKey, url=apiURL)

    response = natural_language_understanding.analyze(
        text=Text,
        features=Features(concepts=ConceptsOptions(limit=50),
                          entities=EntitiesOptions(emotion=True,
                                                   sentiment=True,
                                                   limit=50,
                                                   model=model_id),
                          keywords=KeywordsOptions(emotion=True,
                                                   sentiment=True,
                                                   limit=50))).get_result()

    return response
Example No. 17
def test_watson():
    #print('hi')
    natural_language_understanding = NaturalLanguageUnderstandingV1(
        version='2019-07-12',
        iam_apikey='INJBZJ_fupvtsCswpI9UUOBZZmOYp6vHkHPEAZX_WU31',
        url=
        'https://gateway.watsonplatform.net/natural-language-understanding/api'
    )
    #print('xx')
    for i in read_output:
        #print('hello')
        url_link = i[0]
        print(url_link)
        #key categories
        #categories_response = natural_language_understanding.analyze(
        #url=url_link,
        #features=Features(categories=CategoriesOptions(limit=10))).get_result()
        #print(json.dumps(categories_response, indent=2))

        #key concepts
        concepts_response = natural_language_understanding.analyze(
            url=url_link,
            features=Features(concepts=ConceptsOptions(
                limit=10))).get_result()
        output.append(json.dumps(concepts_response, indent=2))
        print('CONCEPTS')
        print(json.dumps(concepts_response, indent=2))

        #top keywords
        keywords_response = natural_language_understanding.analyze(
            url=url_link,
            features=Features(keywords=KeywordsOptions(
                sentiment=True, emotion=False, limit=10))).get_result()
        output.append(json.dumps(keywords_response, indent=2))
        print('KEYWORDS')
        print(json.dumps(keywords_response, indent=2))

        #top entities
        entities_response = natural_language_understanding.analyze(
            url=url_link,
            features=Features(entities=EntitiesOptions(
                sentiment=False, limit=10))).get_result()
        output.append(json.dumps(entities_response, indent=2))
        print('ENTITIES')
        print(json.dumps(entities_response, indent=2))

    Tools.write_csv_data('./watson_output.csv', output)
Example No. 18
def prospection_nlu():

    namefile, sheet, Line_of_first_article_to_be_treated_in_the_excel_file, Line_of_last_article_to_be_treated_in_the_excel_file = input_file_to_treat_with_cells(
    )
    list_occurences_keywords = []
    list_all_keywords = []
    list_occurences_keywords.append(["initialisation", 1])
    for text_index in range(
            Line_of_first_article_to_be_treated_in_the_excel_file,
            Line_of_last_article_to_be_treated_in_the_excel_file + 1):
        print("\nArticle number %d in the file about to be prospected\n" %
              text_index)
        # Start reading the Excel file
        print("Analysis of text number %d is starting. \n" % text_index)
        text = sheet.cell_value(text_index, 0).replace("\n", "")
        text = identification_language_and_translation(text)

        # Send the text to NLU
        response_nlu = naturalLanguageUnderstanding.analyze(
            text=text,
            features=Features(
                concepts=ConceptsOptions(limit=5),
                entities=EntitiesOptions(emotion=True, sentiment=True),
                keywords=KeywordsOptions(emotion=True, sentiment=True),
                sentiment=SentimentOptions(document=True),
                emotion=EmotionOptions(document=True))).get_result()
        for l in range(len(response_nlu["keywords"])):
            split_it = response_nlu["keywords"][l]["text"].split()
            for keyword in split_it:
                list_all_keywords.append(keyword)


#            Manual way of storing the list of already appeared keywords, but counter module does the same faster below
#            flag_keyword_already_appeared=0
#            index_already_stored_keywords=0
#            length=len(list_occurences_keywords)
#            while flag_keyword_already_appeared==0 and index_already_stored_keywords<length:
#                if response_nlu["keywords"][l]["text"]==list_occurences_keywords[index_already_stored_keywords][0]:
#                    list_occurences_keywords[index_already_stored_keywords][1]+=1
#                    flag_keyword_already_appeared=1
#                index_already_stored_keywords+=1
#            if flag_keyword_already_appeared==0:
#                list_occurences_keywords.append([response_nlu["keywords"][l]["text"],1])
    count_intermediate = Counter(map(str.lower, list_all_keywords))
    most_occur = count_intermediate.most_common(10)

    return most_occur
Example No. 19
    def get_concept_keyword(self) -> Dict[str, List]:
        authenticator = IAMAuthenticator(os.getenv("IBM_NLP_API"))
        natural_language_understanding = NaturalLanguageUnderstandingV1(
            version='2019-07-12', authenticator=authenticator)

        natural_language_understanding.set_service_url(
            os.getenv("IBM_NLP_URL"))

        res = natural_language_understanding.analyze(
            text=self.text,
            features=Features(
                keywords=KeywordsOptions(limit=20),
                concepts=ConceptsOptions(limit=20),
            )).get_result()

        keywords = [keyword['text'] for keyword in res['keywords']]
        concepts = [concept['text'] for concept in res['concepts']]
        result = dict(keywords=keywords, concepts=concepts)

        return result
Example No. 20
    def _parallel_NlU(self, text):

        # A Function to call Watson Natural Language Understanding

        if self.config['keywords']:
            keyword_option = KeywordsOptions(
                limit=self.config['keyword_limit'])
        else:
            keyword_option = None

        if self.config['concepts']:
            concepts_option = ConceptsOptions(
                limit=self.config['concept_limit'])
        else:
            concepts_option = None

        try:
            results = self.model.analyze(text=text,
                                         features=Features(
                                             concepts=concepts_option,
                                             keywords=keyword_option),
                                         language='en')

            json_results = results.get_result()

            our_concepts = []
            for concept in json_results['concepts']:
                our_concepts.append(concept['text'])

            our_keywords = []
            for keyword in json_results['keywords']:
                our_keywords.append(keyword['text'])

            self.lock.acquire()
            self.concepts = self.concepts + our_concepts
            self.keywords = self.keywords + our_keywords
            self.lock.release()

        except Exception as e:
            print(str(e))
Example No. 21
def analyze(url):
    service = NaturalLanguageUnderstandingV1(
        version=config.Config.IBM_VERSION,
        ## url is optional, and defaults to the URL below. Use the correct URL for your region.
        url=config.Config.IBM_URL,
        iam_apikey=config.Config.IBM_API_KEY)

    response = service.analyze(
        url=url,
        # text='what is the application of NLP in web page search?',
        features=Features(categories=CategoriesOptions(),
                          concepts=ConceptsOptions(limit=10),
                          entities=EntitiesOptions(),
                          relations=RelationsOptions(),
                          semantic_roles=SemanticRolesOptions(),
                          keywords=KeywordsOptions()
                          ),
        return_analyzed_text=True,
        clean=True
    ).get_result()

    return response
def ibm_concepts(data):
    """Return concepts related to the text.

    Input is a list of tweets in the form [[date1, [tweets]], [date2, [tweets]]],
    like the output of get_tweets2.
    """
    l = ""
    for i in data:
        l = l + ''.join(i[1])
    l = l[0:49999]
    #print(l)
    authenticator = IAMAuthenticator(ibmapikey)
    natural_language_understanding = NaturalLanguageUnderstandingV1(
        version='2020-08-01', authenticator=authenticator)

    natural_language_understanding.set_service_url(ibmurl)

    response = natural_language_understanding.analyze(
        text=l,
        features=Features(concepts=ConceptsOptions(limit=10))).get_result()

    result = []
    for i in response['concepts']:  #result['concepts']:
        result.append(
            [i["dbpedia_resource"][7::].split('/')[-1], i["relevance"]])
    return result
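# A minimal, hypothetical usage sketch (the sample data is invented), assuming
# ibmapikey/ibmurl are set and the input follows the [[date, [tweets]], ...] shape
# described in the docstring above:
#
#   sample = [["2021-01-01", ["Tweet about machine learning.", "Another tweet."]]]
#   for dbpedia_name, relevance in ibm_concepts(sample):
#       print(dbpedia_name, relevance)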
Example No. 23
 def test_concepts(self):
     c = Features(concepts=ConceptsOptions())
     assert c._to_dict() == {'concepts': {}}
     c = Features(concepts=ConceptsOptions(limit=10))
     assert c._to_dict() == {'concepts': {'limit': 10}}
Example No. 24
def main(params):

    text = params['text'].replace("\n", "")
    # Send the text to NLU
    response_nlu = naturalLanguageUnderstanding.analyze(
        text=text,
        features=Features(concepts=ConceptsOptions(limit=5),
                          entities=EntitiesOptions(emotion=True,
                                                   sentiment=True),
                          keywords=KeywordsOptions(emotion=True,
                                                   sentiment=True),
                          sentiment=SentimentOptions(document=True),
                          emotion=EmotionOptions(document=True))).get_result()

    # The first criterion is that the article mentions a "Company" entity, so we loop over the entities recognized by NLU
    company = ""
    location = ""
    i = 0
    while (i < len(response_nlu["entities"])
           and (company == "" or location == "")):
        if (response_nlu["entities"][i]["type"] == "Company"
                and company == ""):
            company = response_nlu["entities"][i]["text"]
            sentiment = response_nlu["entities"][i]["sentiment"]["score"]
            emotion_json_pointer = response_nlu["entities"][i]["emotion"]
            sadness = emotion_json_pointer["sadness"]
            joy = emotion_json_pointer["joy"]
            disgust = emotion_json_pointer["disgust"]
            anger = emotion_json_pointer["anger"]
            score_pondere_company = -0.5 * (anger + disgust + sadness -
                                            joy) + sentiment
        if (response_nlu["entities"][i]["type"] == "Location"
                and location == ""):
            location = response_nlu["entities"][i]["text"]
        i += 1

    # Collect and store the article's sentiment and emotion values
    sentiment = response_nlu["sentiment"]["document"]["score"]
    emotion_json_pointer = response_nlu["emotion"]["document"]["emotion"]
    sadness = emotion_json_pointer["sadness"]
    joy = emotion_json_pointer["joy"]
    disgust = emotion_json_pointer["disgust"]
    anger = emotion_json_pointer["anger"]
    score_pondere = -0.5 * (anger + disgust + sadness - joy) + sentiment

    if (company != "" and score_pondere < 0.5):
        flag_article_retained = 0
        # We initialize the list of keywords, the dictionary which will store the
        # data on the article after processing, and the counter used to count how
        # many entities were detected (to later place the article in
        # list_already_treated_MANA_articles by its relevance)
        keywords_list = []
        list_keywords_confirmed = []
        list_alerting_entities_confirmed = []
        list_sentences_confirmed = []
        list_keywords_deceitful = []
        #counter_confirmed_detected_alerting_entities=0

        for l in range(len(response_nlu["keywords"])):
            emotion_json_pointer = response_nlu["keywords"][l]["emotion"]
            sadness = emotion_json_pointer["sadness"]
            joy = emotion_json_pointer["joy"]
            disgust = emotion_json_pointer["disgust"]
            anger = emotion_json_pointer["anger"]
            sentiment = response_nlu["keywords"][l]["sentiment"]["score"]
            score_pondere_keyword = -0.5 * (anger + disgust + sadness -
                                            joy) + sentiment
            keywords_list.append(
                [response_nlu["keywords"][l]["text"], score_pondere_keyword])

        for keyword_data in keywords_list:
            keyword = keyword_data[0]
            response_bot = assistant.message(
                #workspace_id = 'a2dd5d22-63b4-4915-aac8-1c4f6fd358f6',
                workspace_id='6d7f9feb-3d05-4c0e-82b5-6c509638648c',
                input={
                    'text': keyword
                }).get_result()
            # If the bot has recognized either an alerting entity or the intent Oui_MANA
            # or Non_MANA, then the answer differs from the anything_else node's text:
            # 'No redhibitory word detected'
            if response_bot["output"]["text"] != [
                    'No redhibitory word detected'
            ]:
                if response_bot["output"]["text"] != [
                        'OuiMANA'
                ] and response_bot["output"]["text"] != ['NonMANA']:
                    position_alerting_entity = response_bot['entities'][0][
                        'location']
                    alerting_entity = response_bot['input'][
                        'text'][position_alerting_entity[0]:
                                position_alerting_entity[1]]
                    list_alerting_entities_confirmed.append(alerting_entity)
                    #counter_confirmed_detected_alerting_entities+=1
                for sentence_keyword in text.split('.'):
                    if keyword in sentence_keyword:
                        # If an alerting entity was discovered, meaning it is not one of the intents by elimination
                        #if response_bot["output"]["text"]!=['OuiMANA'] and response_bot["output"]["text"]!=['NonMANA']:
                        # We need the following little trick to catch the exact synonym of entity value that was detected in the input keyword
                        # Having collected the sentences in which this entity appears, we now send them back to the bot, whose nodes were placed with a jump to the nodes of the intents to check whether the sentences trigger the Oui_MANA or Non_MANA intent
                        confirmation_bot = assistant.message(
                            #workspace_id = 'a2dd5d22-63b4-4915-aac8-1c4f6fd358f6',
                            workspace_id='6d7f9feb-3d05-4c0e-82b5-6c509638648c',
                            input={
                                'text': sentence_keyword
                            },
                            context=response_bot["context"]).get_result()
                        if confirmation_bot["output"]["text"] == ['OuiMANA']:
                            # A flag value of 1 indicates that the 1st layer classified the article,
                            # i.e. an alerting entity was detected and its sentences were relevant for MANA
                            try:
                                assistant.create_example(
                                    #workspace_id = 'a2dd5d22-63b4-4915-aac8-1c4f6fd358f6',
                                    workspace_id=
                                    '6d7f9feb-3d05-4c0e-82b5-6c509638648c',
                                    intent='OuiMANA',
                                    text=sentence_keyword,
                                ).get_result()
                            except KeyboardInterrupt:
                                return 0
                            except:
                                pass

                            flag_article_retained = 1
                            list_keywords_confirmed.append(keyword_data)
                            list_sentences_confirmed.append(sentence_keyword)

                        elif confirmation_bot["output"]["text"] == ['NonMANA']:
                            #if response_bot["output"]["text"]!=['OuiMANA'] and response_bot["output"]["text"]!=['NonMANA']:
                            try:
                                assistant.create_example(
                                    #workspace_id = 'a2dd5d22-63b4-4915-aac8-1c4f6fd358f6',
                                    workspace_id=
                                    '6d7f9feb-3d05-4c0e-82b5-6c509638648c',
                                    intent='NonMANA',
                                    text=sentence_keyword,
                                ).get_result()
                            except KeyboardInterrupt:
                                return 0
                            except:
                                pass
                            list_keywords_deceitful.append(keyword_data)
                        # It is possible that no alerting entity was detected but that the keyword triggered the intent of the bot
                        # Hence it might be a less evident, more subtle MANA phrase with no "redhibitory words", hence the flag value 2 for 2nd layer
                        #(if the flag was not already set to 1 by the confirmation of a MANA alert detection)
                        #else:
                        #confirmation_MANA_sentence(keyword,sentence_keyword,assistant,response_bot,counter_confirmed_detected_alerting_entities,flag_article_retained)

        if flag_article_retained == 0:
            classifiers = natural_language_classifier.list_classifiers(
            ).get_result()
            response_nlc = natural_language_classifier.classify(
                classifiers["classifiers"][-1]["classifier_id"],
                text[0:2045]).get_result()
            # The flag value of 3 stands for 3rd layer
            if response_nlc['top_class'] == "Oui_MANA":
                flag_article_retained = 3

        # If the article was retained by one layer, i.e. the flag value is not 0, we store all its information
        article_highlighted = text
        if flag_article_retained != 0:
            score_keywords_confirmed = 0

            list_sentences_confirmed = list(set(list_sentences_confirmed))
            count_sentences = len(list_sentences_confirmed)
            for sentence in list_sentences_confirmed:
                article_highlighted = article_highlighted.replace(
                    sentence, '<mark style="background-color: yellow">' +
                    sentence + '</mark>')

            for k in list_keywords_confirmed:
                score_keywords_confirmed += k[1]

            list_all_keywords = list_keywords_confirmed + list_keywords_deceitful
            list_all_keywords = list(set(map(tuple, list_all_keywords)))
            for keyword_data in list_all_keywords:
                article_highlighted = article_highlighted.replace(
                    keyword_data[0],
                    '<mark style="background-color: orange">' +
                    keyword_data[0] + "(" + str(round(keyword_data[1], 2)) +
                    ")" + '</mark>')

            list_alerting_entities_confirmed = list(
                set(list_alerting_entities_confirmed))
            for keyword in list_alerting_entities_confirmed:
                article_highlighted = article_highlighted.replace(
                    keyword, '<mark style="background-color: red">' + keyword +
                    '</mark>')

            article_highlighted = article_highlighted.replace('$', 'dollars')

            return {
                'flag': flag_article_retained,
                'location': location,
                'company': company,
                'score_company': score_pondere_company,
                'score': score_pondere,
                'count': count_sentences,
                'text': article_highlighted,
                'score_keywords_confirmed': score_keywords_confirmed
            }

        else:
            list_keywords_deceitful = list(
                set(map(tuple, list_keywords_deceitful)))
            for keyword_data in list_keywords_deceitful:
                article_highlighted = article_highlighted.replace(
                    keyword_data[0],
                    '<mark style="background-color: orange">' +
                    keyword_data[0] + "(" + str(round(keyword_data[1], 2)) +
                    ")" + '</mark>')

            return {
                'flag': flag_article_retained,
                'location': location,
                'company': company,
                'score_company': score_pondere_company,
                'score': score_pondere,
                'count': 0,
                'text': article_highlighted,
                'score_keywords_confirmed': 0
            }

    else:
        return {
            'flag': '-1',
            'location': '0',
            'company': '0',
            'score_company': '0',
            'score': '0',
            'count': '0',
            'text': text,
            'score_keywords_confirmed': '0'
        }
Example No. 25
def get_concepts(text):
    concepts = nlu.analyze(
        text=text,
        features=Features(concepts=ConceptsOptions(limit=1))).get_result()
    return concepts['concepts']
Example No. 26
# Import credentials
cred = open('.local/crd.env','r').read()
apikey,apiurl = cred.replace('NATURAL_LANGUAGE_UNDERSTANDING_IAM_APIKEY=','').replace(
                            'NATURAL_LANGUAGE_UNDERSTANDING_URL=','').split()

PATH = {}
PATH['data']    = '../data/Documents/'
PATH['results'] = './Watson-nlu-results/'

NLU = {}
NLU['apikey']         = apikey
NLU['apiurl']         = apiurl
NLU['version']        = '2019-07-12'
NLU['features']       = Features(
                        categories= CategoriesOptions(),
                        concepts  = ConceptsOptions(),
                        entities  = EntitiesOptions(),
                        keywords  = KeywordsOptions(),
                        relations = RelationsOptions(),
                        syntax    = SyntaxOptions()
                        )

##### CLASS OBJECT FOR ARCHETYPAL ANALYSIS (UNDER CONSTRUCTION). ORIGINAL FUNCTIONING CODE BELOW CLASS OBJECT ########

class DocumentArchetypes:
    '''
    DocumentArchetypes performs Archetypal Analysis on a corpus consisting of a set of documents, for example a set 
    of articles, books, news stories or medical dictations.
    
    Input parameters:
    
Example No. 27
            "If you want to go through the supervision settings, type C (for Change settings), otherwise press any other key, e.g. Enter\n"
        )
        if supervision == "C":
            ask_for_Confirmation, ask_for_mana_alert, ask_for_deceitful_alert, ask_for_Training_intent, ask_for_NLC, ask_for_save_MANA_article, ask_for_Save_Non_MANA = setting_supervision(
                namefile_to_treat)

    # Start reading the Excel file
    print("Analysis of text number %d is starting. \n" % text_index)
    text = sheet.cell_value(text_index, 0).replace("\n", "")
    text = identification_language_and_translation(text)
    expected_result_classification = sheet.cell_value(text_index, 1)

    # Send the text to NLU
    response_nlu = naturalLanguageUnderstanding.analyze(
        text=text,
        features=Features(concepts=ConceptsOptions(limit=5),
                          entities=EntitiesOptions(emotion=True,
                                                   sentiment=True),
                          keywords=KeywordsOptions(emotion=True,
                                                   sentiment=True),
                          sentiment=SentimentOptions(document=True),
                          emotion=EmotionOptions(document=True))).get_result()
    print("Article has been processed by NLU. \n")

    # The first criterion is that the article mentions a "Company" entity, so we loop over the entities recognized by NLU
    company = ""
    i = 0
    while (i < len(response_nlu["entities"]) and company == ""):
        if (response_nlu["entities"][i]["type"] == "Company"):
            company = response_nlu["entities"][i]["text"]
        i += 1
def analyseText():
    options = NluOptions
    fileName = NluOptions["file"].split('.')[0]+'.txt'
    filename_converted = fileName.replace(
        " ", "-").replace("'", "").lower()
    
    ''' Prepare the text for Analysis'''
    
    with open(app.config["TRANSCRIPT_UPLOAD"]+filename_converted, 'r') as text_file:
        text = text_file.read()
        text = text.replace('%HESITATION', '')

    print(text)

    ''' Initialize a return variable '''

    myJsonDict = {}

    ''' Extract Category with NLU '''

    if options.get('category') == "True":
        response = natural_language_understanding.analyze(
            language='en',
            text=text,
            features=Features(categories=CategoriesOptions(limit=1))).get_result()

        category = response['categories'][0]

        # Return category ['label'] ['score']
        myJsonDict.update({"category": category})
    else:
        pass

    ''' Extract Concepts with NLU '''

    if options.get('concepts') == "True":
        response = natural_language_understanding.analyze(
            language='en',
            text=text,
            features=Features(concepts=ConceptsOptions(limit=3))).get_result()

        concepts = sorted(response['concepts'],
                            key=itemgetter('relevance'), reverse=True)

        myJsonDict.update({"concepts": concepts})
        # Return concepts ['text'] ['relevence'] ['dbpedia_resource']
    else:
        pass

    ''' Extract Entity with NLU '''

    if options.get('entity') == "True":
        response = natural_language_understanding.analyze(
            language='en',
            text=text,
            features=Features(entities=EntitiesOptions(limit=1))).get_result()

        entity = sorted(response['entities'],
                        key=itemgetter('relevance'), reverse=True)

        myJsonDict.update({"entity": entity[0]})
        # Return entity[0] ['type'] ['text'] ['relevance']
    else:
        pass

    ''' Extract Sentiments and Emotions with NLU '''

    if options.get('sentiments') == "True":
        response = natural_language_understanding.analyze(
            language='en',
            text=text,
            features=Features(keywords=KeywordsOptions(sentiment=True, emotion=True, limit=10))).get_result()

        keywords = sorted(response['keywords'],
                            key=itemgetter('relevance'), reverse=True)

        keywords_sentiments_emotions = []

        for i in keywords:

            keywords_sentiments_emotions_buffer = {
                'keyword': i['text'],
                'sentiment': i['sentiment']['label'],
                'emotion': ''
            }
            # Pick the dominant emotion by checking each one against the running maximum
            maximum = i['emotion']['sadness']
            keywords_sentiments_emotions_buffer['emotion'] = 'sadness'

            if i['emotion']['joy'] > maximum:
                maximum = i['emotion']['joy']
                keywords_sentiments_emotions_buffer['emotion'] = 'joy'

            if i['emotion']['fear'] > maximum:
                maximum = i['emotion']['fear']
                keywords_sentiments_emotions_buffer['emotion'] = 'fear'

            if i['emotion']['disgust'] > maximum:
                maximum = i['emotion']['disgust']
                keywords_sentiments_emotions_buffer['emotion'] = 'disgust'

            if i['emotion']['anger'] > maximum:
                maximum = i['emotion']['anger']
                keywords_sentiments_emotions_buffer['emotion'] = 'anger'

            keywords_sentiments_emotions.append(
                keywords_sentiments_emotions_buffer)

        myJsonDict.update({"sentiments": keywords_sentiments_emotions})
        # Return keywords_sentiments_emotions ['keyword'] ['sentiment'] ['emotion']
    else:
        pass

    ''' Analyse tone to get top 5 positive sentences '''

    if options.get('positiveSentences') == "True":
        tone_analysis = tone_analyzer.tone(
            {'text': text},
            content_type='application/json'
        ).get_result()

        sentences_with_joy = []
        print(json.dumps(tone_analysis, indent=2))

        try:
            for tone in tone_analysis['sentences_tone']:
                try:
                    if tone['tones'][0]['tone_name'] == "Joy":
                        tempDict = {"sentence_id": tone['sentence_id'],
                                    "text": tone['text'],
                                    "score": tone['tones'][0]['score']}
                        sentences_with_joy.append(tempDict)
                except:
                    continue

            sentences_with_joy = sorted(
                sentences_with_joy, key=itemgetter('score'), reverse=True)

            myJsonDict.update(
                {"positiveSentences": sentences_with_joy[:5]})
        except:
            tempDict = {"sentence_id": '',
                        "text": 'Text file too small to get positive sentences, please try again with a bigger document.',
                        "score": '100'}
            myJsonDict.update(
                {"positiveSentences": [tempDict]})
        # return sentences_with_joy[:5] ['text'] ['score']
    else:
        pass

    ''' Pre-Processing parts of speech to plot Word Cloud '''

    response = natural_language_understanding.analyze(
        language='en',
        text=text,
        features=Features(
            syntax=SyntaxOptions(
                sentences=True,
                tokens=SyntaxOptionsTokens(
                    lemma=True,
                    part_of_speech=True,
                )))).get_result()

    verbs = []
    for i in response['syntax']['tokens']:
        if i['part_of_speech'] == 'VERB':
            verbs.append(i['text'])

    nouns = []
    for i in response['syntax']['tokens']:
        if i['part_of_speech'] == 'NOUN':
            nouns.append(i['text'])

    adj = []
    for i in response['syntax']['tokens']:
        if i['part_of_speech'] == 'ADJ':
            adj.append(i['text'])

    nouns_adjectives = []
    for x in nouns:
        nouns_adjectives.append(x)

    for y in adj:
        nouns_adjectives.append(y)

    comment_words_verbs = ' '
    comment_words_nouns_adj = ' '
    stopwords = set(STOPWORDS)

    for val in verbs:
        val = str(val)
        tokens = val.split()
        for i in range(len(tokens)):
            tokens[i] = tokens[i].lower()
        for words in tokens:
            comment_words_verbs = comment_words_verbs + words + ' '

    for val in nouns_adjectives:
        val = str(val)
        tokens = val.split()
        for i in range(len(tokens)):
            tokens[i] = tokens[i].lower()
        for words in tokens:
            comment_words_nouns_adj = comment_words_nouns_adj + words + ' '

    wordcloud_verbs = WordCloud(width=800, height=800,
                                background_color='white',
                                stopwords=stopwords,
                                min_font_size=10,
                                max_font_size=150,
                                random_state=42).generate(comment_words_verbs)

    wordcloud_nouns_adj = WordCloud(width=800, height=800,
                                    background_color='white',
                                    colormap="Dark2",
                                    stopwords=stopwords,
                                    min_font_size=10,
                                    max_font_size=150,
                                    random_state=42).generate(comment_words_nouns_adj)

    todayDate = datetime.today().strftime('%m-%d-%Y-%s')

    verbsWC = app.config["VERBS"]+todayDate+'.png'
    plt.switch_backend('Agg')
    plt.figure(figsize=(5, 5), facecolor=None)
    plt.imshow(wordcloud_verbs)
    plt.axis("off")
    plt.tight_layout(pad=0)
    plt.title("Verbs")
    plt.savefig(verbsWC)

    nounsAdjWC = app.config["NOUNS_ADJECTIVES"]+todayDate+'.png'
    plt.switch_backend('Agg')
    plt.figure(figsize=(5, 5), facecolor=None)
    plt.imshow(wordcloud_nouns_adj)
    plt.axis("off")
    plt.tight_layout(pad=0)
    plt.title("Nouns & Adjectives")
    plt.savefig(nounsAdjWC)

    wordclouds = [nounsAdjWC, verbsWC]

    myJsonDict.update({"wordclouds": wordclouds})
    # print(json.dumps(options, indent=2))
    return jsonify(myJsonDict)
overall_sentiment = []
overall_emotion = []

deviceid = []
free_data = []


# for i in range(len(data)):
#     text = data['free_text'][i]

response = natural_language_understanding.analyze(
    text='Great workers and nice department',
    features=Features(
        entities=EntitiesOptions(emotion=True, sentiment=True, limit=1),
        keywords=KeywordsOptions(emotion=True, sentiment=True,limit=1),
        concepts=ConceptsOptions(limit=1),
        emotion=EmotionOptions(document=True),
        sentiment=SentimentOptions(document=True))).get_result()

json_result = json.dumps(response, indent=2) 

# sentiment_p = json.dumps(response['sentiment']['document']['label'])
# keywords_p = json.dumps(response['keywords'][0]['text'])
# keywords_p_1 = json.dumps(response['keywords'][0]['sentiment']['label'])

# keywords_p_2 = json.dumps(response['keywords'][0]['emotion'])
# i = json.loads(keywords_p_2)
# max_val =  max(i.items(), key=operator.itemgetter(1))[0] 
# print('max val : ') 
# print(max_val)
# #emo = i.keys()
Example No. 30
    def form_valid(self, form):
        rep = form.save(commit=False)

        input_url = self.request.POST['url'].split("/watch?v=")[1]
        new_url = input_url[:11]
        rep.url = new_url
        # -------------
        import googleapiclient.discovery
        import os, csv, json, nltk, collections, pickle
        from nltk.sentiment.vader import SentimentIntensityAnalyzer
        from ibm_watson import NaturalLanguageUnderstandingV1
        from ibm_watson.natural_language_understanding_v1 import Features, ConceptsOptions, EmotionOptions
        from ibm_watson.natural_language_understanding_v1 import EntitiesOptions, KeywordsOptions

        nltk.download('vader_lexicon')

        ibmApiKey = os.environ['IBM_API_KEY']
        url = 'https://gateway.watsonplatform.net/natural-language-understanding/api'

        natural_language_understanding = NaturalLanguageUnderstandingV1(
            version='2018-11-16', iam_apikey=ibmApiKey, url=url)

        #os.environ["OAUTHLIB_INSECURE_TRANSPORT"] = "0"

        comments = []
        data = []
        keys = []
        only_com = []
        video_details = []

        api_service_name = 'youtube'
        api_version = "v3"
        ytApiKey = os.environ['YT_API_KEY']

        youtube = googleapiclient.discovery.build(api_service_name,
                                                  api_version,
                                                  developerKey=ytApiKey)

        vid_id = new_url
        # get details
        req_details = youtube.videos().list(
            part="snippet,contentDetails,statistics", id=vid_id)

        details = req_details.execute()
        det = details["items"][0]
        videoTitle = det["snippet"]["title"]
        viewCount = det["statistics"]["viewCount"]
        commentCount = det["statistics"]["commentCount"]
        #description = det["snippet"]["description"]
        #vidLikes = det["statistics"]["likeCount"]
        #vidDislikes = det["statistics"]["dislikeCount"]

        video_details.append(videoTitle)
        video_details.append(viewCount)
        video_details.append(commentCount)

        # get all comments
        def get_comment_threads(token=""):
            results = youtube.commentThreads().list(part="snippet,replies",
                                                    textFormat="plainText",
                                                    pageToken=token,
                                                    maxResults=100,
                                                    videoId=vid_id).execute()

            for item in results["items"]:
                comment = item["snippet"]["topLevelComment"]
                text = comment["snippet"]["textDisplay"]
                comReplies = item["snippet"]["totalReplyCount"]
                comLikes = comment["snippet"]["likeCount"]
                top = comReplies + comLikes
                comments.append([text, comLikes, comReplies, top])
                only_com.append(text)

                if 'replies' in item.keys():
                    for rep in item["replies"]["comments"]:
                        reply = rep["snippet"]["textDisplay"]
                        rep_likes = rep["snippet"]["likeCount"]
                        comments.append([reply, rep_likes, "N/A", "N/A"])
                        only_com.append(reply)

            if "nextPageToken" in results:
                return get_comment_threads(results["nextPageToken"])
            else:
                return comments

        get_comment_threads()
        all_ = ". ".join(only_com)

        # IBM
        response = natural_language_understanding.analyze(
            text=str(all_),
            language='en',
            features=Features(
                concepts=ConceptsOptions(limit=5),
                entities=EntitiesOptions(sentiment=True, limit=5),
                keywords=KeywordsOptions(limit=200))).get_result()

        # Sort Keywords

        for key in response["keywords"]:
            text = key["text"]
            count = key["count"]
            rel = key["relevance"]
            keys.append([text, count, rel])

        def sortSecond(val):
            return val[1]

        keys.sort(key=sortSecond)

        # Score sentiment
        sent_array = []

        prod_sent = []

        vn = []
        n = []
        sn = []
        neu = []  #0
        sp = []
        p = []
        vp = []

        sia = SentimentIntensityAnalyzer()
        for i in range(len(comments)):
            comment = comments[i][0]
            sent = sia.polarity_scores(comment)
            score = sent["compound"]
            #data.append([comment, score])
            sent_array.append(score)

        for i in sent_array:
            if i <= -.75:
                vn.append(i)
            elif -.75 < i <= -.5:
                n.append(i)
            elif -.5 < i <= -.25:
                sn.append(i)
            elif -.25 < i <= .25:
                neu.append(i)
            elif .25 < i <= .5:
                sp.append(i)
            elif .5 < i <= .75:
                p.append(i)
            else:
                vp.append(i)

        prod_sent.append(len(vn))
        prod_sent.append(len(n))
        prod_sent.append(len(sn))
        prod_sent.append(len(neu))
        prod_sent.append(len(sp))
        prod_sent.append(len(p))
        prod_sent.append(len(vp))

        # Spam
        from django.contrib.staticfiles import finders
        locRes = finders.find('pickles/sp_model')
        with open(locRes, 'rb') as model_file:
            sm = pickle.load(model_file)

        spam = 0
        ham = 0
        for i in range(len(comments)):
            comment = comments[i][0]
            sm.predict([comment])
            if sm.predict([comment]) == [1]:
                spam += 1
            else:
                ham += 1

        if spam == 0:
            perc = 0
        else:
            perc = round((spam / (spam + ham)), 4)

        # IBM concepts
        conc = []
        _concepts = []
        concept_rel = []
        for concept in response["concepts"]:
            text = concept["text"]
            relevance = concept["relevance"]
            conc.append([text, relevance])

        # Sort concepts
        conc.sort(key=sortSecond)
        for i in range(len(conc)):
            _concepts.append(conc[i][0])
            concept_rel.append(conc[i][1])

        # IBM Keyphrases
        words = []
        total_k = 0

        _keyW = []
        _keyCount = []

        for elem in keys:
            if elem not in words:
                words.append(elem)
                if len(words) >= 10:
                    break
        for i in range(len(words)):
            _keyW.append(words[i][0])
            _keyCount.append(words[i][1])
            total_k += words[i][1]

        for i in range(len(words)):
            cnt = words[i][1]
            percent = round((cnt / total_k), 4)

        # Entities (Count, Sentiment) - IBM
        total_e = 0
        ents = []
        # ---- PRODUCTION
        _entities = []
        _eCount = []
        _eSent = []
        _ePerc = []

        all_ent = []
        # ---- PRODUCTION

        for ent in response["entities"]:
            text = ent["text"]
            sent = ent["sentiment"]["label"]
            sent_score = ent["sentiment"]["score"]
            rel = ent["relevance"]
            eCount = ent["count"]
            total_e += eCount
            ents.append([text, sent, rel, eCount, sent_score])
            _eSent.append(sent_score)
            all_ent.append([text, eCount])

        # Sort Entities
        all_ent.sort(key=sortSecond)
        for i in range(len(all_ent)):
            _entities.append(all_ent[i][0])
            _eCount.append(all_ent[i][1])

        # -------------

        rep.keywords = _keyW
        rep.keyword_count = _keyCount

        rep.spam = perc
        rep.entities = _entities
        rep.entity_count = _eCount
        rep.title = video_details[0]
        rep.views = video_details[1]
        rep.comments = video_details[2]
        rep.concepts = _concepts
        rep.concept_relevance = concept_rel
        rep.sentiment = prod_sent

        form.instance.author = self.request.user
        return super().form_valid(form)