Example #1
import demoji
import parser

demoji.download_codes()


def test_is_valid_dollar_sign_match():
    a = "$GME"
    b = parser.is_dollar_sign_match(a)
    assert b is True


def test_not_valid_dollar_sign_match():
    a = "GME"
    b = parser.is_dollar_sign_match(a)
    assert b is False


def test_not_valid_dollar_sign_match_with_call_abbreviation():
    a = "$65C"
    b = parser.is_dollar_sign_match(a)
    assert b is False


def test_not_valid_dollar_sign_match_with_put_abbreviation():
    a = "$50P"
    b = parser.is_dollar_sign_match(a)
    assert b is False


def test_preprocess_and_split_text_with_emojis():
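The snippet is cut off before the body of the last test. The earlier tests pin down what parser.is_dollar_sign_match should do: accept cashtags such as $GME and reject bare tickers and option-style strings like $65C or $50P. A minimal sketch of an implementation that satisfies those tests (an assumption, not the project's actual code):

import re

# Hypothetical implementation matching the tests above: a '$' followed by
# letters only, so '$65C' and '$50P' (which contain digits) are rejected.
_DOLLAR_SIGN_RE = re.compile(r"^\$[A-Za-z]+$")


def is_dollar_sign_match(text):
    return bool(_DOLLAR_SIGN_RE.match(text))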
Example #2
def get_tweets(subject):
    logger.info('-- Start retrieving tweets')
    with open('credentials.json') as json_file:
        data = json.load(json_file)
        bearer_token = data['bearer_token']

    token = {'access_token': bearer_token, 'token_type': 'bearer'}
    auth = OAuth2(token=token)

    if '#' in subject:
        subject = subject.replace('#', '%23')

    url = f'https://api.twitter.com/2/tweets/search/recent?query={subject}+lang:fr+-is:retweet&max_results=100'
    r = req.get(url, auth=auth)

    txt = json.loads(r.text)
    data = txt["data"]

    for _ in range(10):
        if "next_token" not in txt["meta"]:
            break
        next_token = txt["meta"]["next_token"]
        url = f'https://api.twitter.com/2/tweets/search/recent?query={subject}+lang:fr+-is:retweet&max_results=100' \
              f'&next_token={next_token}'
        r = req.get(url, auth=auth)
        txt = json.loads(r.text)
        data += txt["data"]

    dataset = []
    textt = []
    demoji.download_codes()
    for tweet in data:
        textt.append(tweet["text"])
        txt = nlp_pipeline(tweet["text"])
        dataset.append(txt)

    #word cloud
    stop_words = set(STOPWORDS)
    with open('stop_words_french.json', encoding='utf-8') as json_file:
        stop_words_french = json.load(json_file)
    stop_words.update(stop_words_french)
    stop_words.add(subject.replace('%23', ''))
    stop_words.add(subject.replace('%23', '').lower())
    wordcloud = WordCloud(
        background_color='white',
        stopwords=stop_words,
        max_words=200,
        max_font_size=40,
        scale=3,
        random_state=1  # chosen at random by flipping a coin; it was heads
    ).generate(str(dataset))

    plt.figure(1, figsize=(12, 12))
    plt.axis('off')

    plt.imshow(wordcloud)
    plt.show()

    logger.info(f'--- Get {len(dataset)} tweets')

    return dataset
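This function presumably lives in a module that also imports json, requests (as req), requests_oauthlib.OAuth2, wordcloud (WordCloud, STOPWORDS), matplotlib.pyplot (as plt) and demoji, and that defines the nlp_pipeline helper it calls. That helper is not shown; a minimal sketch of what it might do, assuming it only strips emojis with demoji.replace and normalises whitespace, is:

import re

import demoji


def nlp_pipeline(text):
    # Hypothetical stand-in for the cleaning helper used by get_tweets();
    # the real pipeline may also lower-case, tokenise or remove stop words.
    text = demoji.replace(text, "")           # strip every emoji
    return re.sub(r"\s+", " ", text).strip()  # collapse runs of whitespace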
Example #3
def __init__(self):
    demoji.download_codes()
Example #4
def test_download():
    assert demoji.download_codes() is None
    assert type(demoji._EMOJI_PAT) == type(re.compile(""))  # noqa
    assert isinstance(demoji._CODE_TO_DESC, dict)
    assert os.path.isfile(demoji.CACHEPATH)
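For context (not part of the test above): once download_codes() has populated the cache that the test inspects, the public helpers can be used directly. A small usage sketch:

import demoji

demoji.download_codes()  # fetch and cache the emoji code set

text = "roses are red, violets are blue 🌹💙"
print(demoji.findall(text))      # maps each emoji found to its description
print(demoji.replace(text, ""))  # same text with the emojis stripped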
Example #5
def __init__(self):
    """
    Constructor initializing the attributes
    """
    demoji.download_codes()
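For context, constructors like the two above download the emoji code cache once so that later calls can strip emojis without re-fetching it. A minimal sketch of such a wrapper class (the class name and the clean method are assumptions, not taken from the snippets):

import demoji


class EmojiCleaner:
    def __init__(self):
        """
        Constructor initializing the attributes
        """
        demoji.download_codes()  # download the emoji codes once, up front

    def clean(self, text):
        # Replace every emoji in `text` with an empty string.
        return demoji.replace(text, "")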
Example #6
import ast
import csv
from csv import DictWriter
import re
import pandas as pd
import demoji
import os

demoji.download_codes()  # for downloading demoji cache

df = pd.read_csv("Data/Found_Data/arifhosentamim.csv")
result = ""
read = ""
f = open("insomnia_no_usa_output.csv", "w", newline='', encoding='utf-8')
fieldnames = ['tweet_id', 'label', 'tweets']
writer: DictWriter = csv.DictWriter(f, fieldnames=fieldnames)
writer.writeheader()

files = os.listdir("Data/Found_Data/")
print(files)
for file in files:
    data = pd.read_csv("Data/Found_Data/" + file)
    tweet = []
    for index, row in data.iterrows():
        row['tweet'] = ast.literal_eval(row['tweet'])
        row['tweet'] = (row['tweet'].decode() if isinstance(
            row['tweet'], bytes) else row['tweet']).strip()
        row[2] = re.sub(r'@\S+', '', row[2])  # Remove mentions
        row[2] = re.sub(r'#\S+', '', row[2])  # Remove hashtags
        row[2] = re.sub(r'RT', '', row[2])
        row[2] = re.sub(r'http\S+', '', row[2])  # Remove urls
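The snippet is cut off after the URL-removal step. A hedged sketch of how such a loop is typically finished, assuming the remaining steps are emoji removal with demoji.replace and writing one row per tweet through the csv.DictWriter opened above (the function name and the label value are placeholders):

def clean_and_write(writer, tweet_id, text, label="0"):
    # Hypothetical completion of the loop above: strip emojis and leftover
    # whitespace, then emit one cleaned row with the fieldnames defined earlier.
    text = demoji.replace(text, "")
    text = re.sub(r"\s+", " ", text).strip()
    writer.writerow({"tweet_id": tweet_id, "label": label, "tweets": text})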
Example #7
def main():
    source = input("Enter the Source: ")
    sources = ["amazon", "facebook", "twitter"]

    if source not in sources:
        logger.error("Please enter the source as either <amazon> or <facebook> or <twitter>")
        print("Please enter the source as either <amazon> or <facebook> or <twitter>")
        sys.exit(1)
        
    #phases - function to validate a 0/1 input option
    def checkInputStatus(inputoption):
        options = [0, 1]
        if inputoption not in options:
            print("Please enter the option as either 0 or 1")
            sys.exit(1)
        return 1

    #phases - Get the rerun status from user
    external_data_flag = int(input("Want to upload the data externally? 0/1: "))
    checkInputStatus(external_data_flag)
    rerun = int(input("Enter the Re-run status 0/1: "))
    checkInputStatus(rerun)
    #phases - Get the processing options status    
    print("\n---------------- Enter the processing options ---------------- ")
    scrape = None
    if external_data_flag != 1:
        scrape = int(input("\nDo you want to process Scraping 0/1: "))
        checkInputStatus(scrape)
    preproc = int(input("\nDo you want to process Pre processing  0/1: "))
    checkInputStatus(preproc)
    feature = int(input("\nDo you want to process Feature Extraction 0/1: "))
    checkInputStatus(feature)
    clustering = int(input("\nDo you want to process Clustering 0/1: "))
    checkInputStatus(clustering)
    visual = int(input("\nDo you want to process Visualization 0/1: "))
    checkInputStatus(visual)
    
    #phases - Validate the processing options status
    if external_data_flag != 1:
        processoption=str(scrape)+str(preproc)+str(feature)+str(clustering)+str(visual)
    else:
        processoption = str(preproc) + str(feature) + str(clustering) + str(visual)

    if processoption in ['00000', '0000']:  # no processing option selected (with or without the scrape flag)
        print("\n Not proceeding with any processing------------ END ")
        sys.exit(1)
    if processoption in ['11111', '1111'] and rerun in [1]:
        print("\n As rerun option is 1, cannot execute all processing------------ END ")
        sys.exit(1)
    
    #phases - function to denote the END of processing
    def endprocess():
        logger.info("Total elapsed time: {0}".format(time.strftime("%H:%M:%S", time.gmtime(time.time() - start_time))))
        logger.info("End...!!!")
        sys.exit(1)

    config = config_ini()

    """
    Pre-requisites
    """
    global keyword, data_tw_post, tw_data_pped, nlp_server, tw_data_emotions, tw_data_clustering
    driver = None
    if source != "twitter":
        if external_data_flag != 1:
            driver = webdriver.Chrome(executable_path=config['PATHS']['CHROME_DRIVER'])
        demoji.download_codes()

    stanfordnlp_loc = config['PATHS']['SUPPORTING_FILES'] + '\\stanford-corenlp-full-2018-10-05' +"\\"
    cmd = "java -mx4g -cp " + '"*"' + " edu.stanford.nlp.pipeline.StanfordCoreNLPServer"
    nlp_server = subprocess.Popen(cmd, cwd=stanfordnlp_loc)
    spacy_nlp = spacy.load('en_core_web_sm')
    spacy_nlp.add_pipe(LanguageDetector(), name="language_detector", last=True)
    if not os.path.isdir(config['PATHS']['SUPPORTING_FILES'] + '\\en_ewt_models'):
        stanfordnlp.download('en', resource_dir=config['PATHS']['SUPPORTING_FILES'])
        demoji.download_codes()

    try:
        nltk.data.find('tokenizers/punkt')
        nltk.data.find('corpora/stopwords')
    except LookupError:
        nltk.download('punkt')
        nltk.download('stopwords')

    sentiment_nlp = StanfordCoreNLP('http://*****:*****')

    if source == "facebook":
        try:
            # ... (Facebook scraping set-up elided; the snippet resumes at the mobile-site login step) ...
                        LogInButton = driver.find_element_by_xpath("//a[@role = 'button']")
                        LogInButton.click()
                        username = driver.find_element_by_id("m_login_email")
                        username.clear()
                        username.send_keys(int(config['FB_LOGINS']['CONTACTNO']))
                        password = driver.find_element_by_id("m_login_password")
                        password.clear()
                        password.send_keys(config['FB_LOGINS']['PASSWORD'])
                        driver.find_element_by_name("login").click()

                        time.sleep(7)
                        fbpostforcomments = copy.deepcopy(data_fb_post)

                        fbpostforcomments = fbpostforcomments.dropna(subset=['post_url'])
                        fb_comments = fbpostforcomments['post_url'].apply(lambda x: fbcomments.scrapeFbComments(x, driver))
                        fb_reviews = pd.concat([r for r in fb_comments], ignore_index=True)

                        fb_reviews = pd.merge(fb_reviews, data_fb_post, left_on='post', right_on='post_url', how="left")
                        fb_reviews = fb_reviews.drop_duplicates(subset='commentWithAuthorname')

                    logger.info("--------------------- Scraping is Completed...!!! -------------------------")
                    logger.info("Exporting as csv into Output Path. Please wait...!!!")
                    fb_reviews.to_csv(config['PATHS']['BASEDIR'] + "\\outputs\\fb_input_data_" + keyword + ".csv", index=False)
                    if processoption in ['10000']:
                        endprocess()

            """
            Data Pre-Processing
            """
            if preproc in [1]:
                logger.info("------------- Data Pre-processing is Initiated. Please wait...!!! ---------")
                if scrape not in [1]:
                    try:
                        if external_data_flag != 1:
                            fb_reviews=pd.read_csv(config['PATHS']['BASEDIR'] + "\\outputs\\fb_input_data_" + keyword + ".csv")
                        else:
                            keyword = input("Enter the Keyword: ")
                            file_upload = easygui.fileopenbox()
                            fb_reviews = pd.read_csv(file_upload)
                    except Exception as e:
                        print("\n Scraping output file is not available at the mentioned path")
                        logger.error("Exception: {}".format(e))
                
                fb_data_pp = preprocReviews.fbPreProcess(fb_reviews, spacy_nlp)
                fb_data_pped = preprocReviews.create_final_input(fb_data_pp, demoji)
            
                logger.info("---------------- Data Pre-processing is Completed...!!! -------------------")
                logger.info("Exporting as csv into Output Path. Please wait...!!!")
                fb_data_pped.to_csv(config['PATHS']['BASEDIR'] + "\\outputs\\fb_data_preprocessed_" + keyword + ".csv",
                                index=False)
                if processoption[-3:] in ['000']:
                    endprocess()

            """
            Features Extraction: Sentiments
            """
            if feature in [1]:
                logger.info("-------------------------- Features Extraction  ----------------------------")
                logger.info("Sentiments Extraction is in Progress. Please wait..!!!")
                if preproc not in [1]:
                    try:
                        fb_data_pped = pd.read_csv(config['PATHS']['BASEDIR'] + "\\outputs\\fb_data_preprocessed_" + keyword + ".csv")
                    except Exception as e:
                        print("\n Data processing output file is not available at the mentioned path")
                        logger.error("Exception: {}".format(e))
                fb_data_sentiment = copy.deepcopy(fb_data_pped)
                fb_data_sentiment['sentiment_new'] = fb_data_sentiment['sentence'].apply(
                lambda x: sentiments.extract_sentiment(x, sentiment_nlp))
                logger.info("Sentiments Extraction is Completed...!!!")

                logger.info("Exporting as csv into Output Path. Please wait...!!!")
                fb_data_sentiment.to_csv(config['PATHS']['BASEDIR'] + "\\outputs\\fb_data_sentiments_" + keyword + ".csv",
                                     index=False)

                """
                Features Extraction: Themes
                """
                logger.info("Themes Extraction is in Progress. Please wait..!!!")

                fb_data_themes = copy.deepcopy(fb_data_sentiment)
                fb_data_themes = themes.tag_themes(fb_data_themes, spacy_nlp, nlp)
                logger.info("Themes Extraction is Completed...!!!")

                logger.info("Exporting as csv into Output Path. Please wait...!!!")
                fb_data_themes.to_csv(config['PATHS']['BASEDIR'] + "\\outputs\\fb_data_themes_" + keyword + ".csv",
                                  index=False)

                """
                Features Extraction: Emotions
                """
                logger.info("Emotions Extraction is in Progress. Please wait..!!!")

                english_stopwords = stopwords.words('english')
                fb_data_emotions = emotions.tag_emotions(fb_data_themes, english_stopwords, nlp)
            
                logger.info("Emotions Extraction is Completed...!!!")
                logger.info("Exporting as csv into Output Path. Please wait...!!!")
                fb_data_emotions.to_csv(config['PATHS']['BASEDIR'] + "\\outputs\\fb_data_emotions_" + keyword + ".csv",
                                    index=False)

                logger.info("------------------ Features Extraction is Completed...!!! -----------------")
                if processoption[-2:] in ['00']:
                    endprocess()
            nlp_server.kill()

            """
            Features Extraction: Clustering
            """
            if clustering in [1]:
                logger.info("--------------- Clustering is in Progress. Please wait...!!! ---------------")
                themes_map_data = pd.read_csv(config['PATHS']['BASEDIR'] + "\\common_files\\theme_mapping.csv",
                                          error_bad_lines=False, encoding='ISO-8859-1')
                emotions_map_data = pd.read_csv(config['PATHS']['BASEDIR'] + "\\common_files\\emotion_mapping.csv",
                                            error_bad_lines=False, encoding='ISO-8859-1')
                if feature not in [1]:
                    try:
                        fb_data_emotions = pd.read_csv(config['PATHS']['BASEDIR'] + "\\outputs\\fb_data_emotions_" + keyword + ".csv")
                    except Exception as e:
                        print("\n Emotion extraction output file is not available at the mentioned path")
                        logger.error("Exception: {}".format(e))
                fb_data_clustering = cluster.cluster_theme_keywords(fb_data_emotions, themes_map_data)
                fb_data_clustering = cluster.cluster_emotion_keywords(fb_data_clustering, emotions_map_data)
                logger.info("Clustering is Completed...!!!")

                logger.info("Exporting as csv into Output Path. Please wait...!!!")
                fb_data_clustering.to_csv(config['PATHS']['BASEDIR'] + "\\outputs\\fb_data_clustering_" + keyword + ".csv",
                                      index=False)
                logger.info("-------------------- Clustering is Completed...!!! --------------------------")
                if processoption[-1:] in ['0']:
                    endprocess()

            """
            Visualization
            """
            if visual in [1]:
                logger.info("------------------- Visualization is Initiated. Please wait...!!! -----------")
            
                features = ["themes_keyword", "emotion_keyword", "theme_groups", "emotion_groups"]
                feature_groups = ["theme_groups", "emotion_groups"]
                feature1 = feature_groups[0]
                feature2 = feature_groups[1]

                brand = keyword
                if clustering not in [1]:
                    try:
                        fb_data_clustering = pd.read_csv(config['PATHS']['BASEDIR'] + "\\outputs\\fb_data_clustering_" + keyword + ".csv")
                    except Exception as e:
                        print("\n Clustering output file is not available at the mentioned path")
                        logger.error("Exception: {}".format(e))
                viz_data = copy.deepcopy(fb_data_clustering)
                viz_data = viz_data.loc[viz_data[feature1].notnull(), :]
                viz_data.index = range(len(viz_data))

                viz.plotWordCloud(config=config, source=source, data=viz_data, brand=brand, features=features)
                #viz.frequencyBubblePlot(config=config, source=source, data=viz_data, brand=brand, features=features)
                viz.fitModelAndDraw(config=config, source=source, data=viz_data, __title__=feature1 + 'v/s' + feature2,
                                brand=brand, feature1=feature1, feature2=feature2)
                viz.contigencyTable(config=config, source=source, data=viz_data, brand=brand, feature1=feature1,
                                feature2=feature2)
                viz.frequencyDistribution(config=config, source=source, features=features, data=viz_data, brand=brand)

                logger.info("-------------------- Visualization Completed...!!! --------------------------")
                logger.info("Total elapsed time: {0}".format(time.strftime("%H:%M:%S", time.gmtime(time.time() - start_time))))
                logger.info("End...!!!")

        except Exception as e:
            nlp_server.kill()
            logger.error("Exception: {}".format(e))

    elif source == "amazon":
        try:
            """
            Data Scraping
            """
            if external_data_flag != 1:
                keyword = input("Enter the Keyword: ")
                if scrape in [1]:
                    logger.info("---------------- Scrapping is Initiated. Please wait...!!! ----------------")
                    rev_lnk_scrp = int(input("\nDo you want to upload the review links externally  0/1: "))
                    checkInputStatus(rev_lnk_scrp)
                    if rev_lnk_scrp in [1]:
                        review_link_df = pd.read_csv(config['PATHS']['BASEDIR'] + "\\common_files\\review_link.csv",
                                             error_bad_lines=False)
                    else:
                        logger.info("---------------- Review link extraction is Initiated. Please wait...!!! ----------------")
                        review_link_df1 = amzreviewlinkscrapper.getreview_link(keyword)
                        logger.info("---------------- Review link extraction is completed. ----------------")
                        review_link_df = review_link_df1.rename(columns = {"review_links":"Review_Link_Href","total_review_count":"Review_Count","product_name":"Name"})
                    review_link_df = review_link_df.drop_duplicates(subset='Review_Link_Href')
                    review_link_df = review_link_df.dropna(subset=['Review_Link_Href'], axis=0)
                    review_link_df['linkset'] = review_link_df.apply(amzscraper.create_linkset, axis=1)
                    review_link_df['linkset2'] = review_link_df['linkset'].apply(lambda x: '|'.join(x))
                    all_links_df = review_link_df['linkset2'].str.split("|", expand=True)
                    total_number_of_pages = len(all_links_df.columns)
                    logger.info("Total no. of Review-Links Scraped: {}".format(len(all_links_df)))
                    review_link_df = pd.concat([review_link_df, all_links_df], axis=1)
                    review_link_df = pd.melt(review_link_df,
                                         id_vars=['Name', 'Review_Link_Href','Review_Count', 'linkset', 'linkset2'],
                                         value_vars=list(range(0, total_number_of_pages)), value_name='Final_link')
                    review_link_df = review_link_df.sort_values(by=['Review_Link_Href', 'variable'], ascending=[True, True])
                    review_link_df1 = review_link_df[review_link_df['Final_link'].isna() == False]

                    list_dataframe = review_link_df1['Final_link'].apply(lambda x: amzscraper.scrap_reviews(x, driver))
                    reviews_df_stacked = pd.concat([r for r in list_dataframe], ignore_index=True)
                    amz_reviews_data = pd.merge(reviews_df_stacked, review_link_df1, left_on='review_link',
                                            right_on='Final_link', how="left")

                    amz_reviews_data = amz_reviews_data.sort_values(by=['Review_Link_Href', 'Final_link'],
                                                                ascending=[True, True])

                    logger.info("--------------------- Scraping is Completed...!!! -------------------------")
                    logger.info("Exporting as csv into Output Path. Please wait...!!!")
                    amz_reviews_data.to_csv(config['PATHS']['BASEDIR'] + "\\outputs\\amz_input_data_" + keyword + ".csv",
                                        index=False)
                    if processoption in ['10000']:
                        endprocess()

            """
            Data Pre-Processing
            """
            if preproc in [1]:
                logger.info("------------- Data Pre-processing is Initiated. Please wait...!!! ---------")
                if scrape not in [1]:
                    try:
                        if external_data_flag != 1:
                            amz_reviews_data=pd.read_csv(config['PATHS']['BASEDIR'] + "\\outputs\\amz_input_data_" + keyword + ".csv")
                        else:
                            keyword = input("Enter the Keyword: ")
                            file_upload = easygui.fileopenbox()
                            amz_reviews_data = pd.read_csv(file_upload)
                    except Exception as e:
                        print("\n Scraping output file is not available at the mentioned path")
                        logger.error("Exception: {}".format(e))
                amz_data_pp = preprocReviews.amazonPreProcess(amz_reviews_data, spacy_nlp)
                amz_data_pped = preprocReviews.create_final_input(amz_data_pp, demoji)
            
                logger.info("---------------- Data Pre-processing is Completed...!!! -------------------")
                logger.info("Exporting as csv into Output Path. Please wait...!!!")
                amz_data_pped.to_csv(config['PATHS']['BASEDIR'] + "\\outputs\\amz_data_preprocessed_" + keyword + ".csv",
                                 index=False)
                if processoption[-3:] in ['000']:
                    endprocess()

            """
            Features Extraction: Sentiments
            """
            if feature in [1]:
                logger.info("-------------------------- Features Extraction  ----------------------------")
                logger.info("Sentiments Extraction is in Progress. Please wait..!!!")
                if preproc not in [1]:
                    try:
                        amz_data_pped = pd.read_csv(config['PATHS']['BASEDIR'] + "\\outputs\\amz_data_preprocessed_" + keyword + ".csv")
                    except Exception as e:
                        print("\n Data processing output file is not available at the mentioned path")
                        logger.error("Exception: {}".format(e))

                amz_data_sentiment = copy.deepcopy(amz_data_pped)
                amz_data_sentiment['sentiment_new'] = amz_data_sentiment['sentence'].apply(
                lambda x: sentiments.extract_sentiment(x, sentiment_nlp))
                logger.info("Sentiments Extraction is Completed...!!!")

                logger.info("Exporting as csv into Output Path. Please wait...!!!")
                amz_data_sentiment.to_csv(config['PATHS']['BASEDIR'] + "\\outputs\\amz_data_sentiments_" + keyword + ".csv",
                                      index=False)

                """
                Features Extraction: Themes
                """
                logger.info("Themes Extraction is in Progress. Please wait..!!!")

                amz_data_themes = copy.deepcopy(amz_data_sentiment)
                amz_data_themes = themes.tag_themes(amz_data_themes, spacy_nlp, nlp)
                logger.info("Themes Extraction is Completed...!!!")

                logger.info("Exporting as csv into Output Path. Please wait...!!!")
                amz_data_themes.to_csv(config['PATHS']['BASEDIR'] + "\\outputs\\amz_data_themes_" + keyword + ".csv",
                                   index=False)

                """
                Features Extraction: Emotions
                """
                logger.info("Emotions Extraction is in Progress. Please wait..!!!")

                english_stopwords = stopwords.words('english')
                amz_data_emotions = emotions.tag_emotions(amz_data_themes, english_stopwords, nlp)
                logger.info("Emotions Extraction is Completed...!!!")

                logger.info("Exporting as csv into Output Path. Please wait...!!!")
                amz_data_emotions.to_csv(config['PATHS']['BASEDIR'] + "\\outputs\\amz_data_emotions_" + keyword + ".csv",
                                     index=False)

                logger.info("------------------ Features Extraction is Completed...!!! -----------------")
                if processoption[-2:] in ['00']:
                    endprocess()
            nlp_server.kill()

            """
            Features Extraction: Clustering
            """
            if clustering in [1]:
                logger.info("--------------- Clustering is in Progress. Please wait...!!! ---------------")
            
                themes_map_data = pd.read_csv(config['PATHS']['BASEDIR'] + "\\common_files\\theme_mapping.csv",
                                          error_bad_lines=False, encoding='ISO-8859-1')
                emotions_map_data = pd.read_csv(config['PATHS']['BASEDIR'] + "\\common_files\\emotion_mapping.csv",
                                            error_bad_lines=False, encoding='ISO-8859-1')
                if feature not in [1]:
                    try:
                        amz_data_emotions = pd.read_csv(config['PATHS']['BASEDIR'] + "\\outputs\\amz_data_emotions_" + keyword + ".csv")
                    except Exception as e:
                        print("\n Emotion extraction output file is not available at the mentioned path")
                        logger.error("Exception: {}".format(e))
                        
                amz_data_clustering = cluster.cluster_theme_keywords(amz_data_emotions, themes_map_data)
                amz_data_clustering = cluster.cluster_emotion_keywords(amz_data_clustering, emotions_map_data)
                logger.info("-------------------- Clustering is Completed...!!! --------------------------")

                logger.info("Exporting as csv into Output Path. Please wait...!!!")
                amz_data_clustering.to_csv(config['PATHS']['BASEDIR'] + "\\outputs\\amz_data_clustering_" + keyword + ".csv", index=False)
                if processoption[-1:] in ['0']:
                    endprocess()

            """
            Visualization
            """
            if visual in [1]:
                logger.info("------------------- Visualization is Initiated. Please wait...!!! -----------")
            
                features = ["themes_keyword", "emotion_keyword", "theme_groups", "emotion_groups"]
                feature_groups = ["theme_groups", "emotion_groups"]
                feature1 = feature_groups[0]
                feature2 = feature_groups[1]

                brand = keyword
                if clustering not in [1]:
                    try:
                        amz_data_clustering = pd.read_csv(config['PATHS']['BASEDIR'] + "\\outputs\\amz_data_clustering_" + keyword + ".csv")
                    except Exception as e:
                        print("\n Clustering output file is not available at the mentioned path")
                        logger.error("Exception: {}".format(e))
                viz_data = copy.deepcopy(amz_data_clustering)
                viz_data = viz_data.loc[viz_data[feature1].notnull(), :]
                viz_data.index = range(len(viz_data))

                viz.plotWordCloud(config=config, source=source, data=viz_data, brand=brand, features=features)
                #viz.frequencyBubblePlot(config=config, source=source, data=viz_data, brand=brand, features=features)
                viz.fitModelAndDraw(config=config, source=source, data=viz_data, __title__=feature1 + 'v/s' + feature2,
                                brand=brand, feature1=feature1, feature2=feature2)
                viz.contigencyTable(config=config, source=source, data=viz_data, brand=brand, feature1=feature1,
                                feature2=feature2)
                viz.frequencyDistribution(config=config, source=source, features=features, data=viz_data, brand=brand)

                logger.info("-------------------- Visualization Completed...!!! --------------------------")
                logger.info("Total elapsed time: {0}".format(time.strftime("%H:%M:%S", time.gmtime(time.time() - start_time))))
                logger.info("End...!!!")

        except Exception as e:
            nlp_server.kill()
            logger.error("Exception: {}".format(e))

    return 1
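main() relies on a config_ini() helper that is not shown in the snippet; judging by the way config['PATHS'][...] and config['FB_LOGINS'][...] are indexed, it presumably wraps configparser. A minimal sketch under that assumption (the file name config.ini is a guess):

import configparser


def config_ini(path="config.ini"):
    # Hypothetical helper: load the INI file whose sections (PATHS, FB_LOGINS, ...)
    # are indexed throughout main().
    config = configparser.ConfigParser()
    config.read(path)
    return config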
Example #8
from selenium import webdriver
import demoji  # pip install demoji, then run demoji.download_codes() once after installing
import re
import nltk
nltk.download('stopwords')
nltk.download('wordnet')

from nltk.corpus import stopwords
import string
from nltk.tokenize import word_tokenize,TweetTokenizer
from nltk.stem.wordnet import WordNetLemmatizer
import os
import pickle

import pandas as pa
from flask import Flask
from scipy.stats import entropy
demoji.download_codes()  # required before demoji can remove emojis from text data

app = Flask(__name__)


#GOOGLE_CHROME_PATH = '/app/.apt/usr/bin/google_chrome'
#CHROMEDRIVER_PATH = '/app/.chromedriver/bin/chromedriver'


#Load the trained models using pickle
lda = pickle.load(open('lda_model','rb'))
dictionary = pickle.load(open('dictonary','rb'))
corpus = pickle.load(open('corpus','rb'))

## Processing Text
train_data = pa.read_csv('training_data.csv')
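This last snippet is also truncated; the comments say the demoji codes are downloaded so that emojis can be removed from the text data. A hedged sketch of the kind of cleaning step that typically follows, reusing the NLTK pieces imported above (the function name and the exact steps are assumptions):

def preprocess(text):
    # Hypothetical cleaning routine for the training data loaded above:
    # drop emojis, punctuation and stop words, then lemmatise the tokens.
    text = demoji.replace(text, "").lower()
    tokens = TweetTokenizer().tokenize(text)
    stop_words = set(stopwords.words('english'))
    lemmatizer = WordNetLemmatizer()
    return [lemmatizer.lemmatize(tok) for tok in tokens
            if tok not in stop_words and tok not in string.punctuation]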