def compute_values(self, kmin, kmax, kstep):
    """Vectorize self.docs, train a 20-topic online BTM, and print a topic summary.

    NOTE(review): kmin, kmax and kstep are accepted but never used — the
    topic count is hard-coded to 20. Confirm whether a k-sweep was intended.
    """
    # Build the document-term matrix from the raw documents.
    vectorizer = CountVectorizer()
    term_matrix = vectorizer.fit_transform(self.docs)

    # Vocabulary and the biterm (word-pair) representation required by BTM.
    vocabulary = np.array(vectorizer.get_feature_names())
    doc_biterms = vec_to_biterms(term_matrix)

    # Train the online Biterm Topic Model, then report the top-10 words per topic.
    btm = oBTM(num_topics=20, V=vocabulary)
    topics = btm.fit_transform(doc_biterms, iterations=100)
    topic_summuary(btm.phi_wz.T, term_matrix, vocabulary, 10)
def lightningBTM(num_top, vocabulary, b_terms, x1):
    """Train an online BTM quickly and print timing plus topic coherence.

    "Lightning" variant: feeds the model 100 documents' biterms per chunk
    and runs only 10 Gibbs iterations per chunk (instead of the usual 50).
    """
    btm = oBTM(num_topics=num_top, V=vocabulary)  # create the btm object
    start_time = time.time()
    # Online training: process the biterm lists in chunks of 100 documents.
    for chunk_start in range(0, len(b_terms), 100):
        chunk = b_terms[chunk_start:chunk_start + 100]
        btm.fit(chunk, iterations=10)
    topics = btm.transform(b_terms)
    run_time = time.time() - start_time
    print("For k = %s topics.." % num_top)
    print("BTM online took %s seconds to train" % run_time)
    # Examine topic coherence scores:
    print("\nTopic Coherence:")
    topic_summuary(btm.phi_wz.T, x1, vocabulary, 10)
def biterm_topic_model_topic_extraction():
    """
    Function performs topic extraction on Tweets using the Biterm Topic Model
    (online BTM) — the previous docstring incorrectly said "Gensim HDP model".

    Vectorizes the SLO feature corpus with raw term counts, trains an online
    BTM in chunks of 100 documents, prints the topic-coherence summary, and
    shows the dominant topic for a few example texts.

    :return: None.
    """
    # BTM, like LDA, is a probabilistic graphical model and requires raw term counts.
    tf_vectorizer = CountVectorizer(max_df=0.95, min_df=2, max_features=1000,
                                    stop_words='english')
    tf = tf_vectorizer.fit_transform(slo_feature_series)
    tf_feature_names = tf_vectorizer.get_feature_names()

    log.info(f"\n.fit_transform - Learn the vocabulary dictionary and return term-document matrix.")
    log.info(f"{tf}\n")
    log.info(f"\n.get_feature_names - Array mapping from feature integer indices to feature name")
    log.info(f"{tf_feature_names}\n")

    # Convert corpus of documents (vectorized text) to numpy array.
    tf_array = tf.toarray()
    # Convert dictionary of words (vocabulary) to numpy array.
    tf_feature_names = np.array(tf_vectorizer.get_feature_names())

    # Derive the biterm (word-pair) representation of each document.
    biterms = vec_to_biterms(tf_array)

    # Train the online BTM, feeding it 100 documents' biterms per chunk.
    btm = oBTM(num_topics=20, V=tf_feature_names)
    print("\n\n Train Online BTM ..")
    for i in range(0, len(biterms), 100):  # process a chunk of 100 texts
        biterms_chunk = biterms[i:i + 100]
        btm.fit(biterms_chunk, iterations=50)
    topics = btm.transform(biterms)
    time.sleep(3)

    print("\n\n Topic coherence ..")
    topic_summuary(btm.phi_wz.T, tf_array, tf_feature_names, 10)

    print("\n\n Texts & Topics ..")
    for i in range(1, 10):
        print("{} (topic: {})".format(slo_feature_series[i], topics[i].argmax()))
if __name__ == "__main__":
    # Demo: train a 20-topic BTM on the first 50 Reuters headlines.
    # Use a context manager so the file handle is closed deterministically
    # (the original leaked it via a bare open().read()).
    with open('./data/reuters.titles') as title_file:
        texts = title_file.read().splitlines()[:50]

    # Vectorize the texts and recover the vocabulary.
    vec = CountVectorizer(stop_words='english')
    X = vec.fit_transform(texts).toarray()
    vocab = np.array(vec.get_feature_names())

    # Biterm (word-pair) representation of every document.
    biterms = vec_to_biterms(X)

    # Train the BTM on all biterms at once.
    btm = oBTM(num_topics=20, V=vocab)
    print("\n\n Train BTM ..")
    topics = btm.fit_transform(biterms, iterations=100)

    print("\n\n Topic coherence ..")
    topic_summuary(btm.phi_wz.T, X, vocab, 10)

    # Show each headline with its dominant topic index.
    print("\n\n Texts & Topics ..")
    for i, text in enumerate(texts):
        print("{} (topic: {})".format(text, topics[i].argmax()))
def compute_topics(self):
    """Fit an online BTM on the prepared biterms and return the topic matrix.

    The fitted model and the per-document topic distributions are also kept
    on the instance (self.btm, self.topics) for later reuse.
    """
    self.btm = oBTM(num_topics=self.num_topics, V=self.vocab)
    self.topics = self.btm.fit_transform(self.biterms, iterations=10)
    # Print the top-5 words per topic as a quick coherence check.
    topic_summuary(self.btm.phi_wz.T, self.X, self.vocab, 5)
    return self.topics
def generate_topics(self, data):
    """Print a coherence summary (top-10 words per topic) for the fitted model."""
    topic_summuary(self.model.phi_wz.T, data, self.dictionary, 10)
def perform_BTM(fpath, num_top):
    """Run BTM topic modeling (num_top topics) over one company's tweets.

    Reads the tweet spreadsheet at fpath, cleans and lemmatizes the tweet
    text, drops non-English and too-short tweets, trains a Biterm Topic
    Model, prints timing and coherence scores, and writes the per-tweet
    topic assignments back out to an Excel file.
    """
    company_data = pd.read_excel(fpath)
    company_name = company_data.iloc[0, company_data.columns.get_loc("Author Name")]
    print("\n\n\n\n\nBeginning BTM modeling for %s" % company_name)
    print("This is using %s topics" % num_top)

    # Drop retweets: they aren't technically the company account's own tweets.
    retweet_mask = company_data["Content"].str.contains("^RT @")
    company_tweets = company_data[~retweet_mask].copy()

    # Normalize 'smart' apostrophes and quotation marks to keyboard equivalents.
    company_tweets["Content"] = company_tweets["Content"].str.replace(r"’", "'")
    company_tweets["Content"] = company_tweets["Content"].str.replace(r"‘", "'")
    company_tweets["Content"] = company_tweets["Content"].str.replace(r"“", "\"")
    company_tweets["Content"] = company_tweets["Content"].str.replace(r"”", "\"")

    # Strip possessives ("Disney's" -> "Disney"), then remaining apostrophes
    # ("I'm" -> "Im").
    company_tweets["Content"] = company_tweets["Content"].str.replace(r"'s", "")
    company_tweets["Content"] = company_tweets["Content"].str.replace(r"'", "")

    # Standardize (links, mentions, casing, whitespace) then lemmatize the text.
    textual_tweets = standardize_text(company_tweets, "Content")
    textual_tweets["Content"] = lem_with_postag(textual_tweets, "Content")

    # Keep only tweets originally in English that still have text left.
    English_tweets = textual_tweets[textual_tweets["Language"] == "en"]
    nonempty_mask = English_tweets["Content"] != ""
    cleanGlish_tweets = English_tweets[nonempty_mask]

    # Build the stopword set; add apostrophe-free variants because the tweet
    # text had its apostrophes removed above, plus a few manual entries.
    stop_words = set(stopwords.words("english"))
    stop_words_frame = pd.DataFrame(stop_words)
    stop_words_frame["Words"] = stop_words
    apostrophe_free = stop_words_frame["Words"].str.replace(r"'", "")
    for word in apostrophe_free:
        if word not in stop_words:
            stop_words.add(word)
    stop_words.add("wed")
    stop_words.add("us")
    # Lemmatization converts "us" to "u", so "u" must be a stopword too.
    stop_words.add("u")

    # The vectorizer filters the stopwords during fit_transform below.
    vec = CountVectorizer(stop_words=stop_words)

    # Tokenize and strip stopwords so too-short tweets can be filtered out.
    tokenizer = RegexpTokenizer(r'\w+')
    cleanGlish_tweets["tokens"] = cleanGlish_tweets["Content"].apply(tokenizer.tokenize)
    cleanGlish_tweets["clean_tokens"] = clean_tokenize(cleanGlish_tweets, "tokens", stop_words)
    print("Before filtering out tweets with 3 words or less, cleanGlish has %s tweets"
          % len(cleanGlish_tweets["Content"]))
    # Drop tweets with fewer than 3 non-stopword tokens.
    cleanGlish_tweets["num_words"] = [len(toks) for toks in cleanGlish_tweets["clean_tokens"]]
    cleanGlish_tweets2 = cleanGlish_tweets[cleanGlish_tweets["num_words"] >= 3].copy()
    print("After filtering, cleanGlish2 has %s tweets" % len(cleanGlish_tweets2["Content"]))

    # Vectorize the surviving tweets; derive the vocabulary and biterms.
    X = vec.fit_transform(cleanGlish_tweets2["Content"]).toarray()
    vocab = np.array(vec.get_feature_names())
    biterms = vec_to_biterms(X)

    # Train the BTM and time the run.
    start_time = time.time()
    btm = oBTM(num_topics=num_top, V=vocab)
    topics = btm.fit_transform(biterms, iterations=100)
    run_time = time.time() - start_time
    print("For %s..." % company_name)
    print("BTM took %s seconds to train" % run_time)

    # Attach each tweet's dominant topic (stored as a string) to the frame.
    cleanGlish_tweets2["topic"] = [
        str(topics[i].argmax()) for i in range(len(cleanGlish_tweets2["Content"]))
    ]

    # Examine topic coherence scores:
    print("\nTopic Coherence:")
    topic_summuary(btm.phi_wz.T, X, vocab, 10)

    # Save the tweet topics:
    respath2 = respath + str(company_name) + resEnding
    cleanGlish_tweets2.to_excel(respath2)
def perform_BTM(fpath, num_top):
    """Run BTM topic modeling (num_top topics) over one company's reply tweets.

    Same pipeline as the other perform_BTM variant, except tweets are first
    tagged as original tweets (OT) vs. in-reply-to tweets (IRT) and only the
    IRT subset is modeled. The raw text is preserved in a "Content2" column
    before preprocessing, and results are written out to an Excel file.
    """
    company_data = pd.read_excel(fpath)
    company_name = company_data.iloc[0, company_data.columns.get_loc("Author Name")]
    print("\n\n\n\n\nBeginning BTM modeling for %s" % company_name)
    print("This is using %s topics" % num_top)

    # Drop retweets: they aren't technically the company account's own tweets.
    retweet_mask = company_data["Content"].str.contains("^RT @")
    company_tweets2 = company_data[~retweet_mask].copy()

    # Tag tweets as IRT (reply) vs. OT (original) BEFORE any text is altered.
    initIRT = [bool(re.search("^@", text)) for text in company_tweets2["Content"]]
    initOT = [not flag for flag in initIRT]
    company_tweets2["IRT"] = initIRT
    company_tweets2["OT"] = initOT
    # A missing 'In Reply To' value means the tweet is an original tweet.
    company_tweets2["In Reply To"] = company_tweets2["In Reply To"].replace(np.nan, "OT", regex=True)

    # Refine the initial OT vs. IRT split, then keep only the IRT tweets.
    company_tweets3 = cleanSplit(company_tweets2, "Content", "IRT", "In Reply To", "Author", "OT")
    company_tweets = company_tweets3[company_tweets3["IRT"] == True].copy()

    # Preserve the raw tweet text before preprocessing mutates "Content".
    company_tweets["Content2"] = company_tweets["Content"]

    # Normalize 'smart' apostrophes and quotation marks to keyboard equivalents.
    company_tweets["Content"] = company_tweets["Content"].str.replace(r"’", "'")
    company_tweets["Content"] = company_tweets["Content"].str.replace(r"‘", "'")
    company_tweets["Content"] = company_tweets["Content"].str.replace(r"“", "\"")
    company_tweets["Content"] = company_tweets["Content"].str.replace(r"”", "\"")

    # Strip possessives ("Disney's" -> "Disney"), then remaining apostrophes
    # ("I'm" -> "Im").
    company_tweets["Content"] = company_tweets["Content"].str.replace(r"'s", "")
    company_tweets["Content"] = company_tweets["Content"].str.replace(r"'", "")

    # Standardize (links, mentions, casing, whitespace) then lemmatize the text.
    textual_tweets = standardize_text(company_tweets, "Content")
    textual_tweets["Content"] = lem_with_postag(textual_tweets, "Content")

    # Keep only tweets originally in English that still have text left.
    English_tweets = textual_tweets[textual_tweets["Language"] == "en"]
    nonempty_mask = English_tweets["Content"] != ""
    cleanGlish_tweets = English_tweets[nonempty_mask]

    # Build the stopword set; add apostrophe-free variants because the tweet
    # text had its apostrophes removed above, plus a few manual entries.
    stop_words = set(stopwords.words("english"))
    stop_words_frame = pd.DataFrame(stop_words)
    stop_words_frame["Words"] = stop_words
    apostrophe_free = stop_words_frame["Words"].str.replace(r"'", "")
    for word in apostrophe_free:
        if word not in stop_words:
            stop_words.add(word)
    stop_words.add("wed")
    stop_words.add("us")
    # Lemmatization converts "us" to "u", so "u" must be a stopword too.
    stop_words.add("u")

    # The vectorizer filters the stopwords during fit_transform below.
    vec = CountVectorizer(stop_words=stop_words)

    # Tokenize and strip stopwords so too-short tweets can be filtered out.
    tokenizer = RegexpTokenizer(r'\w+')
    cleanGlish_tweets["tokens"] = cleanGlish_tweets["Content"].apply(tokenizer.tokenize)
    cleanGlish_tweets["clean_tokens"] = clean_tokenize(cleanGlish_tweets, "tokens", stop_words)
    print("Before filtering out tweets with 3 words or less, cleanGlish has %s tweets"
          % len(cleanGlish_tweets["Content"]))
    # Drop tweets with fewer than 3 non-stopword tokens.
    cleanGlish_tweets["num_words"] = [len(toks) for toks in cleanGlish_tweets["clean_tokens"]]
    cleanGlish_tweets2 = cleanGlish_tweets[cleanGlish_tweets["num_words"] >= 3].copy()
    print("After filtering, cleanGlish2 has %s tweets" % len(cleanGlish_tweets2["Content"]))

    # Vectorize the surviving tweets; derive the vocabulary and biterms.
    X = vec.fit_transform(cleanGlish_tweets2["Content"]).toarray()
    vocab = np.array(vec.get_feature_names())
    biterms = vec_to_biterms(X)

    # Train the BTM and time the run.
    start_time = time.time()
    btm = oBTM(num_topics=num_top, V=vocab)
    topics = btm.fit_transform(biterms, iterations=100)
    run_time = time.time() - start_time
    print("For %s..." % company_name)
    print("BTM took %s seconds to train" % run_time)

    # Attach each tweet's dominant topic index to the frame.
    cleanGlish_tweets2["topic"] = [
        topics[i].argmax() for i in range(len(cleanGlish_tweets2["Content"]))
    ]

    # Examine topic coherence scores:
    print("\nTopic Coherence:")
    topic_summuary(btm.phi_wz.T, X, vocab, 10)

    # Save the tweet topics:
    respath2 = respath + str(company_name) + resEnding
    cleanGlish_tweets2.to_excel(respath2)