示例#1
0
def graph():
    """Plot per-sentence positive and negative sentiment scores of a text file.

    Asks the user to choose a file, splits its contents into sentences with
    ``nltk.sent_tokenize``, makes sure the ``fasttext-social-network-model``
    data file exists under ``DATA_BASE_PATH`` (downloading it when missing),
    runs ``FastTextSocialNetworkModel.predict`` with ``k=2`` on the sentences
    and plots the ``'positive'`` and ``'negative'`` values against the
    sentence index. A label missing from a sentence's prediction is plotted
    as ``0.0``.

    Returns without doing anything when no file is chosen.

    Raises:
        ValueError: If the model package name is not in ``AVAILABLE_FILES``.
    """
    file = filedialog.askopenfilename(filetypes=(("Text files", "*.txt"),
                                                 ("all files", "*.*")))
    if not file:
        # No path came back from the dialog, so there is nothing to open.
        return

    # The context manager closes the handle as soon as the text is read.
    with open(file) as f:
        raw = f.read()
    sentences = nltk.sent_tokenize(raw)

    downloader = DataDownloader()
    for filename in ['fasttext-social-network-model']:
        if filename not in AVAILABLE_FILES:
            raise ValueError(f'Unknown package: {filename}')
        source, destination = AVAILABLE_FILES[filename]
        destination_path: str = os.path.join(DATA_BASE_PATH, destination)
        if os.path.exists(destination_path):
            # Already on disk; skip the download.
            continue
        downloader.download(source=source, destination=destination)

    from dostoevsky.tokenization import RegexTokenizer
    from dostoevsky.models import FastTextSocialNetworkModel

    tokenizer = RegexTokenizer()
    model = FastTextSocialNetworkModel(tokenizer=tokenizer)

    results = model.predict(sentences, k=2)

    # Build each series in a single pass. zip() keeps the pairing of
    # sentences with predictions (and its truncation to the shorter one).
    positive_values = []
    negative_values = []
    for _, sentiment in zip(sentences, results):
        positive = sentiment.get('positive')
        negative = sentiment.get('negative')
        positive_values.append(0.0 if positive is None else positive)
        negative_values.append(0.0 if negative is None else negative)

    n_value = np.array(negative_values)
    p_value = np.array(positive_values)
    counts_value = np.arange(len(negative_values))
    # Both series share the sentence index as their x axis.
    plt.plot(counts_value, p_value, counts_value, n_value)
    plt.show()
示例#2
0
def init_dostoevsky():
    """Fetch any missing data files and bind the global ``MODEL``.

    For ``'vk-embeddings'`` and ``'cnn-social-network-model'`` the entry in
    ``AVAILABLE_FILES`` is looked up and downloaded unless its destination
    already exists under ``DATA_BASE_PATH``. ``MODEL`` is then set to a
    ``SocialNetworkModel`` built with a ``UDBaselineTokenizer``, a
    ``SocialNetworkWordVectores`` container and ``lemmatize=False``.
    """
    global MODEL

    fetcher = DataDownloader()
    for package in ('vk-embeddings', 'cnn-social-network-model'):
        remote, local = AVAILABLE_FILES[package]
        already_present = os.path.exists(os.path.join(DATA_BASE_PATH, local))
        if not already_present:
            fetcher.download(source=remote, destination=local)

    MODEL = SocialNetworkModel(
        tokenizer=UDBaselineTokenizer(),
        word_vectors_container=SocialNetworkWordVectores(),
        lemmatize=False,
    )
示例#3
0
def begin():
    """Run sentiment analysis on a chosen text file and append the results.

    Asks the user to choose a file, splits its contents into sentences with
    ``nltk.sent_tokenize``, makes sure the ``fasttext-social-network-model``
    data file exists under ``DATA_BASE_PATH`` (downloading it when missing),
    runs ``FastTextSocialNetworkModel.predict`` with ``k=2`` on the sentences
    and inserts one ``sentence -> prediction`` entry per sentence at ``END``
    of the module-level ``text`` object (presumably a tkinter Text widget;
    it is defined outside this function).

    Returns without doing anything when no file is chosen.

    Raises:
        ValueError: If the model package name is not in ``AVAILABLE_FILES``.
    """
    file = filedialog.askopenfilename(filetypes=(("Text files", "*.txt"),
                                                 ("all files", "*.*")))
    if not file:
        # No path came back from the dialog, so there is nothing to open.
        return

    # The context manager closes the handle as soon as the text is read.
    with open(file) as f:
        raw = f.read()
    sentences = nltk.sent_tokenize(raw)

    downloader = DataDownloader()
    for filename in ['fasttext-social-network-model']:
        if filename not in AVAILABLE_FILES:
            raise ValueError(f'Unknown package: {filename}')
        source, destination = AVAILABLE_FILES[filename]
        destination_path: str = os.path.join(DATA_BASE_PATH, destination)
        if os.path.exists(destination_path):
            # Already on disk; skip the download.
            continue
        downloader.download(source=source, destination=destination)

    tokenizer = RegexTokenizer()
    model = FastTextSocialNetworkModel(tokenizer=tokenizer)

    results = model.predict(sentences, k=2)

    for message, sentiment in zip(sentences, results):
        # Insert a single string. The original handed over a tuple of the
        # pieces, which is not text and is left to the widget layer to
        # serialise instead of being shown as the intended lines.
        analysis_line = f'\n{message}\n->\n{sentiment}\n'
        text.insert(END, analysis_line)
示例#4
0
def data_downloader():
    """Create and return a new ``DataDownloader`` instance."""
    downloader = DataDownloader()
    return downloader
import os
import sys
import typing

from dostoevsky.data import DataDownloader, DATA_BASE_PATH, AVAILABLE_FILES

if __name__ == '__main__':
    # Command-line entry point: ``<command> [<package> ...]``. The only
    # recognised command is ``download``.

    # Exit successfully without doing anything when --dry-run appears
    # anywhere on the command line.
    if '--dry-run' in sys.argv:
        sys.exit(0)

    # Report a missing command explicitly instead of letting sys.argv[1]
    # fail with a bare IndexError.
    if len(sys.argv) < 2:
        raise ValueError('Missing command')

    command: str = sys.argv[1]
    arguments: typing.List[str] = sys.argv[2:]
    if command == 'download':
        downloader = DataDownloader()
        for filename in arguments:
            if filename not in AVAILABLE_FILES:
                raise ValueError(f'Unknown package: {filename}')
            source, destination = AVAILABLE_FILES[filename]
            destination_path: str = os.path.join(DATA_BASE_PATH, destination)
            if os.path.exists(destination_path):
                # Already on disk; skip the download.
                continue
            downloader.download(source=source, destination=destination)
    else:
        raise ValueError('Unknown command')
def download_dostoevsky_data():
    """Download the ``fasttext-social-network-model`` data file.

    The source and destination come from ``AVAILABLE_FILES``. The download
    is always requested; no check is made for an existing copy.
    """
    fetcher = DataDownloader()
    src, dst = AVAILABLE_FILES['fasttext-social-network-model']
    fetcher.download(source=src, destination=dst)