Example #1
    def __init__(self):
        self.user_mng = UsersManager()
        self.products_mng = ProductsManager()
        self.pm_mng = PrivateMessageManager()
        products = utilities.load_json(products_path)
        users = utilities.load_json(users_path)
        credit_cards = utilities.load_json(credit_cards_path)
        private_messages = utilities.load_json(pm_path)
        categories = utilities.load_json(categories_path)
        if categories != 0:
            for category in categories:
                self.products_mng.categories_mng.import_category(category)
        if products != 0:
            for product in products:
                self.products_mng.import_product(product)
        if users != 0:
            for user in users:
                self.user_mng.import_user(user)
        if credit_cards != 0:
            for credit_card in credit_cards:
                self.user_mng.import_credit_card(credit_card)
        if private_messages != 0:
            for private_message in private_messages:
                self.pm_mng.import_private_message(private_message)
        UbidManager.total += 1
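Note: the != 0 checks above imply that this project's utilities.load_json returns 0 when a file is missing or cannot be parsed, instead of raising. A minimal sketch of a helper with that behavior (an assumption for illustration; the project's real implementation is not shown here):

import json
import os

def load_json(path):
    """Return the parsed JSON content of path, or 0 if it cannot be read."""
    if not os.path.isfile(path):
        return 0
    try:
        with open(path, 'r', encoding='utf-8') as f:
            return json.load(f)
    except (OSError, ValueError):
        return 0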
Example #2
def main():
    sys.stdout.write('Loading countries.json... ')
    countries = load_json(json_file)
    sys.stdout.write(Fore.GREEN + 'DONE\n')

    sys.stdout.write('Creating TimeZone, Language, and Currency objects... ')

    time_zones = extract_unique_time_zones(countries)
    create_TimeZone_objects(time_zones)

    languages = load_json('./json/languages.json')
    create_Language_objects(languages)

    currencies = extract_unique_currencies(countries)
    currency_name = load_json('./json/currencies.json')
    currency_name['BOV'] = 'Bolivian Mvdol'
    currency_name['SSP'] = 'South Sudanese Pound'
    currency_name['CHE'] = 'WIR Euro'
    currency_name['CHW'] = 'WIR Franc'
    currency_name['USN'] = 'United States dollar (next day)'
    currency_name['USS'] = 'United States dollar (same day)'
    currency_name['UYI'] = 'Uruguay Peso en Unidades Indexadas'

    create_Currency_objects(currencies, currency_name)

    sys.stdout.write(Fore.GREEN + 'OK\n')

    # the Language, Currency and TimeZone objects must be present
    # in the database before we create the Country objects!
    sys.stdout.write('Creating Country objects... ')
    create_Country_objects(countries)
    sys.stdout.write(Fore.GREEN + 'OK\n')
Example #3
def process():
    data = load_json(PATH_STEP2_CLEAN)
    kFold = MyKFold(10, shuffle=True)
    print("MLE generating sentences using Language model.")
    for train_tweets, _ in kFold(data):
        models = init_models(3, MLE)
        train_sents = compress(train_tweets)

        # padded multiple models, returning a list of (ngram, vocab)
        ngrams = padded_multiple_models(3, train_sents)

        # train models using ngrams
        fit_multiple_models(models, ngrams)

        for i in range(3):
            print(f"\nGenerating {Ngrams(i)} sentences:")
            for j in range(10):  # generate 10 sentences for each ngram.
                new_sentence = []
                word = models[i].generate(text_seed=['<s>'])
                new_sentence.append(word)
                while word != '</s>':
                    word = models[i].generate(text_seed=[word])
                    new_sentence.append(word)
                print(f"#{j}: [{' '.join(new_sentence)}]")
        break
Example #4
    def __init__(self, hawking, bot, *args, **kwargs):
        self.hawking = hawking
        self.bot = bot 

        self.questions = []
        self.is_mid_question_refresh = False
        self.last_question_refresh_time = time.time()

        ## Load config data
        self.submission_top_time = CONFIG_OPTIONS.get("stupid_question_top_time", "month")
        self.submission_count = CONFIG_OPTIONS.get("stupid_question_submission_count", 500)
        self.refresh_time_seconds = CONFIG_OPTIONS.get("stupid_question_refresh_time_seconds", 21600)
        ## Load module specific configs from 'stupid_questions.json' located in modules folder
        modules_folder_name = CONFIG_OPTIONS.get("modules_folder", "modules")
        config = utilities.load_json(os.path.sep.join([utilities.get_root_path(), modules_folder_name, "stupid_questions.json"]))
        reddit_client_id = config.get("reddit_client_id")
        reddit_secret = config.get("reddit_secret")

        subreddits = CONFIG_OPTIONS.get("stupid_question_subreddits", ["NoStupidQuestions"])
        try:
            self.reddit = Reddit(client_id=reddit_client_id, client_secret=reddit_secret, user_agent=self.REDDIT_USER_AGENT)
            ## Use a multireddit to pull random post from any of the chosen subreddits
            self.subreddit = self.reddit.subreddit("+".join(subreddits))
        except Exception:
            logger.exception("Unable to create reddit/subreddit instance")

        self.bot.loop.create_task(self.load_questions())
Example #5
    def __init__(self, hawking, bot, *args, **kwargs):
        self.hawking = hawking
        self.bot = bot

        ## Load module specific configs from 'stupid_questions.json' located in modules folder
        modules_folder_name = CONFIG_OPTIONS.get("modules_folder", "modules")
        config = utilities.load_json(
            os.path.sep.join([
                utilities.get_root_path(), modules_folder_name,
                "stupid_questions.json"
            ]))
        reddit_client_id = config.get("reddit_client_id")
        reddit_secret = config.get("reddit_secret")

        subreddits = CONFIG_OPTIONS.get("stupid_question_subreddits",
                                        ["NoStupidQuestions"])
        try:
            self.reddit = Reddit(client_id=reddit_client_id,
                                 client_secret=reddit_secret,
                                 user_agent=self.REDDIT_USER_AGENT)
            ## Use a multireddit to pull random post from any of the chosen subreddits
            self.subreddit = self.reddit.subreddit("+".join(subreddits))
        except Exception as e:
            utilities.debug_log("Unable to create reddit/subreddit instance,",
                                e,
                                debug_level=1)
Example #6
    def run(self):
        ## Keep bot going despite any misc service errors
        try:
            self.bot.run(utilities.load_json(self.token_file_path)[self.TOKEN_KEY])
        except Exception as e:
            utilities.debug_print("Critical exception when running bot", e, debug_level=0)
            time.sleep(1)
            self.run()
Example #7
def main():
    sys.stdout.write('Loading albums.json... ')
    albums = load_json(json_file)
    sys.stdout.write(Fore.GREEN + 'DONE\n')

    sys.stdout.write('Creating Album objects... ')
    create_Album_objects(albums)
    sys.stdout.write(Fore.GREEN + 'OK\n')
Example #8
def get_response():
    global response

    if response is None:
        response = utilities.load_json(REQUEST_URL)
    if response["statusCode"] != "OK":
        raise Exception("Code: " + response["statusCode"] +
                        ", Message: " + response["statusMessage"])
    return response
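The global response line means this function caches the parsed payload in a module-level variable, so response presumably starts out as None at module scope. A one-line sketch of that assumed initialization (not shown in the snippet):

# Assumed module-level cache: the first call to get_response() populates it,
# later calls return it unchanged.
response = None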
Example #9
def main():
    sys.stdout.write('Loading recordings.json... ')
    recordings = load_json('./json/recordings.json')
    sys.stdout.write(Fore.GREEN + 'DONE\n')

    sys.stdout.write('Creating Recording objects... ')
    for recording in recordings:
        create_Recording_object(recording)
    sys.stdout.write(Fore.GREEN + 'OK\n')
Example #10
def main():
    rates = load_json('./json/rates.json')

    for rate_key, usd_rate in rates.items():
        # keys are assumed to look like 'USD<ISO code>', e.g. 'USDEUR'
        currency = rate_key[3:]
        try:
            Currency_object = models.Currency.objects.get(iso_code=currency)
        except models.Currency.DoesNotExist:
            # skip rates for currencies that are not in the database
            continue
        Currency_object.usd_rate = usd_rate
        Currency_object.save()
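For the rate_key[3:] slice above to make sense, rates.json is presumably a flat mapping from 'USD' + ISO-code keys to numeric rates, along these lines (keys and values invented for illustration, shown as a Python dict):

rates = {
    "USDEUR": 0.92,
    "USDGBP": 0.79,
    "USDJPY": 151.3,
}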
Example #11
async def _experiment_details(experiment_id: str = Path(
    default='latest', title="ID of experiment")):
    if experiment_id == 'latest':
        experiment_id = max(os.listdir(config.EXPERIMENTS_DIR))
    experiment_dir = os.path.join(config.EXPERIMENTS_DIR, experiment_id)
    args = utils.load_json(
        filepath=os.path.join(experiment_dir, 'config.json'))
    classes = data.LabelEncoder.load(
        fp=os.path.join(experiment_dir, 'y_tokenizer.json')).classes
    performance = utils.load_json(
        filepath=os.path.join(experiment_dir, 'performance.json'))
    response = {
        'message': HTTPStatus.OK.phrase,
        'status-code': HTTPStatus.OK,
        'data': {
            "classes": classes,
            "args": args,
            "performance": performance
        }
    }
    config.logger.info(json.dumps(response, indent=2))
    return response
Example #12
    def run(self):
        '''Starts the bot up'''

        ## So ideally there would be some flavor of atexit.register or signal.signal command to gracefully shut the bot
        ## down upon SIGTERM or SIGINT. However that doesn't seem to be possible at the moment. Discord.py's got most of
        ## the functionality built into the base close() method that fires on SIGINT and SIGTERM, but the bot never ends
        ## up getting properly disconnected from the voice channels that it's connected to. I end up having to wait for
        ## a time out. Otherwise the bot will be in a weird state upon starting back up, and attempting to speak in one
        ## of the channels that it was previously in. Fortunately this bad state will self-recover in a minute or so,
        ## but it's still unpleasant. A temporary fix is to bump up the RestartSec= property in the service config to be
        ## long enough to allow for the bot to be forcefully disconnected

        logger.info('Starting up the bot.')
        self.bot.run(utilities.load_json(self.token_file_path)["token"])
Example #13
def predict(experiment_id, text):
    """Predict the class for a text using
    a trained model from an experiment."""
    # Get experiment config
    experiment_dir = os.path.join(config.EXPERIMENTS_DIR, experiment_id)
    experiment_config = utilities.load_json(
        os.path.join(experiment_dir, 'config.json'))
    args = Namespace(**experiment_config)

    # Preprocess
    texts = [text]
    X_tokenizer = data.Tokenizer.load(
        fp=os.path.join(experiment_dir, 'X_tokenizer.json'))
    y_tokenizer = data.LabelEncoder.load(
        fp=os.path.join(experiment_dir, 'y_tokenizer.json'))
    preprocessed_texts = data.preprocess_texts(
        texts, lower=args.lower, filters=args.filters)

    # Create dataset
    X_infer = np.array(X_tokenizer.texts_to_sequences(preprocessed_texts))
    y_filler = np.array([0]*len(X_infer))
    infer_set = data.TextDataset(
        X=X_infer, y=y_filler, batch_size=args.batch_size,
        max_filter_size=max(args.filter_sizes))

    # Load model
    model = models.TextCNN(
        embedding_dim=args.embedding_dim, vocab_size=len(X_tokenizer),
        num_filters=args.num_filters, filter_sizes=args.filter_sizes,
        hidden_dim=args.hidden_dim, dropout_p=args.dropout_p,
        num_classes=len(y_tokenizer.classes))
    model.load_state_dict(torch.load(os.path.join(experiment_dir, 'model.h5')))
    device = torch.device('cuda' if (
        torch.cuda.is_available() and args.cuda) else 'cpu')
    model = model.to(device)

    # Predict
    results = []
    y_prob, conv_outputs = predict_step(
        model=model, dataset=infer_set, filter_sizes=args.filter_sizes, device=device)
    for index in range(len(X_infer)):
        results.append({
            'raw_input': texts[index],
            'preprocessed_input': X_tokenizer.sequences_to_texts([X_infer[index]])[0],
            'probabilities': get_probability_distribution(y_prob[index], y_tokenizer.classes),
            'top_n_grams': get_top_n_grams(tokens=preprocessed_texts[index].split(' '),
                                           conv_outputs={
                                               k: v[index] for k, v in conv_outputs.items()},
                                           filter_sizes=args.filter_sizes)})
    return results
Example #14
def continuous_save_twitter():
    raw_json_file = PATH_STEP1_RAW
    raw_json_file = os.path.abspath(os.path.join('.', raw_json_file))

    raw_texts = []
    if os.path.exists(raw_json_file) and os.path.isfile(raw_json_file):
        raw_texts = load_json(raw_json_file)
    count = len(raw_texts)
    if count > 0:
        print(f"Preloaded {count} texts.")

    while count < 10000:
        new_texts = fetch_twitter(100)
        raw_texts.extend(new_texts)
        count = len(raw_texts)
        print(f"Outputting {count} texts...")
        save_json(raw_texts, raw_json_file, ensure_ascii=False)
        time.sleep(2)
Example #15
def process():
    # 1. First load the data into memory
    data = load_json(PATH_STEP2_CLEAN)

    # 2. Making 10-Fold Cross Validation
    kFold = MyKFold(10, False)
    print("Starting 10-Fold CV training/test")
    means = np.zeros((10, 3))
    for idx, (train_tweets, test_tweets) in enumerate(kFold(data)):
        print(f"Fold {idx}:")
        models = init_models(3, KneserNeyInterpolated)
        train_sents = compress(train_tweets)
        test_sents = compress(test_tweets)

        # padded multiple models, returning a list of (ngram, vocab)
        ngrams = padded_multiple_models(3, train_sents)

        # train models using ngrams
        fit_multiple_models(models, ngrams)

        test_ngrams = padded_multiple_models(3, test_sents)
        for n in range(0, 3):
            temp = [(models[n]).perplexity(i)
                    for i in tqdm((test_ngrams[n])[0],
                                  desc=f"perplexity of {Ngrams(n)}",
                                  total=len(test_sents))]
            means[idx, n] = np.mean(temp)
        print(
            f"run {idx}, unigram: {means[idx, 0]}, bigram: {means[idx, 1]}, trigram: {means[idx, 2]}"
        )

    final_means = np.mean(means, axis=0)
    print("Final mean of 10-Fold CV:")
    print(
        f"unigram: {final_means[0]}, bigram: {final_means[1]}, trigram: {final_means[2]}"
    )
Example #16

if __name__ == '__main__':
    # Arguments
    parser = ArgumentParser()
    parser.add_argument('--experiment-id', type=str,
                        default="latest", help="name of the model to load")
    parser.add_argument('--text', type=str,
                        required=True, help="text to predict")
    args = parser.parse_args()

    # Load model config
    if args.experiment_id == 'latest':
        args.experiment_id = max(os.listdir(config.EXPERIMENTS_DIR))
    experiment_dir = os.path.join(config.EXPERIMENTS_DIR, args.experiment_id)
    experiment_config = utilities.load_json(
        os.path.join(experiment_dir, 'config.json'))
    args = Namespace(**{**args.__dict__, **experiment_config})
    config.logger.info(f"→ Using {args.experiment_id}")

    # Preprocess
    texts = [args.text]
    X_tokenizer = data.Tokenizer.load(
        fp=os.path.join(experiment_dir, 'X_tokenizer.json'))
    y_tokenizer = data.LabelEncoder.load(
        fp=os.path.join(experiment_dir, 'y_tokenizer.json'))
    preprocessed_texts = data.preprocess_texts(
        texts, lower=args.lower, filters=args.filters)

    # Create dataset
    X_infer = np.array(X_tokenizer.texts_to_sequences(preprocessed_texts))
    y_filler = np.array([0]*len(X_infer))
Example #17
def predict(experiment_id, text):
    """Predict the class for a text using
    a trained model from an experiment."""
    # Get experiment config
    experiment_dir = os.path.join(config.EXPERIMENTS_DIR, experiment_id)
    experiment_config = utilities.load_json(
        os.path.join(experiment_dir, 'config.json'))
    args = Namespace(**experiment_config)

    # Tokenizers
    texts = [text]
    with open(os.path.join(experiment_dir, 'X_tokenizer.json'), 'r') as fp:
        X_tokenizer = tokenizer_from_json(json.load(fp))
    y_tokenizer = LabelEncoder()
    y_tokenizer.classes_ = np.load(os.path.join(experiment_dir,
                                                'y_tokenizer.npy'),
                                   allow_pickle=True)

    # Create dataset generator
    X_infer = np.array(X_tokenizer.texts_to_sequences(texts))
    preprocessed_texts = X_tokenizer.sequences_to_texts(X_infer)
    y_filler = np.array([0] * len(X_infer))
    inference_generator = data.DataGenerator(X=X_infer,
                                             y=y_filler,
                                             batch_size=args.batch_size,
                                             max_filter_size=max(
                                                 args.filter_sizes))

    # Load model
    model = models.TextCNN(embedding_dim=args.embedding_dim,
                           vocab_size=len(X_tokenizer.word_index) + 1,
                           num_filters=args.num_filters,
                           filter_sizes=args.filter_sizes,
                           hidden_dim=args.hidden_dim,
                           dropout_p=args.dropout_p,
                           num_classes=len(y_tokenizer.classes_))
    model.summary(input_shape=(10, ))  # build it
    model_path = os.path.join(experiment_dir, 'model/cp.ckpt')
    model.load_weights(model_path)

    # Conv output model
    conv_outputs_model = models.ConvOutputsModel(
        vocab_size=len(X_tokenizer.word_index) + 1,
        embedding_dim=args.embedding_dim,
        filter_sizes=args.filter_sizes,
        num_filters=args.num_filters)
    conv_outputs_model.summary(input_shape=(10, ))  # build it

    # Set weights
    conv_outputs_model.layers[0].set_weights(model.layers[0].get_weights())
    conv_layer_start_num = 1
    for layer_num in range(conv_layer_start_num,
                           conv_layer_start_num + len(args.filter_sizes)):
        conv_outputs_model.layers[layer_num].set_weights(
            model.layers[layer_num].get_weights())

    # Predict
    results = []
    y_prob = model.predict(x=inference_generator, verbose=1)
    conv_outputs = conv_outputs_model.predict(x=inference_generator, verbose=1)
    for index in range(len(X_infer)):
        results.append({
            'raw_input': texts[index],
            'preprocessed_input': preprocessed_texts[index],
            'probabilities': get_probability_distribution(y_prob[index],
                                                          y_tokenizer.classes_),
            'top_n_grams': get_top_n_grams(
                tokens=preprocessed_texts[index].split(' '),
                conv_outputs=conv_outputs,
                filter_sizes=args.filter_sizes)
        })

    return results
Example #18
def process():
    data = load_json(PATH_STEP1_RAW)
    data = list(map(extract_text, data))
    data = list(map(sentence_segment, data))
    data = list(map(word_tokenize_sentpad, data))
    save_json(data, PATH_STEP2_CLEAN, ensure_ascii=False)
Example #19
    def get_json(self, url):
        return load_json(url)
Example #20
import os
import logging
import logging.config

import utilities as utils

# Directories
BASE_DIR = os.getcwd()  # project root
LOGS_DIR = os.path.join(BASE_DIR, 'logs')
EXPERIMENTS_DIR = os.path.join(BASE_DIR, 'experiments')
TENSORBOARD_DIR = os.path.join(BASE_DIR, 'tensorboard')

# Create dirs
utils.create_dirs(LOGS_DIR)
utils.create_dirs(EXPERIMENTS_DIR)
utils.create_dirs(TENSORBOARD_DIR)

# Loggers
log_config = utils.load_json(filepath=os.path.join(BASE_DIR, 'logging.json'))
logging.config.dictConfig(log_config)
logger = logging.getLogger('logger')
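utils.create_dirs is not included in the snippet; a plausible minimal implementation (an assumption for illustration, not the project's actual code) simply wraps os.makedirs:

import os

def create_dirs(dirpath):
    """Create dirpath (including parents) if it does not already exist."""
    os.makedirs(dirpath, exist_ok=True)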
Example #21
    def get_json(self, url):
        return load_json(url, {"X-Mashape-Authorization": AUTH_KEY})
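Unlike the file-based examples, Examples #8, #19 and #21 pass a URL (and, in #21, extra headers) to load_json, so that project's helper apparently fetches and parses JSON over HTTP. A rough standard-library sketch of such a helper (names and error handling are assumptions, not the original code):

import json
import urllib.request

def load_json(url, headers=None):
    """Fetch url and return the parsed JSON body."""
    request = urllib.request.Request(url, headers=headers or {})
    with urllib.request.urlopen(request) as response:
        return json.loads(response.read().decode('utf-8'))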