This module uses simple deep learning regression to forecast web traffic. @Author: DuFei @Created Time: 2019/11/02 22:04 """ import numpy as np from sklearn.neural_network import MLPRegressor from data.kaggle_wikipedia_traffic import get_kaggle_sample_data from util.logger_util import get_logger from util.metric_util import smape from util.plot_util import plot_regress_predict from preprocess.features_engineering import log1p_and_normalize_value, recover_log1p_and_normalize_value, \ make_time_features, split_time_series_data logger = get_logger() def evaluate_by_mlp(input_df): raw_values, mean, std = log1p_and_normalize_value(input_df.values) X = np.stack(make_time_features(input_df.index), axis=-1) y = np.expand_dims(raw_values, -1) X_train, X_test, y_train, y_test = split_time_series_data(X, y, test_length=100) logger.info(f'raw X shape:{X.shape} raw y shape:{y.shape}') logger.info(f'X_train shape:{X_train.shape} y_train shape:{y_train.shape} ' f'X_test shape:{X_test.shape} y_test shape:{y_test.shape}') mlp_regression = MLPRegressor(hidden_layer_sizes=(10, 50,), activation='logistic', solver='sgd', early_stopping=True)
import time from scrapy.dupefilters import BaseDupeFilter from scrapy.utils.request import request_fingerprint from util.logger_util import get_logger from .connection import get_redis_from_settings DEFAULT_DUPEFILTER_KEY = "dupefilter:%(timestamp)s" logger = get_logger("my_scrapy_redis_dupefilter.log") # TODO: Rename class to RedisDupeFilter. class RFPDupeFilter(BaseDupeFilter): """Redis-based request duplicates filter. This class can also be used with default Scrapy's scheduler. """ logger = logger def __init__(self, server, key, debug=False): """Initialize the duplicates filter. Parameters ---------- server : redis.StrictRedis The redis server instance. key : str