def text_regression(self, output_dim: int = None, **kwargs) -> ak.TextRegressor:
    """Build an AutoKeras text regressor from this object's settings.

    Args:
        output_dim (int, optional): Number of output dimensions.
            Defaults to None.
        **kwargs: Additional keyword arguments forwarded unchanged to
            ``ak.TextRegressor``.

    Returns:
        ak.TextRegressor: AutoKERAS text regression class.
    """
    # Settings read from the instance, in the same order they are passed on.
    shared_settings = {
        "loss": self.loss,
        "metrics": self.metrics,
        "project_name": self.project_name,
        "max_trials": self.max_trials,
        "directory": self.directory,
        "objective": self.objective,
        "tuner": self.tuner,
        "overwrite": self.overwrite,
        "seed": self.seed,
        "max_model_size": self.max_model_size,
    }
    return ak.TextRegressor(output_dim=output_dim, **shared_settings, **kwargs)
def test_text_regressor(tmp_dir):
    """Fit a TextRegressor on ``common.imdb_raw()`` text and check the prediction shape."""
    train_split, test_split = common.imdb_raw()
    train_x, raw_train_y = train_split
    test_x, raw_test_y = test_split

    # Only the row counts of the loaded labels are used; the regression
    # targets themselves come from common.generate_data.
    train_y = common.generate_data(num_instances=raw_train_y.shape[0], shape=(1,))
    test_y = common.generate_data(num_instances=raw_test_y.shape[0], shape=(1,))

    regressor = ak.TextRegressor(directory=tmp_dir, max_trials=2, seed=common.SEED)
    regressor.fit(train_x, train_y, epochs=1, validation_data=(test_x, test_y))

    predictions = regressor.predict(test_x)
    assert predictions.shape == (len(test_x), 1)
def test_text_regressor(tmp_path):
    """Fit, export and predict with a TextRegressor on generated text data."""
    sample_count = 300
    texts = utils.generate_text_data(num_instances=sample_count)
    targets = utils.generate_data(num_instances=sample_count, shape=(1,))

    regressor = ak.TextRegressor(directory=tmp_path, max_trials=2, seed=utils.SEED)
    # The training arrays are reused as the validation set.
    regressor.fit(texts, targets, epochs=1, validation_data=(texts, targets))
    regressor.export_model()

    predictions = regressor.predict(texts)
    assert predictions.shape == (len(texts), 1)
def build_model(self) -> ak.AutoModel:
    """Create the AutoKeras task matching ``self.data_type`` and ``self.task_type``.

    Supported data types are ``'image'``, ``'text'`` and ``'csv'``; supported
    task types are ``'regression'`` and ``'classification'``.

    Returns:
        A default-constructed AutoKeras task for the selected combination,
        or ``None`` when the combination is not one of the supported ones.
    """
    model = None
    if self.data_type == 'image':
        if self.task_type == 'regression':
            model = ak.ImageRegressor()
        elif self.task_type == 'classification':
            model = ak.ImageClassifier()
    elif self.data_type == 'text':
        if self.task_type == 'regression':
            model = ak.TextRegressor()
        elif self.task_type == 'classification':
            # Classification must use the classifier task; this branch
            # previously built a TextRegressor by mistake.
            model = ak.TextClassifier()
    elif self.data_type == 'csv':
        if self.task_type == 'regression':
            model = ak.StructuredDataRegressor()
        elif self.task_type == 'classification':
            model = ak.StructuredDataClassifier()
    return model
def test_text_regressor(tmp_dir):
    """Train a TextRegressor on random targets and check the prediction shape."""
    train_split, test_split = imdb_raw()
    train_x, _ = train_split
    test_x, _ = test_split

    # The loaded labels are discarded in favour of 100 random regression targets.
    train_y = np.random.rand(100, 1)

    regressor = ak.TextRegressor(directory=tmp_dir, max_trials=2)
    regressor.fit(train_x, train_y, epochs=2, validation_split=0.2)

    predictions = regressor.predict(test_x)
    assert predictions.shape == (len(test_x), 1)
# Convert the loaded train/test data and targets to NumPy arrays.
x_train = np.array(train_data.data)
y_train = np.array(train_data.target)
x_test = np.array(test_data.data)
y_test = np.array(test_data.target)
print(x_train.shape)  # (25000,)
print(y_train.shape)  # (25000, 1)
print(x_train[0][:50])  # <START> this film was just brilliant casting <UNK>
""" The second step is to run the [TextRegressor](/text_regressor). As a quick demo, we set epochs to 2. You can also leave the epochs unspecified for an adaptive number of epochs. """
# Initialize the text regressor.
reg = ak.TextRegressor(overwrite=True, max_trials=1)  # max_trials=1: at most one model is tried.
# Feed the text regressor with training data.
reg.fit(x_train, y_train, epochs=2)
# Predict with the best model.
predicted_y = reg.predict(x_test)
# Evaluate the best model with testing data.
print(reg.evaluate(x_test, y_test))
""" ## Validation Data By default, AutoKeras use the last 20% of training data as validation data. As shown in the example below, you can use `validation_split` to specify the percentage. """
reg.fit(
    x_train,
# NOTE: training is very slow, so the same model setup is kept between runs
# in order to observe the outcome.
# NOTE: when using AutoKeras, do not put it onto that repo.
# NOTE(review): the author reports that "asm" is only for non-Windows
# platforms, so macOS or Linux (e.g. Ubuntu) may be needed instead — confirm.

# Training and evaluation CSV files (Windows-style relative paths).
clt = ["mkdri\\" + x for x in ["collective.csv", "final.csv"]]
x_train, y_train = shitshow(clt[0])
x_test, y_test = shitshow(clt[1])

# `np.str` was only an alias of the builtin `str`; it was deprecated in
# NumPy 1.20 and removed in 1.24, so the builtin is used directly.
x_train = np.array(x_train, dtype=str)
x_test = np.array(x_test, dtype=str)

# The seed is taken from the wall clock, so every run uses a different seed.
tr = ak.TextRegressor(name="randall", seed=int(time.time()), max_trials=10)
# Open question from the author: is it necessary to try multiple models?
tr.fit(x_train, y_train, epochs=10)
# NOTE(review): the label says "Accuracy", but this prints whatever
# `evaluate` returns for the regressor — confirm the label is appropriate.
print('Accuracy: {accuracy}'.format(accuracy=tr.evaluate(x_test, y_test)))

# NOTE: there is no CUDA support in this environment.
# Open questions from the author:
#   - how to create a training set?
#   - what is the result?
def test_txt_reg_fit_call_auto_model_fit(fit, tmp_path):
    """Check that ``TextRegressor.fit`` ends up calling the injected ``fit``.

    NOTE(review): ``fit`` is presumably a ``unittest.mock`` object patched in
    by a decorator outside this function — confirm.
    """
    auto_model = ak.TextRegressor(directory=tmp_path, seed=utils.SEED)
    auto_model.fit(x=np.array(["a b c", "b b c"]), y=np.array([1.0, 2.0]))
    # `is_called` is not a Mock attribute: accessing it auto-creates a truthy
    # child mock, so that assertion could never fail. `called` is the real flag.
    assert fit.called
x_test = list(map(lambda sentence: ' '.join( id_to_word[i] for i in sentence), x_test)) x_train = np.array(x_train, dtype=np.str) x_test = np.array(x_test, dtype=np.str) print(x_train.shape) # (25000,) print(y_train.shape) # (25000, 1) print(x_train[0][:50]) # <START> this film was just brilliant casting <UNK> """ The second step is to run the [TextRegressor](/text_regressor). """ import autokeras as ak # Initialize the text regressor. reg = ak.TextRegressor(max_trials=1) # It tries 10 different models. # Feed the text regressor with training data. reg.fit(x_train, y_train, epochs=2) # Predict with the best model. predicted_y = reg.predict(x_test) # Evaluate the best model with testing data. print(reg.evaluate(x_test, y_test)) """ ## Validation Data By default, AutoKeras use the last 20% of training data as validation data. As shown in the example below, you can use `validation_split` to specify the percentage. """ reg.fit(x_train,
""" # converting from other formats (such as pandas) to numpy df = pd.read_csv("./News_Final.csv") text_inputs = df.Title.to_numpy(dtype="str") media_success_outputs = df.Facebook.to_numpy(dtype="int") """ Next, initialize and train the [TextRegressor](/text_regressor). """ # Initialize the text regressor reg = ak.TextRegressor(max_trials=15) # AutoKeras tries 15 different models. # Find the best model for the given training data reg.fit(text_inputs, media_success_outputs) # Predict with the chosen model: predict_y = reg.predict(text_inputs) """ If your text source has a larger vocabulary (number of distinct words), you may need to create a custom pipeline in AutoKeras to increase the `max_tokens` parameter. """ text_input = (df.Title + " " + df.Headline).to_numpy(dtype="str")
] # Add metric variables to GLOBAL_VARIABLES collection. # They will be initialized for new session. for v in metric_vars: tf.add_to_collection(tf.GraphKeys.GLOBAL_VARIABLES, v) # force to update metric values with tf.control_dependencies([update_op]): pearson_r = tf.identity(pearson_r) return pearson_r x_train = np.array(E, dtype=np.str) x_test = np.array(D, dtype=np.str) import autokeras as ak # Initialize the text classifier. clf = ak.TextRegressor( max_trials=2, objective="val_mean_squared_error", overwrite=False, loss="mean_absolute_error") # It tries 10 different models. # Feed the text classifier with training data. clf.fit(x_train, y_train) # Predict with the best model. predicted_y = clf.predict(x_test) # Evaluate the best model with testing data. print(clf.evaluate(x_test, y_test))