def init(data_src_dir, dataset_src_dir, dst_dir, model_dst_dir, training_plot_dst_dir, test_plot_dst_dir, img_format, input_suffix, output_suffix, gt_suffix, transformation, downsample_rate, samples_per_second, multiplier, load):
    """Prepare the output directory tree and dataset, then return a fresh TF session.

    When *load* is False the previous run's output in *dst_dir* is wiped and the
    model/plot folders are recreated. When *load* is True the existing folders
    are reused. If the preprocessed dataset directory does not exist yet, the
    raw data is preprocessed first.

    Returns a new ``tf.Session`` on a reset default graph (TF1-style API).
    """
    if not load:
        shutil.rmtree(dst_dir, ignore_errors=True)
        # Without the delay sometimes weird stuff happens when deleting/creating the folder
        time.sleep(1)

    # Create the necessary folders. exist_ok=True fixes a crash when load=True:
    # the rmtree above is skipped, so the folders typically already exist and a
    # plain makedirs would raise FileExistsError.
    os.makedirs(model_dst_dir, exist_ok=True)
    os.makedirs(training_plot_dst_dir, exist_ok=True)
    os.makedirs(test_plot_dst_dir, exist_ok=True)

    # Preprocess data only if the preprocessed dataset is not already on disk.
    if not os.path.isdir(dataset_src_dir):
        preprocessor = Preprocessor(data_src_dir, dataset_src_dir, img_format, input_suffix, output_suffix, gt_suffix, downsample_rate, samples_per_second)
        preprocessor.preprocess(transformation=transformation, duration_multiplier=multiplier)

    tf.reset_default_graph()
    return tf.Session()
def main(data_src_path, dataset_dst_path, img_format, input_suffix, output_suffix, gt_suffix, transformation, downsample_rate, samples_per_second, duration_multiplier):
    """Run the full preprocessing pipeline from raw data to dataset.

    Builds a Preprocessor over *data_src_path* and writes both the input and
    output representations of every sample to *dataset_dst_path*.
    """
    pipeline = Preprocessor(
        data_src_path,
        dataset_dst_path,
        img_format,
        input_suffix,
        output_suffix,
        gt_suffix,
        downsample_rate,
        samples_per_second,
    )
    # Generate both sides of each training pair in one pass.
    pipeline.preprocess(
        gen_input=True,
        gen_output=True,
        transformation=transformation,
        duration_multiplier=duration_multiplier,
    )
def main():
    """Train an SVR house-price model with grid search and save predictions.

    Loads the train/test CSVs, preprocesses them, fits a grid-searched SVR on
    the numeric features against ``SalePrice``, predicts on both splits, and
    writes the (id, prediction) pairs to a results CSV.

    Returns 0 on success, 1 if the input CSVs could not be loaded.
    """
    csv_handler = CSVHandler(data_dir)
    preprocessor = Preprocessor()
    # visualizer = Visualizer()
    logger = Logger()

    # Load train and test data.
    try:
        train = csv_handler.load_csv(train_filename)
        test = csv_handler.load_csv(test_filename)
    except Exception as e:
        logger.show_exception(e)
        # Bug fix: previously execution fell through after logging, then
        # crashed with NameError because `train`/`test` were never bound.
        # Abort with a non-zero status instead.
        return 1

    # Preprocess both frames; keep the raw target before any transformation.
    t_train = train["SalePrice"].values
    train, test = preprocessor.preprocess(train, test, except_num=True)

    # Extract feature matrices for both splits.
    x_train = train.values
    x_test = test.values

    # Save test ids for the submission file.
    test_ids = test.index

    # Grid-search over the SVR regularization/epsilon space.
    tuned_parameters = [{'C': [1000, 10000, 100000], 'epsilon': [1000, 100, 10]}]
    reg = GridSearchCV(
        SVR(),
        tuned_parameters,
        cv=5
    )

    # Train.
    reg.fit(x_train, t_train)
    logger.show_training_result(reg)

    # Predict; prices are reported as integers.
    y_train = reg.predict(x_train).astype(int)
    y_test = reg.predict(x_test).astype(int)

    # Save. Materialize the zip so the iterator is not exhausted or empty by
    # the time save_csv consumes it (zip is lazy in Python 3).
    output = list(zip(test_ids, y_test))
    csv_handler.save_csv(output, 'support_vector_regression')

    # Show difference between true distribution and prediction.
    # visualizer.show_result(t_train, y_train)

    return 0
class ObjectiveClassifier:
    """Classify tweets with a pre-trained Weka model.

    Wraps a serialized Weka classifier together with the feature calculator
    and text normalizer needed to turn a raw tweet into a classifiable ARFF
    instance.
    """

    def __init__(self, model_path, senti_path, stop_words, ngrams_path):
        self.loader = Loader(classname="weka.core.converters.ArffLoader")
        self.features_calculator = FeaturesCalculator(ngrams_path)
        # Deserialize the trained Weka model from disk.
        self.classifier = Classifier(jobject=serialization.read(model_path))
        self.normalizer = Preprocessor(senti_path)
        self.stop_words = stop_words

    def classify_tweet(self, tweet, polarity='"positive"',
                       arff_path="output/tweet_features_objective.arff"):
        """Normalize *tweet*, extract features, and classify each instance.

        The ARFF scratch file location is now parameterized (*arff_path*);
        the default preserves the previous hard-coded path.

        Prints one line per instance and returns a list of
        (predicted_label, distribution) tuples so callers can consume the
        results programmatically instead of scraping stdout.
        """
        tweet_normalized = self.normalizer.preprocess(tweet, self.stop_words, "")
        # Write the feature vector to the ARFF scratch file, then reload it
        # as a Weka dataset with the class attribute in the last column.
        self.features_calculator.calculateFeatures(
            tweet_normalized, arff_path, polarity)
        tweet_features = self.loader.load_file(arff_path)
        tweet_features.class_is_last()

        results = []
        for index, inst in enumerate(tweet_features):
            pred = self.classifier.classify_instance(inst)
            dist = self.classifier.distribution_for_instance(inst)
            label = inst.class_attribute.value(int(pred))
            print("%d - %s - %s" % (index + 1, label, str(dist.tolist())))
            results.append((label, dist.tolist()))
        return results