import os
import shutil
import time

import tensorflow as tf  # TF 1.x graph/session API

# Preprocessor is a project-specific class assumed to be importable here.


def init(data_src_dir, dataset_src_dir, dst_dir, model_dst_dir,
         training_plot_dst_dir, test_plot_dst_dir, img_format, input_suffix,
         output_suffix, gt_suffix, transformation, downsample_rate,
         samples_per_second, multiplier, load):
    if not load:
        shutil.rmtree(dst_dir, ignore_errors=True)
        # Without a short delay, recreating the folders below can race
        # with the deletion above
        time.sleep(1)

        # Create the necessary output folders
        os.makedirs(model_dst_dir)
        os.makedirs(training_plot_dst_dir)
        os.makedirs(test_plot_dst_dir)

        # Preprocess data if necessary
        if not os.path.isdir(dataset_src_dir):
            preprocessor = Preprocessor(data_src_dir, dataset_src_dir,
                                        img_format, input_suffix,
                                        output_suffix, gt_suffix,
                                        downsample_rate, samples_per_second)
            preprocessor.preprocess(transformation=transformation,
                                    duration_multiplier=multiplier)

    # TF 1.x graph/session API; under TF 2.x use the tf.compat.v1 equivalents
    tf.reset_default_graph()
    return tf.Session()
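# A minimal usage sketch (all paths and parameter values below are
# illustrative assumptions, not part of the original example):
#
#     sess = init("data/raw", "data/dataset", "results", "results/models",
#                 "results/plots/train", "results/plots/test", "png",
#                 "_input", "_output", "_gt", "stft", downsample_rate=2,
#                 samples_per_second=4, multiplier=1, load=False)
#     ...  # build and run the graph, then
#     sess.close()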
Example #2
# Preprocessor is the same project-specific class as in the preceding example.
def main(data_src_path, dataset_dst_path, img_format, input_suffix,
         output_suffix, gt_suffix, transformation, downsample_rate,
         samples_per_second, duration_multiplier):
    preprocessor = Preprocessor(data_src_path, dataset_dst_path, img_format,
                                input_suffix, output_suffix, gt_suffix,
                                downsample_rate, samples_per_second)
    preprocessor.preprocess(
        gen_input=True,
        gen_output=True,
        transformation=transformation,
        duration_multiplier=duration_multiplier,
    )
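# A hypothetical invocation (argument values are placeholder assumptions):
#
#     main("data/raw", "data/dataset", "png", "_input", "_output", "_gt",
#          transformation="stft", downsample_rate=2, samples_per_second=4,
#          duration_multiplier=1)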
Example #3
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVR

# CSVHandler, Preprocessor, Logger and the module-level names data_dir,
# train_filename and test_filename are project-specific and assumed to be
# defined elsewhere.


def main():
    csv_handler = CSVHandler(data_dir)
    preprocessor = Preprocessor()
    # visualizer = Visualizer()
    logger = Logger()

    # print "load train data and test data"
    try:
        train = csv_handler.load_csv(train_filename)
        test = csv_handler.load_csv(test_filename)
    except Exception as e:
        logger.show_exception(e)

    # print "preprocess the both data"
    t_train = train["SalePrice"].values
    train, test = preprocessor.preprocess(train, test, except_num=True)

    # print "extract target column and feature column for both data"
    x_train = train.values
    x_test = test.values

    # print "save test ids"
    test_ids = test.index

    # print "design training"
    tuned_parameters = [{'C': [1000, 10000, 100000], 'epsilon': [1000, 100, 10]}]
    reg = GridSearchCV(
        SVR(),
        tuned_parameters,
        cv=5
    )

    # print "train"
    reg.fit(x_train, t_train)
    logger.show_training_result(reg)

    # print "prediction"
    y_train = reg.predict(x_train).astype(int)
    y_test = reg.predict(x_test).astype(int)

    # print "save"
    output = zip(test_ids, y_test)
    csv_handler.save_csv(output, 'support_vector_regression')

    # print "show difference between true distribution and prediction"
    # visualizer.show_result(t_train, y_train)

    # print "everything works well"
    return 0
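# For reference, the grid search result logged above can also be inspected
# directly; this is a sketch of what show_training_result presumably
# reports (the real Logger class is not shown in this example):
#
#     print(reg.best_params_)  # e.g. {'C': 10000, 'epsilon': 100}
#     print(reg.best_score_)   # mean cross-validated R^2 of the best SVR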
Example #4
import weka.core.serialization as serialization
from weka.classifiers import Classifier
from weka.core.converters import Loader

# FeaturesCalculator and Preprocessor are project-specific classes assumed
# to be importable here.


class ObjectiveClassifier:
    def __init__(self, model_path, senti_path, stop_words, ngrams_path):
        self.loader = Loader(classname="weka.core.converters.ArffLoader")
        self.features_calculator = FeaturesCalculator(ngrams_path)
        self.classifier = Classifier(jobject=serialization.read(model_path))
        self.normalizer = Preprocessor(senti_path)
        self.stop_words = stop_words

    def classify_tweet(self, tweet, polarity='"positive"'):
        tweet_normalized = self.normalizer.preprocess(tweet, self.stop_words,
                                                      "")
        self.features_calculator.calculateFeatures(
            tweet_normalized, "output/tweet_features_objective.arff", polarity)
        tweet_features = self.loader.load_file(
            "output/tweet_features_objective.arff")
        tweet_features.class_is_last()
        for index, inst in enumerate(tweet_features):
            pred = self.classifier.classify_instance(inst)
            dist = self.classifier.distribution_for_instance(inst)
            print("%d - %s - %s" %
                  (index + 1, inst.class_attribute.value(
                      int(pred)), str(dist.tolist())))
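# A hypothetical usage sketch: python-weka-wrapper requires a running JVM
# before any Weka object is created, and all paths and arguments below are
# placeholder assumptions:
#
#     import weka.core.jvm as jvm
#
#     jvm.start()
#     clf = ObjectiveClassifier("models/objective.model",
#                               "lexicons/sentiwords.txt",
#                               stop_words=["a", "the", "is"],
#                               ngrams_path="features/ngrams.txt")
#     clf.classify_tweet("I love this phone!")
#     jvm.stop()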