Example #1
    def test(self):
        processor = Preprocessor(['name:/abc/'])
        self.assertEqual(processor.process(['123', 'abc', '456']),
                         ['123', Variable('name'), '456'])
        processor = Preprocessor(['ip:/\\d\\d\\d\\.\\d\\d\\d\\.\\d\\d\\d/'])
        self.assertEqual(processor.process(['127.123.321.888', 'abc', '456']),
                         [Variable('ip'), 'abc', '456'])
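For reference, here is a minimal sketch of a Preprocessor and Variable that would satisfy this test (an illustration assuming specs of the form name:/regex/, not the project's actual implementation):

import re

class Variable:
    def __init__(self, name):
        self.name = name

    def __eq__(self, other):
        return isinstance(other, Variable) and other.name == self.name

    def __repr__(self):
        return "Variable(%r)" % self.name

class Preprocessor:
    def __init__(self, specs):
        # Parse "name:/regex/" specs into (name, compiled pattern) pairs.
        self._patterns = []
        for spec in specs:
            name, _, pattern = spec.partition(':')
            self._patterns.append((name, re.compile(pattern.strip('/'))))

    def process(self, tokens):
        # Replace each token that matches a pattern with the named Variable.
        result = []
        for token in tokens:
            for name, pattern in self._patterns:
                if pattern.search(token):
                    result.append(Variable(name))
                    break
            else:
                result.append(token)
        return result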
Example #2
    def preprocess(self, custom_file="", custom=False):
        print("Preprocessing")

        if custom:
            pp = Preprocessor(self.subreddit,
                              self.sample_size,
                              self.percentile,
                              custom=True,
                              custom_file=custom_file)
            output, _ = pp.process(custom=True, custom_file=custom_file)
            output = [(str(output[i]), 0) for i in range(len(output))]
            vocab, char2idx, idx2char, text_as_int = vectorize(output)

            return self.setup_vectorized_data(vocab, char2idx, idx2char,
                                              text_as_int)
        else:
            pp = Preprocessor(self.subreddit, self.sample_size,
                              self.percentile)
            comments, num = pp.process()
            good_comments = pp.statistics(comments)

            print(num)
            print(len(comments))
            print(len(good_comments))

            vocab, char2idx, idx2char, text_as_int = vectorize(good_comments)
            return self.setup_vectorized_data(vocab, char2idx, idx2char,
                                              text_as_int)
Example #3
def make_dataset(source_paths, source_labels, source_config, target_paths,
                 target_labels, target_config, batch_size):
    source_preprocessor = Preprocessor(source_config)
    target_preprocessor = Preprocessor(target_config)
    datasets = []
    for paths, labels in zip(source_paths, source_labels):
        datasets.append(
            make_domain_dataset(paths, labels, source_preprocessor,
                                batch_size))
    datasets.append(
        make_domain_dataset(target_paths, target_labels, target_preprocessor,
                            batch_size))
    return tf.data.Dataset.zip(tuple(datasets)).repeat()
Example #4
def testTennisOrIris(trainDataFile, testDataFile, attrDataFile):
    data = Preprocessor(trainDataFile, testDataFile, attrDataFile)
    data.loadData()
    trainData = data.getMatrix(data.getTrainData())
    testData = data.getMatrix(data.getTestData())
 
    numInput = data.getNumInput()
    numOutput = len(data.getClasses())
    numHidden = 3
    seed = 4 
    learningRate = 0.1
    maxEpochs = 5000
    momentum = 0.0

    print("Generating neural network: %d-%d-%d" % (numInput, numHidden,numOutput))
    nn = NeuralNetwork(numInput, numHidden, numOutput, seed)
    nn.train(trainData, maxEpochs, learningRate, momentum)
    print("Training complete")

    # accTrain = nn.accuracy(trainData)
    accTest = nn.accuracy(testData)

    # print("\nAccuracy on train data = %0.4f " % accTrain)
    print("Accuracy on test data   = %0.4f " % accTest)
Example #5
def main(_):
    pre_processor = Preprocessor()
    pre_processor.set_train_test_data(0.8)

    model = Model('winner_predict_model')
    model.learning_rate = 0.01
    model.sess = tf.Session()

    model.builder(team_input_size=pre_processor.team_input_size,
                  player_input_size=pre_processor.player_input_size,
                  output_size=pre_processor.output_size,
                  model_name='model_builder')
    model.run_train(train_epoch=5000,
                    train_x_home_team=pre_processor.train_x_home_team,
                    train_x_away_team=pre_processor.train_x_away_team,
                    train_x_home_player=pre_processor.train_x_home_player,
                    train_x_away_player=pre_processor.train_x_away_player,
                    train_y=pre_processor.train_y,
                    keep_prob=0.7,
                    print_num=500)
    model.run_test(test_x_home_team=pre_processor.test_x_home_team,
                   test_x_away_team=pre_processor.test_x_away_team,
                   test_x_home_player=pre_processor.test_x_home_player,
                   test_x_away_player=pre_processor.test_x_away_player,
                   test_y=pre_processor.test_y)
    model.closer()
Example #6
def externalVoodoo(input,
                   output,
                   linkTo,
                   pathToRemoveFromIdentifier="",
                   trace=False):
    inputLines = _readLinesOfFile(input)
    perFileSettings = PerFileSettings(inputLines)
    preprocessor = Preprocessor(linkTo, output, inputLines,
                                pathToRemoveFromIdentifier)

    out = preprocessor.externalHeader()
    out += '#include "VoodooConfiguration.h"\n'
    out += '#include <VoodooCommon/Common.h>\n\n'
    out += "namespace External\n{\n\n"
    iterator = VoodooMultiplexerIterator(perFileSettings)
    iterator.process(input)
    out += iterator.iter()
    out += "\n}\n\n"
    out += preprocessor.externalSwitchToExpectation()
    out += '#include "VoodooCommon/All.h"\n\n'
    out += "namespace External\n{\n\n"
    out += iterator.expect()
    out += "\n}\n\n"
    out += preprocessor.externalFooter()
    return out
Example #7
    @classmethod
    def setUpClass(cls):
        cls.DEBUG = False
        cls.METRICS = False

        cls.data_api_impl = DataApi('../../../data/')
        cls.cross_validator_impl = CrossValidator()
        cls.preprocessor_impl = Preprocessor()
Example #8
    def test_parent_class(self):
        configuration = Configuration(CONF_PATH)
        preprocessor = Preprocessor(configuration)
        ros_msg_processor = RosMsgProcessor(configuration)

        class_definition_dict = {}

        kidl_file = "class_with_ros_mdlw_and_parent_class.yaml"

        with open("%s%s" % (INCLUDE_PATH, kidl_file), 'r') as stream:
            try:
                class_definition_data = yaml.load(stream,
                                                  Loader=yaml.FullLoader)
            except yaml.YAMLError as exc:
                print(exc)

        class_definition = preprocessor.process(class_definition_data, False)
        class_definition_dict[class_definition.class_name] = class_definition

        kidl_file = "basic_class_with_ros_mdlw.yaml"

        with open("%s%s" % (INCLUDE_PATH, kidl_file), 'r') as stream:
            try:
                class_definition_data = yaml.load(stream,
                                                  Loader=yaml.FullLoader)
            except yaml.YAMLError as exc:
                print(exc)

        class_definition = preprocessor.process(class_definition_data, False)
        class_definition_dict[class_definition.class_name] = class_definition

        ros_msg_definition = ros_msg_processor.process(
            'kpsr::codegen::ClassWithParentClass', class_definition_dict)

        print(ros_msg_definition)
Example #9
    def processData(self):
        """
        The purpose of this method is to process both train/test raw data
        """
        # Load the preprocessor
        preprocessor = Preprocessor()

        if self.train:
            filename = self.parameters['data-path'] + self.parameters[
                'train-data-filename']
        else:
            filename = self.parameters['data-path'] + self.parameters[
                'test-data-filename']

        # read the required file
        data_df = pd.read_json(path_or_buf=filename, lines=True)

        # concatenate response and last 'n' contexts together
        data_df['CONTEXT'] = data_df['context'].apply(
            lambda x: ' '.join(x[-self.n_last_context:]))
        data_df['text'] = data_df['CONTEXT'] + ' ' + data_df['response']
        data_df['text'] = data_df['text'].apply(
            lambda x: preprocessor.process_text_bert(x))

        # save the processed data
        if self.train:
            filename = self.parameters['processed-data-path'] + self.parameters[
                'processed-train-data-filename']
            data_df[['text', 'label']].to_csv(filename)
        else:
            filename = self.parameters['processed-data-path'] + self.parameters[
                'processed-test-data-filename']
            data_df[['text']].to_csv(filename)
        return
Example #10
    def test_with_builder(self):
        configuration = Configuration(CONF_PATH)
        preprocessor = Preprocessor(configuration)
        poco_processor = PocoProcessor(configuration)

        class_definition_dict = {}

        kidl_file = "basic_class_with_builder.yaml"

        with open("%s%s" % (INCLUDE_PATH, kidl_file), 'r') as stream:
            try:
                class_definition_data = yaml.load(stream,
                                                  Loader=yaml.FullLoader)
            except yaml.YAMLError as exc:
                print(exc)

        class_definition = preprocessor.process(class_definition_data, False)
        class_definition_dict[class_definition.class_name] = class_definition

        poco_definition = poco_processor.process('BasicClassWithBuilder',
                                                 class_definition_dict, '')

        env = Environment(loader=FileSystemLoader(TEMPLATE_PATH))
        template = env.get_template('poco_template.h')

        print(template.render(definition=poco_definition))
Example #11
def main():
    # create the experiments dirs
    create_dirs(config)

    # create tensorflow session
    sess = tf.Session()

    # build preprocessor
    preprocessor = Preprocessor(config)

    # load data, preprocess and generate data
    data = DataGenerator(preprocessor, config)

    # create an instance of the model you want
    model = TextCNN.TextCNN(preprocessor, config)

    # create tensorboard logger
    logger = Logger(sess, config)

    # create trainer and pass all the previous components to it
    trainer = Trainer(sess, model, data, config, logger)

    # load model if exists
    model.load(sess)

    # here you train your model
    trainer.train()
Example #12
    def __init__(self, policy_cls, env_id, args):
        logging.getLogger("tensorflow").setLevel(logging.ERROR)
        self.args = args
        self.env = gym.make(env_id, **args2envkwargs(args))
        self.policy_with_value = policy_cls(self.args)
        self.iteration = 0
        if self.args.mode == 'training':
            self.log_dir = self.args.log_dir + '/evaluator'
        else:
            self.log_dir = self.args.test_log_dir
        if not os.path.exists(self.log_dir):
            os.makedirs(self.log_dir)

        self.preprocessor = Preprocessor((self.args.obs_dim, ),
                                         self.args.obs_preprocess_type,
                                         self.args.reward_preprocess_type,
                                         self.args.obs_scale,
                                         self.args.reward_scale,
                                         self.args.reward_shift,
                                         gamma=self.args.gamma)

        self.writer = self.tf.summary.create_file_writer(self.log_dir)
        self.stats = {}
        self.eval_timer = TimerStat()
        self.eval_times = 0
Example #13
def pipe(ops):
    logging.info("creating pipeline")
    s_count_vec = 'count_vec'
    s_hash_vec = 'hash_vec'
    s_tfidf_vec = 'tfidf_vec'
    s_my_preproc = 'my_preproc'
    s_tfidf_trans = 'tfidf_trans'
    s_mnb_cls = 'mnb_cls'

    pipe_dic = {}
    #### Pre-Processing ##########################
    pipe_dic[s_my_preproc] = Preprocessor() if s_my_preproc in ops else None
    pipe_dic[s_count_vec] = CountVectorizer() if s_count_vec in ops else None
    # n_features=2**20 (default value)
    pipe_dic[s_hash_vec] = HashingVectorizer() if s_hash_vec in ops else None
    pipe_dic[s_tfidf_vec] = TfidfVectorizer() if s_tfidf_vec in ops else None

    # transformer
    pipe_dic[s_tfidf_trans] = TfidfTransformer(
    ) if s_tfidf_trans in ops else None

    ### Classification Algorithms #################
    pipe_dic[s_mnb_cls] = MultinomialNB() if s_mnb_cls in ops else None

    return Pipeline([(key, pipe_dic[key]) for key in ops
                     if pipe_dic[key] is not None])
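A hypothetical call (the training data names below are placeholders, not from the original project): combine the custom preprocessor, a TF-IDF vectorizer and Multinomial Naive Bayes, then use the result like any scikit-learn estimator. Note that step order follows the order of ops, so the preprocessor should come first.

clf = pipe(['my_preproc', 'tfidf_vec', 'mnb_cls'])
clf.fit(train_texts, train_labels)    # placeholder training corpus and labels
predicted = clf.predict(test_texts)   # placeholder held-out texts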
Example #14
def startSingularFileExperiment(imgName, imgType, withSkel=False, scale=3):
    print "************************************************************"
    Preprocessor.Preprocessor(imgName + "." + imgType,
                              imgName + "." + imgType,
                              withSkel,
                              scale=scale)
    print "Done preprocessing with image: " + imgName

    out = pytesseract.image_to_string(
        Image.open("output/preprocessed/" + imgName + "." + imgType))
    out2 = pytesseract.image_to_string(
        Image.open("output/preprocessed/" + imgName + "." + imgType),
        lang="Merchant+Club+Fake+Open+Anonymous+Arial")

    if withSkel:
        out3 = pytesseract.image_to_string(
            Image.open("output/skeleton/" + imgName + "." + imgType))
        out4 = pytesseract.image_to_string(
            Image.open("output/skeleton/" + imgName + "." + imgType),
            lang="Merchant+Club+Fake+Open+Anonymous+Arial")
        writeToFile(imgName + "_skel.txt", out3)
        writeToFile(imgName + "_skel.txt", out4, False)

    print "Done classifying with image: " + imgName

    writeToFile(imgName + ".txt", out)
    writeToFile(imgName + ".txt", out2, False)
    print "Done writing to file with image: " + imgName
    print "************************************************************"
Example #15
def testIrisNoisy(trainDataFile, testDataFile, attrDataFile):
    data = Preprocessor(trainDataFile, testDataFile, attrDataFile)
    data.loadData()
    testData = data.getMatrix(data.getTestData()) 
    numInput = data.getNumInput() 
    numOutput = len(data.getClasses())
    numHidden = 3
    seed = 4 
    learningRate = 0.1
    maxEpochs = 5000
    momentum = 0.0
 
    for rate in range(0, 21, 2):
        noisyData = addNoise(data.getTrainData(), rate, data.getClasses())
        trainData = data.getMatrix(noisyData) 
        print("\nNoise Rate (%): " + str(rate)) 
        print("Generating neural network: %d-%d-%d" % (numInput, numHidden,numOutput)) 
        nn = NeuralNetwork(numInput, numHidden, numOutput, seed)
        nn.train(trainData, maxEpochs, learningRate, momentum, showEpochs=False, vRatio=0.85)
        print("Training complete")

        accTrain = nn.accuracy(trainData)
        accTest = nn.accuracy(testData)

        accValidTrain = nn.accuracy(trainData, validationOn=True)
        accValidTest = nn.accuracy(testData, validationOn=True)
        print("w/o validation set:")
        print("Accuracy on train data = %0.4f " % accTrain)
        print("Accuracy on test data   = %0.4f " % accTest)
    
        print("w/ validation set:")
        print("Accuracy on train data = %0.4f " % accValidTrain)
        print("Accuracy on test data   = %0.4f " % accValidTest)
Example #16
def validate(model: Model, loader: DataLoaderIAM, line_mode: bool) -> Tuple[float, float]:
    """Validates NN."""
    print('Validate NN')
    loader.validation_set()
    preprocessor = Preprocessor(get_img_size(line_mode), line_mode=line_mode)
    num_char_err = 0
    num_char_total = 0
    num_word_ok = 0
    num_word_total = 0
    while loader.has_next():
        iter_info = loader.get_iterator_info()
        print(f'Batch: {iter_info[0]} / {iter_info[1]}')
        batch = loader.get_next()
        batch = preprocessor.process_batch(batch)
        recognized, _ = model.infer_batch(batch)

        print('Ground truth -> Recognized')
        for i in range(len(recognized)):
            num_word_ok += 1 if batch.gt_texts[i] == recognized[i] else 0
            num_word_total += 1
            dist = editdistance.eval(recognized[i], batch.gt_texts[i])
            num_char_err += dist
            num_char_total += len(batch.gt_texts[i])
            print('[OK]' if dist == 0 else '[ERR:%d]' % dist, '"' + batch.gt_texts[i] + '"', '->',
                  '"' + recognized[i] + '"')

    # print validation result
    char_error_rate = num_char_err / num_char_total
    word_accuracy = num_word_ok / num_word_total
    print(f'Character error rate: {char_error_rate * 100.0}%. Word accuracy: {word_accuracy * 100.0}%.')
    return char_error_rate, word_accuracy
Example #17
def hyper_objective(train_X, train_y, nfolds, space):
    kwargs = {}
    for k, v in space.items():
        if k in [
                'boost_true_positive_feedback', 'number_of_states',
                'number_of_pos_neg_clauses_per_label', 'threshold'
        ]:
            v = int(v)
        kwargs[k] = v

    pre = Preprocessor(nbits=3)
    clf = Pipeline(steps=[('preprocessor', pre),
                          ('clf', TsetlinMachineClassifier(random_state=1,
                                                           **kwargs))])

    from sklearn.model_selection import StratifiedKFold
    kf = StratifiedKFold(n_splits=nfolds, random_state=1, shuffle=True)

    from sklearn.model_selection import cross_val_score
    scores = cross_val_score(clf,
                             train_X,
                             train_y,
                             cv=kf,
                             n_jobs=N_JOBS,
                             fit_params={'clf__n_iter': 300})

    score = np.mean(scores)

    print('best score: {:.5f}  best params: {}'.format(score, kwargs))
    return -score
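Returning the negative score suggests this objective is meant for a minimizer such as hyperopt; the caller is not shown, so the sketch below is an assumption (the parameter ranges, fold count, and the train_X/train_y variables are illustrative only):

from functools import partial

from hyperopt import fmin, hp, tpe

# Parameter names come from hyper_objective above; the ranges are made up.
space = {
    'number_of_states': hp.quniform('number_of_states', 50, 500, 1),
    'threshold': hp.quniform('threshold', 10, 100, 1),
    'number_of_pos_neg_clauses_per_label':
        hp.quniform('number_of_pos_neg_clauses_per_label', 10, 100, 1),
    'boost_true_positive_feedback': hp.choice('boost_true_positive_feedback', [0, 1]),
}

objective = partial(hyper_objective, train_X, train_y, 5)
best_params = fmin(fn=objective, space=space, algo=tpe.suggest, max_evals=50)
print(best_params)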
Example #18
def run_imputation(df):
    """
    Fill in missing numeric values using Kalman filtering,
    and fill in missing string values by drawing from each
    column's distribution of observed values.
    """

    # Create the dateIndex
    time_cols = ['year', 'month', 'day', 'hour']
    df["timestamp"] = pd.to_datetime(df[time_cols])
    df.set_index("timestamp", inplace=True)
    df.drop(columns=time_cols, inplace=True)
    # Check if there are null values in the dataset and get the columns
    nulls = df.isnull().sum()
    null_cols = nulls[nulls > 0].index.values
    numeric_null_cols = get_numeric_null_cols(df, null_cols)
    obj_null_cols = get_string_null_cols(df, null_cols)

    # Use Kalman Filtering to impute missing numeric
    # values
    for col in numeric_null_cols:
        prep = Preprocessor()
        arr = prep.kalman_impute(df[col])
        df[col] = arr

        # Backfill any missing data at the beginning of the array
        if df[col].isnull().sum():
            df[col].fillna(method="bfill", inplace=True)

    # Random draw based on distribution of
    # unique vals in each column
    for col in obj_null_cols:
        arr = fill_missing_strings(df[col])
        df[col] = arr

    return df
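The helpers get_numeric_null_cols, get_string_null_cols and fill_missing_strings are not shown. A minimal sketch of fill_missing_strings that matches the comment above (an assumption, not the project's actual code):

import numpy as np
import pandas as pd

def fill_missing_strings(col: pd.Series) -> pd.Series:
    # Replace NaNs by sampling from the column's observed value distribution.
    probs = col.dropna().value_counts(normalize=True)
    missing = col.isnull()
    filled = col.copy()
    filled[missing] = np.random.choice(probs.index, size=missing.sum(),
                                       p=probs.values)
    return filled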
Example #19
    def test(self, x, batch_size=32, metric='mse'):
        # Prepare data loader, placeholders, function feeds & outputs
        loader = x if isinstance(x, Preprocessor) else Preprocessor([],
                                                                    loader=x)
        placeholders = loader.get_placeholders()

        error, baseline, feed = self._untangle_function_rets(
            self._test(*placeholders, metric=metric))
        to_run = [error, baseline] if baseline is not None else error

        # Calculate error for all samples, one batch at a time
        errors, baselines = [], []
        while loader.has_batch():
            batch_data = loader.sample(batch_size, 'test', full_augment=True)
            feed.update({k: v for k, v in zip(placeholders, batch_data)})

            # Evaluate network on batch
            t_rets = self.session.run(to_run, feed_dict=feed)
            t_errors, t_baselines = t_rets if isinstance(
                t_rets, (tuple, list)) else (t_rets, None)

            # Store test results
            if t_errors is not None:
                errors.append(t_errors)
            if t_baselines is not None:
                baselines.append(t_baselines)

        # Concatenate batch results and return
        return np.concatenate(errors, axis=0), np.concatenate(baselines,
                                                              axis=0)
Example #20
    def __init__(self, model_name="test.hdf5"):
        print('Starting test of {}'.format(model_name))
        models_path = path.abspath(path.join(
            __file__, "../../..")) + "/models/" + model_name
        print(models_path)
        self.model = load_model(models_path)
        self.preprocessor = Preprocessor()
Example #21
    def __init__(self, left_filename, right_filename, directory, config):
        super(TestPredictionCallback, self).__init__()

        self.directory = directory

        # Crop and expand dims to batch = 1
        crop_start_row = config['crop_start_row']
        crop_start_col = config['crop_start_col']
        crop_stop_row = crop_start_row + config['crop_height']
        crop_stop_col = crop_start_col + config['crop_width']

        preprocessor = Preprocessor()

        img = img_to_array(load_img(left_filename),
                           data_format='channels_first')
        img = img[:, crop_start_row:crop_stop_row,
                  crop_start_col:crop_stop_col]
        left_img = preprocessor.resize_img(img, [
            config['channels'], config['resized_height'],
            config['resized_width']
        ])

        img = img_to_array(load_img(right_filename),
                           data_format='channels_first')
        img = img[:, crop_start_row:crop_stop_row,
                  crop_start_col:crop_stop_col]
        right_img = preprocessor.resize_img(img, [
            config['channels'], config['resized_height'],
            config['resized_width']
        ])

        self.left_img = np.expand_dims(left_img, axis=0)
        self.right_img = np.expand_dims(right_img, axis=0)
Example #22
def load_data(batch_size):
    '''
    Loads training, validation and test data from resources.
    '''
    data_path = os.path.abspath(
        os.path.join(os.path.dirname(__file__), '../resources'))
    data_config = {
        "training": {
            "size": 0.8
        },
        "test": {
            "size": 0.1
        },
        "validation": {
            "size": 0.1
        }
    }

    p = Preprocessor(base_path=data_path, datasets=data_config)

    train_data = p.generate_images('training',
                                   shuffle=True,
                                   batch_size=batch_size)
    valid_data = p.generate_images('validation',
                                   shuffle=False,
                                   batch_size=batch_size)

    class_weights = p.get_class_weights()

    return train_data, valid_data, class_weights
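A hypothetical caller, assuming generate_images returns Keras-compatible generators and build_model is some helper that returns a compiled Keras model (neither is shown in the example):

train_data, valid_data, class_weights = load_data(batch_size=32)

model = build_model()  # assumed helper, not part of the example
model.fit(train_data,
          validation_data=valid_data,
          class_weight=class_weights,
          epochs=10)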
Example #23
def main(_):

    if check_path_validity() == -1:
        exit(1)

    FLAGS.logdir = FLAGS.logdir if FLAGS.logdir.endswith(
        '/') else FLAGS.logdir + '/'
    # Make a new directory to store checkpoints and tensorboard summaries;
    # this is only necessary if we are going to train a new model.
    if FLAGS.training:
        os.makedirs(FLAGS.logdir)

    # Setup tensorflow and tensorboard writers
    tf.reset_default_graph()
    session = tf.Session()
    writer = tf.summary.FileWriter(FLAGS.logdir,
                                   session.graph) if FLAGS.visualize else None
    summary_ops, summary_placeholders = setup_summary()

    # Initialize key objects: environment, agent and preprocessor
    env = Environment("127.0.0.1", 9090)
    agent = DDQNAgent(session, num_actions, width, height, FLAGS.logdir,
                      writer)
    preprocessor = Preprocessor(width, height)

    if FLAGS.training:
        summarize_func = partial(summarize, session, writer, summary_ops,
                                 summary_placeholders)
        train(agent, env, preprocessor, summarize_func)
    else:
        play(agent, env, preprocessor)
Example #24
    def __init__(self,
                 data_dir,
                 coord,
                 symbol_list,
                 year_range,
                 symbol_first,
                 data_win_len,
                 receptive_field,
                 queue_size=500):
        # system initialize
        self.db_manager = DBManager(data_dir)
        self.preprocessor = Preprocessor()

        self.coord = coord
        self.threads = []

        # processing params
        self.data_dir = data_dir
        self.symbol_list = symbol_list
        self.year_range = year_range
        self.symbol_first = symbol_first
        self.data_win_len = data_win_len
        self.receptive_field = receptive_field

        # queue setup
        self.trans_placeholder = tf.placeholder(dtype=tf.float32, shape=None)
        self.trans_queue = tf.PaddingFIFOQueue(queue_size, ['float32'],
                                               shapes=[(None, 1)])
        self.trans = self.trans_queue.enqueue([self.trans_placeholder])
        # for multithreading:
        self.yield_list = itertools.product(
            self.symbol_list,
            self.year_range) if self.symbol_first else itertools.product(
                self.year_range, self.symbol_list)
Example #25
def voodoo(input,
           output,
           pathToRemoveFromIdentifier,
           voodooDBFile,
           includes,
           defines,
           preIncludes,
           trace=False):
    inputLines = _readLinesOfFile(input)
    perFileSettings = PerFileSettings(inputLines)
    preprocessor = Preprocessor(input, output, inputLines,
                                pathToRemoveFromIdentifier)

    out = preprocessor.header()
    out += '#include <VoodooCommon/Common.h>\n\n'
    iterator = VoodooMultiplexerIterator(perFileSettings, voodooDBFile)
    iterator.process(input,
                     includes=includes,
                     defines=defines,
                     preIncludes=preIncludes)
    out += iterator.iter()
    out += preprocessor.switchToExpectation()
    out += '#include "VoodooCommon/All.h"\n\n'
    out += iterator.expect()
    out += preprocessor.footer()
    return out
Example #26
    def __init__(self):
        self._rows = 0
        self._cols = 0
        self._params = {}
        self._model = LogisticRegression(max_iter=20)
        self._preprocessor = Preprocessor()
        self.init_params()
Example #27
def main(mode, other_args):
    if mode != 'build' and mode != 'detect':
        raise Exception('Unknown execution mode: {}'.format(mode))

    with open("config/config.yml", 'r') as ymlfile:
        cfg = yaml.load(ymlfile)

    td = TrendDetector(cfg)

    if mode == 'build':
        # Build model
        sl = SearchLoader(cfg)
        df = sl.load()
        pp = Preprocessor(df)
        agg_df = pp.run()
        td.build(agg_df)

        # Detect trending for all queries on the last day
        max_date = agg_df['date'].max()
        for _, row in agg_df[agg_df['date'] == max_date].iterrows():
            query = row['query']
            count = row['count']
            td.is_trending(query, count)

    else:  # 'detect' mode
        # Load model
        td.load_model()

        # Detect trending for the given query and search count
        query = other_args.query
        obs = other_args.obs
        td.is_trending(query, obs, verbose=True)
Example #28
def run():
    my_preprocessor = Preprocessor('stop_words.txt')
    my_sampler = Sampler()
    my_represent = Represent()

    a_data = Data('../data/chat.txt')
    a_data.prepare_search(my_preprocessor, my_sampler, my_represent)
Example #29
    def __init__(self, conf_path, template_path):
        configuration = Configuration(conf_path)
        self.preprocessor = Preprocessor(configuration)
        self.poco_processor = PocoProcessor(configuration)

        self.ros_mapper_processor = RosMapperProcessor(configuration)
        self.ros_msg_processor = RosMsgProcessor(configuration)

        self.dds_mapper_processor = DdsMapperProcessor(configuration)
        self.dds_idl_processor = DdsIdlProcessor(configuration)

        self.zmq_serializer_processor = ZmqSerializerProcessor(configuration)

        self.node_handler_processor = NodeHandlerProcessor(configuration)

        env = Environment(loader=FileSystemLoader(template_path))
        self.poco_template = env.get_template('poco_template.h')
        self.ros_mapper_template = env.get_template('ros_mapper_template.h')
        self.ros_msg_template = env.get_template('ros_template.msg')
        self.dds_mapper_template = env.get_template('dds_mapper_template.h')
        self.dds_idl_template = env.get_template('dds_template.idl')
        self.zmq_serializer_template = env.get_template(
            'zmq_serializer_template.h')
        self.node_handler_template = env.get_template(
            'node_handler_template.js')
Example #30
def generate_data(path):
    """
    Date: 2018-7-13
    一鹏要求保留原始的标点符号和所有的原始的表情什么的
    :return:
    """
    my_preprocessor = Preprocessor('cc')
    a_data = Data('../data/chat-20w.txt')
    a_data.seperate_conversation()
    a_data.preprocess(my_preprocessor, format=2)

    output = []
    for conversation in a_data.conversation_all_preprocessed_seperate:
        this_conversation = []
        for sentence in conversation:
            speaker = sentence[2]
            content = sentence[5]
            content = [restore_placeholder(token) for token in content]

            this_conversation.append([speaker, content])
        output.append(this_conversation)

    with open(path, 'w', encoding='utf-8') as f:
        for conversation in output:
            for sentence in conversation:
                f.write(str(sentence[0]) + ' ' + ' '.join(sentence[1]) + '\n')
            f.write('\n')