Example #1
def generate_model(author, steps):
    """Given an author name, processes the data/<author>.txt input for steps number
    of iterations into the model input to be used by the lambda_handler
    function.
    """
    predictor = Predictor(128)

    # Filenames.
    author_models_dir = get_dir_for_author(author)
    if not os.path.exists(author_models_dir):
        os.mkdir(author_models_dir)
    model_file = author_models_dir + author + ".model"
    vocab_file = author_models_dir + author + ".vocab"
    commons_file = author_models_dir + author + ".commons"
    raw_text_file = "../data/" + author + ".txt"

    # Read in the 'frequently used words' as common vocab.
    frequent = read_common_vocab("../data/20k_most_common.txt")

    # Clean the content.
    with open(raw_text_file, 'r') as raw:
        raw_words = raw.read().split(' ')
        data, _ = clean_input_data(raw_words, frequent)

    # Write out the words that occur in the clean data to the commons file.
    record_common_vocab(data, commons_file)

    # Train the model. This step takes the longest.
    predictor.train(data, steps)

    # Save the model that we have trained to disk.
    predictor.save(model_file, vocab_file)

    return predictor
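
A minimal usage sketch, assuming a hypothetical author "austen" with a matching data/austen.txt file and the ../data/20k_most_common.txt vocabulary in place (the author name and step count below are invented for illustration):

# Hypothetical call: trains and saves the model files that Example #11's lambda_handler loads.
predictor = generate_model("austen", 10000)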
Example #2
def build_predictor(data, settings):
    sys.stdout.write('Building model\n')
    data_input = Input(shape=(settings['max_len'], ))
    bucket_size = Input(shape=(1, ), dtype="int8")
    embedding = Embedding(input_dim=settings['max_features'] + 3,
                          output_dim=settings['word_embedding_size'],
                          mask_zero=True,
                          name="emb")(data_input)
    encoder = Predictor(input_dim=settings['word_embedding_size'],
                        hidden_dim=settings['sentence_embedding_size'],
                        RL_dim=settings['RL_dim'],
                        max_len=settings['max_len'],
                        batch_size=settings['batch_size'],
                        random_action_prob=settings['random_action_prob'],
                        name='encoder')([embedding, bucket_size])
    layer = encoder[0]

    for idx, hidden_dim in enumerate(settings['hidden_dims']):
        layer = Dense(hidden_dim, name="dense_{}".format(idx))(layer)
        layer = Activation('tanh')(layer)
        layer = Dropout(settings['dense_dropout'])(layer)
    output = Dense(settings['num_of_classes'],
                   activation='softmax',
                   name='output')(layer)
    model = Model(inputs=[data_input, bucket_size],
                  outputs=[
                      output, encoder[1], encoder[2], encoder[3], encoder[4],
                      encoder[5]
                  ])
    return model
Example #3
def predict(modelPath, labelPath, wavPath):
    lf = open(labelPath, 'r')
    labelLn = lf.readlines()

    # remove \n
    label = []
    for item in labelLn:
        label.append(item.strip())

    pred = Predictor()
    pred.load(modelPath)

    # Need enough frequency resolution to distinguish 70 Hz from 74 Hz.
    FR = 44100
    T = 1.0 / FR
    N = 16384

    rate, dataAll = wavfile.read(wavPath, True)

    if rate != FR:
        print('Error: sample rate is not 44.1kHz!')
        return

    # get channel 0 (left channel in stereo)
    if dataAll.ndim != 1:
        dataAll = dataAll[:, 0]

    print('Time        Chord   Confidence')

    nPred = (int)(len(dataAll) / FR)
    for t in range(0, nPred):
        data = dataAll[t * FR:t * FR + N]

        # 16bit int format -> float64
        x = data.astype(np.float64)

        # normalize input time domain data to [-1 +1]
        xs = normalizeArray(x)

        w = scipy.signal.windows.blackman(N)
        xf = scipy.fftpack.fft(xs * w)
        xfa = np.abs(xf)

        # Look at the band between 70 Hz and 4 kHz.
        startIdx = (int)(70 * N / FR)
        endIdx = (int)(4000 * N / FR)
        xfs = xfa[startIdx:endIdx]

        # normalize frequency domain data to [0, +1]
        xIn = normalizePositiveArray(xfs)

        xInR = np.reshape(xIn, (1, -1))
        (ypred, conf) = pred.predict(xInR)

        ypredI = int(ypred)

        #print('', ypredI)
        if conf.max() >= 0:
            print('%3d:%02d %10s %6.2f' %
                  ((int)(t / 60), t % 60, label[ypredI], conf.max()))
Example #4
    def runAlg(self, dataType):
        '''
        Run the algorithm and plot the results.
        :param dataType: gsm / lte
        :return:
        '''
        if dataType != "gsm" and dataType != "lte":
            raise Exception("Unknown data type")

        # Results of the classifier and the regressor
        regRes = []
        claRes = []

        predorCla = Predictor("cla", dataType)
        predorReg = Predictor("reg", dataType)
        for time in range(10):
            print(str(time) + " -- begin")

            # Run the algorithms
            claRes.append(predorCla.fit())
            regRes.append(predorReg.fit())
            print(str(time) + " -- change data")
            # Regenerate a new split of the data
            predorReg.changeData()
            predorCla.changeData()

        # Sort the results
        regRes.sort()
        claRes.sort()

        folder = "gsmResult/" if "gsm" == dataType else "lteResult/"

        # Plot the results
        self.draw(regRes, folder + "regImg")
        self.draw(claRes, folder + "claImg")

        # Write out the results and the median values
        resFile = open(folder + "res", 'w')
        resFile.write("reg result:\n")
        resFile.write(str(regRes))
        resFile.write("\nreg mid:\n")
        resFile.write(str((regRes[4] + regRes[5]) / 2))

        resFile.write("\ncla result:\n")
        resFile.write(str(claRes))
        resFile.write("\ncla mid:\n")
        resFile.write(str((claRes[4] + claRes[5]) / 2))
        resFile.close()
Example #5
 def __init__(self):
     self.FORMAT = pyaudio.paFloat32
     self.CHANNELS = 1
     self.RATE = 44100
     self.CHUNK = 1024 * 2
     self.p = None
     self.stream = None
     self.chunksRead = None
     self.predictor = Predictor()
Example #6
    def __init__(self):

        if exists('database.dat'):
            # deserializing the database is much faster.
            print('deserialize the QA database...')
            self.search_engine = SearchEngine('cppjieba/dict', 'database.dat')
        else:
            # loading the database from txt is slower.
            print('loading the QA database from txt format...')
            self.search_engine = SearchEngine('cppjieba/dict')
            self.search_engine.loadFromTxt('question_answer.txt')
            self.search_engine.save('database.dat')
        self.predictor = Predictor()
Example #7
 def get_data(self):
     # Create the predictor that wraps the SVM model
     p = Predictor(self.filePath)
     # Gets the result using the SVM model
     p.assess_func()
     #store result
     self.result = p.result_data
     self.data = p.raw_data  #store raw data
     self.channelName = p.header  #store electrodes information for the GUI
     self.eegLength = len(
         p.raw_data) / 256  #gets the length of the eeg in seconds
     self.bad = p.bad  # Bad channels in the EEG
     self.powers = p.powers  # power of different waves in the eeg
Example #8
def DoWork(source_file, m, k, fltr, trade_cost, testing_start_date,
           testing_end_date):

    # Create the predictor and load the historical weekly data
    p = Predictor(source_file, m, k, fltr, testing_start_date,
                  testing_end_date)
    p.read_file_to_daily_data_by_weeks(0)
    p.calc_historical_weekly_return(p.daily_data_by_weeks)

    # Initialize variables
    strategy_trade_count = 0
    in_market_count = 0
    current_state = 0
    strategy_capital = 100
    bh_capital = 100
    strategy_weekly_return = []
    bh_weekly_return = []

    for idx in range(p.start_split, p.end_split):

        # Generate weekly signals
        p.weekly_return_data = p.historical_weekly_return_data[:idx - 1]
        alist = p.find_k_closest_histories()
        r = p.calc_next_week_return(alist)
        cur_signal = p.signal(current_state, r)

        # Signal handling
        cur_index_price = p.daily_data_by_weeks[idx][-1][2]
        prev_index_price = p.daily_data_by_weeks[idx - 1][-1][2]
        actual_return = math.log(cur_index_price) - math.log(prev_index_price)
        if current_state != cur_signal:
            current_state = cur_signal
            strategy_trade_count += 1
            strategy_capital = strategy_capital * (1 - trade_cost)

        # K Nearest Neighbor
        strategy_capital = strategy_capital * (1 +
                                               actual_return * current_state)
        strategy_weekly_return.append(actual_return)

        # Buy-and-Hold
        if current_state == 1:
            #in_market_count += 1
            bh_capital = bh_capital * (1 + actual_return)
            bh_weekly_return.append(actual_return)

        print('{0}, {1}, {2}'.format(p.daily_data_by_weeks[idx - 1][-1][0],
                                     strategy_capital, bh_capital))
Example #9
def load(positive_class, negative_class):
    """
    Helper function that loads in the model and creates a DirectoryManipulator.

    :return: The model and a DirectoryManipulator.
    """
    print("#" * 15)
    print("loading model...")
    print("#" * 15)
    p = Predictor(
        r"models\inceptionResNetV2_optimized_h5\inceptionResNetV2_optimized.h5",
        (positive_class, negative_class))
    print("#" * 15)
    print("model loaded.")
    print("#" * 15)
    d = DirectoryManipulator()
    return p, d
Example #10
class MLP_Runner:
    trainer = Trainer()
    predictor = Predictor()

    def data_load(self):
        dataset, datatarget, T_len = self.db.MLP_fetch_data()
        return dataset, datatarget, T_len

    def train(self, dataset, datatarget, T_len):
        print("check......................................! \n")
        data_set, data_target, test_set, test_target = self.trainer.one_hot_encode_normalize(dataset, datatarget, T_len)
        print("done.............")
        
        self.trainer.Train(data_set, data_target)
        
        print(test_set[:1000])
        self.predictor.predict(test_set, test_target)
Example #11
def lambda_handler(event, context):
    """Use a model for an existing author to generate length words, interleaved
       with user text input."""
    author = event["author"]
    user_text = event["userText"]
    length = event["length"]

    # Load in the predictor
    model_file = get_dir_for_author(author) + author + ".model"
    vocab_file = get_dir_for_author(author) + author + ".vocab"
    predictor = Predictor(128, model=model_file, vocab=vocab_file)

    # Clean the user data and separate out unknown words.
    common_vocab = read_common_vocab(get_dir_for_author(author) + author + ".commons")
    data, unique_user_words = clean_input_data(user_text, common_vocab)

    generated_sample = predictor.sample(length)
    return clean_generated_data(' '.join(generated_sample), unique_user_words)
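
For reference, a hedged sketch of calling the handler locally; the event values are invented for illustration, and in practice the event is supplied by AWS Lambda:

# Hypothetical event: its keys mirror the ones read by lambda_handler above.
event = {
    "author": "austen",                      # model, vocab and commons files must already exist
    "userText": "it is a truth universally", # text to interleave with generated words
    "length": 50,                            # number of words to generate
}
print(lambda_handler(event, context=None))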
Example #12
def update_bert(session):

    assert type(session) is str
    # download the QA pairs from the database.
    try:
        logger.info('get the latest knowledge from wd_qa_knowledge...')
        db = MySQLdb.connect(host=db_host,
                             user=db_usr,
                             passwd=db_psw,
                             db=db_name,
                             charset='utf8')
        sql = "select question,answer from wd_qa_knowledge"
        cur = db.cursor()
        cur.execute(sql.encode('utf-8'))
        qa = str()
        for row in cur.fetchall():
            qa += row[0] + "\t" + row[1] + "\n"
        with open("question_answer.txt", "wb") as f:
            f.write(qa.encode('utf-8'))
        db.commit()
        db.close()
    except Exception as e:
        logger.error(e)
        response = jsonify({'status': 'failure'})
        socketio.emit('msg', namespace='/socket', room=session, data=response)
        return
    # generate dataset.
    try:
        logger.info('generating training set...')
        from subprocess import call
        call(
            ["./create_dataset", "-i", "question_answer.txt", "-o", "dataset"])
    except Exception as e:
        logger.error(e)
        response = jsonify({'status': 'failure'})
        socketio.emit('msg', namespace='/socket', room=session, data=response)
        return
    # finetune model
    logger.info('training...')
    from Predictor import Predictor
    predictor = Predictor()
    predictor.finetune('dataset')
    response = jsonify({"status": "success"})
    socketio.emit('msg', namespace='/socket', room=session, data=response)
Example #13
 def searchProperties(self, BHK, FloorArea, Bathrooms, Locality,
                      FurnishingStatus, TypeofSale):
     configParser = configparser.ConfigParser(allow_no_value=True)
     keys = []
     places = []
     configFilePath = 'config.ini'
     configParser.read(configFilePath)
     if (len(configParser.read(configFilePath)) > 0):
         details = configParser.sections()[0]
         for key in configParser[details]:
             keys.append(key)
         places += configParser[details][keys[0]].split(',')
     places_key = []
     for place in places:
         places_key += ['Locality_' + place]
     inp = {
         'BHKs': [BHK],
         'FloorArea': [FloorArea],
         'Bathrooms': [Bathrooms]
     }
     for place in places_key:
         inp[place] = [0]
     inp.update({
         'Type_of_Sale_New': [0],
         'Type_of_Sale_Resale': [0],
         'Furnishing_Status_Furnished': [0],
         'Furnishing_Status_Semi-Furnished': [0],
         'Furnishing_Status_Unfurnished': [0]
     })
     inp['Locality_' + Locality] = [1]
     inp['Furnishing_Status_' + FurnishingStatus] = [1]
     inp['Type_of_Sale_' + TypeofSale] = [1]
     #print(inp)
     predictor = Predictor()
     #print(type(predictor.regressor))
     #print(predictor.price)
     #return 0
     price = predictor.getPredictedPrice(inp)
     if price:
         return price
     else:
         predictor.managePredictor()
         return predictor.getPredictedPrice(inp)
Example #14
    def __init__(self,
                 location,
                 inputFile,
                 outputDir=None,
                 cns=False,
                 reject=None,
                 angleOnly=False,
                 ppm=False,
                 progressBar=None,
                 writePgm=True):

        self.input = inputFile
        self.progressBar = progressBar

        print('DANGLE (version 1.1)')
        print(DANGLE_CITE)

        # 1. read config file for location of reference information
        self.reference = Reference(os.path.dirname(location))
        self.reference.outDir = outputDir or OUTDIR
        if not os.path.isdir(self.reference.outDir):
            os.makedirs(self.reference.outDir)

        self.reference.cns = cns
        self.reference.ppm = ppm
        self.reference.angleOnly = angleOnly

        if (reject is not None):
            self.reference.rejectThresh = reject

        # 2. read shifts of query protein (input) and calculate secondary shifts
        self.query = Protein(self.reference)
        self.query.readShiftsFromXml(inputFile)

        # 3. compare with DB
        print('STEP1: Shift search')
        self.topMatches = self.compareWithShiftDB()

        # 4. make predictions from scorograms
        print('STEP2: GLE generation')
        self.predictor = Predictor(self.query, self.topMatches, self.reference,
                                   writePgm)
        self.predictions = self.predictor.predictPhiPsiFromDatabaseMatches(
            progressBar=self.progressBar)
Example #15
def load_model():
    global predictor

    current_model_dir = model_dir + 'current_model/'
    
    if not os.path.exists(current_model_dir):
        os.mkdir(current_model_dir)

    model_path = get_model_file(current_model_dir)
    if model_path is None:
        with zipfile.ZipFile(model_dir + 'current_model.zip', 'r') as zip_ref:
            zip_ref.extractall(current_model_dir)

    model_path = get_model_file(current_model_dir)

    predictor = Predictor(current_model_dir + model_path)
Example #16
 def simulate_generations(self, num_generations, print_best):
     file_path = "csv/ESN_Results.csv"
     dw = DataWriter()
     dr = DataReader()
     dw.init_table(file_path)
     p = Predictor()
     mapping = dr.get_mapping()
     images = dr.get_images(112800, 28, 28) # 112800 images in data set
     scale_factor = 10
     for i in range(num_generations):
         sum = 0
         best_score = -100
         best_accuracy = -100
         best_net = []
         engines = []
         for net in self.networks:
             engine = [net, 0, 0]
             engines.append(engine)
         p.make_predictions(engines, mapping, images, scale_factor)
         for j in range(len(engines)):
             self.networks[j].fitness = engines[j][1]
             if engines[j][2] > best_score:
                 best_score = engines[j][2]
                 best_net = self.networks[j]
             if engines[j][1] > best_accuracy:
                 best_accuracy = engines[j][1]
         avg_accuracy = self.avg_fitness(self.networks) # avg accuracy
         for j in range(len(engines)):
             self.networks[j].fitness = engines[j][2] # change fitness to score
         avg_score = self.avg_fitness(self.networks) # avg score
         avg_size = self.avg_network_size()
         if print_best:
             best_net.show_net()
         print("-----------------------------------\t\t\t\t\t\t\n       Generation " + str(i+1) + " results\n-----------------------------------\n", end='\n')
         print("Highest accuracy: " + str(best_accuracy*100) + "%\nHighest score: " + str(best_score**(1.0/scale_factor)) + "\nAverage accuracy: " + str(avg_accuracy*100) + "%\nAverage score: " + str(avg_score**(1.0/scale_factor)) + "\nNum species: " + str(len(self.species)) + "\nInnovs tried: " + str(self.networks[0].master_innov[0]) + "\nAverage connections per network: " + str(avg_size) + "\n")
         
         non_jit = self.construct_non_jit(best_net)
         pickle.dump(non_jit, open("neural_net.txt", "wb"))
         dw.write_row(file_path, [i+1, best_accuracy*100, avg_accuracy*100, best_score**(1.0/scale_factor), avg_score**(1.0/scale_factor), avg_size])
         if i != num_generations-1:
             self.prepare_next_gen(math.ceil(self.pop_size/10))
             print("\nStarting Generation " + str(i+2) + ": Species = " + str(len(self.species)) + ", Innovs = " + str(self.networks[0].master_innov[0]), end='\n')
     print("Finished simulation!")
Example #17
def train(datasetPath, modelSavePath):
    print('reading file...')
    frame = read_data(datasetPath)

    # Process data into feature and label arrays
    print("Processing {} samples with {} attributes".format(
        len(frame.index), len(frame.columns)))
    X_train, X_test, y_train, y_test = get_features_and_labels(frame)

    #print('', y_test)
    #print('', X_test[0])

    print('training...')
    pred = Predictor()
    pred.learn(X_train, y_train)

    y_pred = pred.predict1(X_test)
    score = f1_score(y_test, y_pred, average='weighted')
    print('score = ', score)
    pred.save(modelSavePath)
    return score
Example #18
def main():
    print "-- Welcome to movie-recommend! --"

    # for output readability
    np.set_printoptions(formatter={'float_kind': '{:25f}'.format})

    # baseline predictor by default
    mode = BASELINE

    # read command-line argument, if provided
    if len(sys.argv) > 1:
        if sys.argv[1] == IMPROVED or sys.argv[1] == BASELINE:
            mode = sys.argv[1]
            print "\tYou chose", mode, "predictor!"
        else:
            print "\t", sys.argv[
                1], "is not a valid argument. Default:", mode, "predictor!"
    else:
        print "\tYou did not provide any arguments. Default:", mode, "predictor!"

    # read and parse text files
    parser = Parser(mode)
    print "\tParser initialized:"
    print "\t\t", len(parser.test_set), "test points and", np.count_nonzero(
        parser.training_matrix), "training points"

    # initialize predictor and calculate rmse
    predictor = Predictor(mode, parser.training_matrix, parser.test_set)
    print "\trmse on test data (baseline):", predictor.rmse_test
    if predictor.mode == BASELINE:
        print "\trmse on training data (baseline):", predictor.rmse_training
    else:
        print "\trmse on test data (improved):", predictor.rmse_test_improved

    # execute histogram plotting and get error distribution
    error_dist = predictor.calculate_absolute_errors(
        parser.test_set, predictor.improved_matrix
    ) if predictor.mode == IMPROVED else predictor.calculate_absolute_errors(
        parser.test_set, predictor.baseline_matrix)
    print "\tHistogram saved to file. Error distribution:", error_dist
Example #19
def main():
    toy = True

    market, news = read_data()
    train_idx, val_idx, test_idx = split_data(market, toy)

    # Create preprocessors
    market_prepro = MarketPrepro()
    market_prepro.fit(train_idx, market)
    news_prepro = NewsPrepro()
    news_prepro.fit(train_idx, news)
    prepro = JoinedPreprocessor(market_prepro, news_prepro)

    # Train data generator instance
    join_generator = JoinedGenerator(prepro, train_idx, market, news)
    val_generator = JoinedGenerator(prepro, val_idx, market, news)
    print('Generators created')

    # Create and train model
    model = ModelFactory.lstm_128(
        len(market_prepro.feature_cols) + len(news_prepro.feature_cols))
    model.load_weights("best_weights.h5")
    model.summary()
    ModelFactory.train(model, toy, join_generator, val_generator)

    # Predict
    predictor = Predictor(prepro, market_prepro, news_prepro, model,
                          ModelFactory.look_back, ModelFactory.look_back_step)
    y_pred, y_test = predictor.predict_idx(test_idx, market, news)

    y_pred = predictor.predict(market, news)

    plt.plot(y_pred)
    plt.plot(y_test)
    plt.legend(["pred", "test"])
    plt.show()

    # get_merged_Xy(train_idx.sample(5), market, pd.DataFrame([],columns=news.columns)).head()
    print('The end')
Example #20
def main(argv):

  yolov3 = tf.keras.models.load_model(FLAGS.model, compile = False);
  predictor = Predictor(yolov3 = yolov3);
  anno = COCO(join(FLAGS.annotation_dir, 'instances_val2017.json'));
  count = 0;
  detections = list(); # accumulate detections across all images
  for imgid in anno.getImgIds():
    print("processing (%d/%d)" % (count, len(anno.getImgIds())));
    # predict
    img_info = anno.loadImgs([imgid])[0];
    img = cv2.imread(join(FLAGS.coco_eval_dir, img_info['file_name']));
    boundings = predictor.predict(img).numpy();
    # collect results
    for bounding in boundings:
      detections.append([imgid, bounding[0], bounding[1], bounding[2] - bounding[0], bounding[3] - bounding[1], bounding[4], label_map.index(int(bounding[5]) + 1)]);
    count += 1;
  cocoDt = anno.loadRes(np.array(detections));
  cocoEval = COCOeval(anno, cocoDt, iouType = 'bbox');
  cocoEval.params.imgIds = anno.getImgIds();
  cocoEval.evaluate();
  cocoEval.accumulate();
  cocoEval.summarize();
Example #21
def main():
    training_set = TrainingSetHandler()
    training_set.load_training_set()
    gram_list = []

    for size in Constants.SIZE_OF_GRAMS:
        for language in training_set.language_list:
            gram = NGram(size, string.ascii_lowercase, 0.5)
            gram.train(training_set.training_set[language], language)
            gram_list.append(gram)

    #dump copies of grams to file
    dump_grams(gram_list)

    predic = Predictor(gram_list)
    test_set_handler = TestSetHandler()
    test_set_handler.load_test_sentence()
    for idx, sentence in enumerate(test_set_handler.test_set):
        clean_sentence = "".join([c for c in sentence[1] if c.isalpha()]).lower()
        prediction = predic.predict_this_sentence(clean_sentence)
        with open(os.path.join(Constants.OUTPUT_PATH, "out{}.txt".format(idx)), 'w') as f:
            output = OutputHelper(prediction, sentence, f)
            output.print_and_save_output()