Exemplo n.º 1
0
def main():
    """Decode Maxkon 433MHz weather-station packets and print them.

    Command-line options:
      --file / -f            decode the samples of a WAV file, then return
      --show-raw / -r        forwarded to ``print_packet`` as its second argument
      --print-on-times / -t  forwarded to the ``Decoder`` constructor

    Without ``--file`` an ``AudioIn`` is constructed with a callback that
    decodes whatever samples it is handed, and the function then sleeps in
    an endless loop (it never returns in that mode).
    """
    parser = argparse.ArgumentParser(description='Decode packets from the Maxkon 433MHz weather station.')
    parser.add_argument('--file', '-f', help='Decode packets from a WAV file')
    parser.add_argument('--show-raw', '-r', action='store_true', help='Show raw packet data in hexidecimal')
    parser.add_argument('--print-on-times', '-t', action='store_true', help='Show the on times as reported by the PWMDecoder')
    args = parser.parse_args()

    decoder = Decoder(args.print_on_times)

    def handle_samples(samples):
        # Shared by the file path and the live-audio callback.
        for packet in decoder.add_samples(samples):
            print_packet(packet, args.show_raw)

    if args.file:
        # Suppress WAV file warnings only while reading; catch_warnings
        # restores the caller's filter configuration on exit instead of
        # forcing the global 'default' filter afterwards.
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            _, samples = wavfile.read(args.file)
        handle_samples(samples.tolist())
        return

    # Keep a reference so the audio source is not garbage collected.
    audio_in = AudioIn(handle_samples)

    while True:
        time.sleep(0.1)
Exemplo n.º 2
0
def get_n_best_lists(initial_params, n, args):
    """Decode every training song and collect its n-best hypothesis list.

    :param initial_params: indexable of three weights, used in order as
        ``tm_phrase_weight``, ``tm_notes_weight`` and ``lm_weight``
    :param n: passed to ``Decoder.decode`` (number of hypotheses requested)
    :param args: namespace providing ``training_songs`` (path of a text file
        with one song path per line), ``melodies`` (comma-separated part
        names), ``harmony`` and ``model_directory``
    :return: dict mapping each successfully decoded song path to the value
        returned by ``Decoder.decode(n)``; songs whose decoding raises are
        reported on stderr and left out
    """
    sys.stderr.write("Getting n best lists...\n")
    n_best_lists = {}
    # The context manager closes the song list even if parsing a song raises.
    with open(args.training_songs, "r") as song_list:
        for path in song_list:
            path = path.strip()
            if not path:
                continue
            training_song = converter.parse(path)
            transpose(training_song, "C")
            sys.stderr.write("transposed " + path + "\n")
            lm = LanguageModel(args.harmony, "%s/%s_language_model.txt" % (args.model_directory, args.harmony))
            melodies = args.melodies.split(",")
            tms = []
            for melody in melodies:
                phrases = "%s/%s_%s_translation_model_rhythm.txt" % (args.model_directory, melody, args.harmony)
                notes = "%s/%s_%s_translation_model.txt" % (args.model_directory, melody, args.harmony)
                tms.append(TranslationModel(melody, args.harmony, phrases, notes))
            d = Decoder([(melody, training_song.parts[melody]) for melody in melodies],
                        lm, tms,
                        tm_phrase_weight=initial_params[0], tm_notes_weight=initial_params[1],
                        lm_weight=initial_params[2])
            try:
                n_best_lists[path] = d.decode(n)
                sys.stderr.write("decoded " + path + "\n")
            except Exception as e:
                # Best effort: a song that fails to decode is skipped.
                # Terminate the message so it does not run into the next line.
                sys.stderr.write(str(e) + "\n")

    return n_best_lists
Exemplo n.º 3
0
 def download_file_task(self, file_obj, item):
   """Queue all articles of ``file_obj``, wait for the decoder, and return the result.

   A ``Decoder`` is created for the file, one ``DownloadInfo`` per article is
   put on ``self.article_queue`` while ``self.article_lock`` is held, and the
   call then blocks in ``decoder.wait()``.  If ``file_obj`` appears in
   ``item.nzb.rars`` or ``item.nzb.pars`` under a different name, that entry
   is renamed to ``decoder.filename``.  ``item.save()`` is called before
   returning.

   Returns the tuple ``(decoder.filename, decoder.data)``.
   """
   funcName = "[Downloader.download_file_task]"
   log(6, funcName, 'Downloading file:', file_obj.name)
   decoder = Decoder(item, file_obj)
   # Acquire before entering the try block so that the finally clause only
   # ever releases a lock this call actually holds.
   Thread.AcquireLock(self.article_lock)
   try:
     for article_obj in file_obj.articles:
       self.article_queue.put(DownloadInfo(file_obj, article_obj, decoder, item))
       log(9, funcName, 'self.article_queue.qsize():', self.article_queue.qsize())
   except Exception:
     # Best effort, as before, but no longer swallows KeyboardInterrupt or
     # SystemExit the way a bare "except:" did.
     log(1, funcName, 'Error adding articles to queue')
   finally:
     Thread.ReleaseLock(self.article_lock)
   log(7, funcName, 'Waiting for decoder to complete for file', file_obj.name)
   decoder.wait()
   log(7, funcName, 'downloaded filename:', decoder.filename, 'size:', len(decoder.data))
   # Keep the nzb bookkeeping in sync with the name the decoder reported.
   for kind in ('rars', 'pars'):
     nzb_files = getattr(item.nzb, kind)
     if file_obj in nzb_files:
       entry = nzb_files[nzb_files.index(file_obj)]
       if entry.name != decoder.filename:
         log(3, funcName, 'Updating item nzb %s file to' % kind, decoder.filename)
         entry.name = decoder.filename

   item.save()
   return decoder.filename, decoder.data
Exemplo n.º 4
0
def main():
    """Download satellite data, decode it and store the results in the database.

    Exits with status 1 if the downloader cannot be created.  Decode and
    database errors are printed and processing continues with the next
    downloaded entry.
    """
    database = DBController()
    decoder = Decoder([TLEDecoder(), TLEListDecoder()])

    try:
        downloader = Downloader()
    except DownloaderError as err:
        print("failed to initialize downloader: " + str(err))
        sys.exit(1)

    for entry in downloader.get_data():
        try:
            satellites = decoder.decode(entry.fmt, entry.data)
        except DecoderError as err:
            print("failed to decode: " + str(err))
            # Nothing to add for this entry; the sync below still runs.
            satellites = []

        try:
            for satellite in satellites:
                database.add(satellite)
            database.sync()
        except DBError as err:
            print("failed to insert into db: " + str(err))
Exemplo n.º 5
0
 def parse_contents(f, bytes_to_read):
   '''
   Decode caption statements from the binary file object f.

   Statements are decoded one at a time until the summed len() of the
   non-empty statements reaches bytes_to_read.  Returns the list of decoded
   statements in the order they were read.
   '''
   if DEBUG:
     print('going to read {bytes} bytes in binary file caption statement.'.format(bytes=bytes_to_read))
   # TODO: check whether decoder state is carried between packets.  A fresh
   # decoder (and therefore fresh state) is created for every payload, which
   # may be incorrect.
   caption_decoder = Decoder()
   collected = []
   consumed = 0
   # NOTE(review): a falsy statement does not advance the byte count, so the
   # loop relies on decode() eventually returning data - confirm behaviour
   # at end of file.
   while consumed < bytes_to_read:
     item = caption_decoder.decode(f)
     if not item:
       continue
     consumed += len(item)
     collected.append(item)
   return collected
Exemplo n.º 6
0
 def decode(self):
     """Decode ``self.args.input`` using ``self.args.mappings`` and write ``self.args.output``.

     The input is read as bytes, the mappings file is parsed as JSON, and the
     value returned by ``Decoder(...).decode()`` is written to the output file
     in text mode.  Every file is opened in a ``with`` block so its handle is
     closed even when reading or decoding fails.
     """
     with open(self.args.input, 'rb') as encoded_file:
         encoded_content = encoded_file.read()
     with open(self.args.mappings, 'r') as mappings_file:
         mappings = json.load(mappings_file)
     raw_content = Decoder(encoded_content, mappings).decode()
     with open(self.args.output, 'w') as f:
         f.write(raw_content)
Exemplo n.º 7
0
class JsonUnmarshaler(object):
    """Top-level Unmarshaler the Reader uses for JSON payloads.

    Direct use is possible but strongly discouraged.
    """
    def __init__(self):
        self.decoder = Decoder()

    def load(self, stream):
        """Parse one JSON document from ``stream`` and return it decoded.

        Objects are parsed into ``OrderedDict`` so key order is preserved.
        """
        parsed = json.load(stream, object_pairs_hook=OrderedDict)
        return self.decoder.decode(parsed)

    def loadeach(self, stream):
        """Lazily yield the decoded form of each item ``sosjson.items`` produces."""
        items = sosjson.items(stream, object_pairs_hook=OrderedDict)
        for item in items:
            yield self.decoder.decode(item)
Exemplo n.º 8
0
class MsgPackUnmarshaler(object):
    """The top-level Unmarshaler used by the Reader for MsgPack payloads.
    While you may use this directly, it is strongly discouraged.
    """
    def __init__(self):
        self.decoder = Decoder()
        # Kept for backward compatibility with code that reads this attribute;
        # loadeach() builds its own stream-bound unpacker.
        self.unpacker = msgpack.Unpacker(object_pairs_hook=OrderedDict)

    def load(self, stream):
        """Unpack one MsgPack object from ``stream`` and return it decoded."""
        return self.decoder.decode(msgpack.load(stream, object_pairs_hook=OrderedDict))

    def loadeach(self, stream):
        """Lazily yield the decoded form of every MsgPack object in ``stream``.

        Previously this iterated ``self.unpacker``, which was never given
        ``stream``, so the argument was ignored and nothing from it was
        read.  The unpacker is now bound to ``stream``, matching ``load``.
        """
        unpacker = msgpack.Unpacker(stream, object_pairs_hook=OrderedDict)
        for o in unpacker:
            yield self.decoder.decode(o)
Exemplo n.º 9
0
 def build(self):
     """Set the window size, create the decoder and start the periodic work.

     ``self.update_time`` is scheduled every 0.5 s and ``self.visualize``
     every 0.4 s; ``self.get_candata`` runs on a daemon thread so it does not
     keep the process alive.  Returns None.
     """
     Window.size = (1024, 520)
     self.dec = Decoder(self.prop_set)
     Clock.schedule_interval(self.update_time, .5)
     Clock.schedule_interval(self.visualize, .4)
     thr = threading.Thread(target=self.get_candata)
     # Thread.setDaemon() is deprecated; the attribute works on old and new
     # Python versions alike.
     thr.daemon = True
     thr.start()
def ric_decode(imgbuf):
    """Decode a RIC image buffer and return it re-encoded as JPEG data.

    The buffer must start with an "FTYP" box whose payload is "RIC ",
    followed by an "ILOT" box; everything after those two boxes is handed to
    wrapper.unwrapLayers and the resulting layers to Decoder().decode.
    Returns the JPEG data (quality 90), or None after writing a message to
    stderr when either leading box is not the expected one.
    """
    pos = 0

    # First box: file type.
    size, kind, body = iso_media.read_box(imgbuf[pos:])
    if not (kind == "FTYP" and body == "RIC "):
        print >> sys.stderr, "Fishy file type!!!", kind, body
        return None
    pos += size

    # Second box: offset table.
    size, kind, body = iso_media.read_box(imgbuf[pos:])
    if kind != "ILOT":
        print >> sys.stderr, "No offset table???", kind
        return None
    pos += size

    image = Decoder().decode(wrapper.unwrapLayers(imgbuf[pos:]))
    sink = StringIO()
    image.save(sink, "JPEG", quality=90)
    return sink.getvalue()
Exemplo n.º 11
0
 def __init__(self, fname):
     """Remember the file name, create the decoder and initial state, then reset.

     The decoder is handed this instance.  Breakpoint tables start empty,
     hex input mode starts off, and SP is the only tracked register.
     """
     self.fname = fname
     self.decoder = Decoder(self)
     self.breakpoints, self.breakpoint_conditions = {}, {}
     self.prev_input = None
     self.hex_input_mode = False
     self.tracked_registers = {SP}
     self.reset()
Exemplo n.º 12
0
    def decode(self, reader, writer):
        '''
        compute pseudo-likelihoods for the testing set

        Each utterance is fed through the network, the output is divided by
        the prior loaded from <savedir>/prior.npy, and the log of the result
        is written with the writer. The writer is closed at the end.

        Args:
            reader: a feature reader object to read features to decode
            writer: a writer object to write likelihoods
        '''

        #create a decoder
        decoder = Decoder(self.dnn, self.input_dim, reader.max_input_length)

        #read the prior
        prior = np.load(self.conf['savedir'] + '/prior.npy')

        #smallest positive float used to replace exact zeros before the log
        floor = np.finfo(float).eps

        #start tensorflow session
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True #pylint: disable=E1101
        with tf.Session(graph=decoder.graph, config=config):

            #load the model
            decoder.restore(self.conf['savedir'] + '/final')

            #feed the utterances one by one to the neural net
            while True:
                utt_id, utt_mat, looped = reader.get_utt()

                if looped:
                    break

                #compute predictions
                output = decoder(utt_mat)

                #get state likelihoods by dividing by the prior
                output = output/prior

                #floor the values to avoid problems with log; np.where returns
                #a new array, so the result has to be assigned (it used to be
                #discarded, leaving the zeros in place)
                output = np.where(output == 0, floor, output)

                #write the pseudo-likelihoods in kaldi feature format
                writer.write_next_utt(utt_id, np.log(output))

        #close the writer
        writer.close()
Exemplo n.º 13
0
def run(file_name):
    """Classify every pixel of ``<file_name>.png`` and dump the result as JSON.

    Each scanline from the decoder is turned into pixels with
    ``create_pixels``; each pixel is classified from its red, green, blue
    and alpha values.  The nested list of classifications (one inner list
    per scanline) is written to ``<file_name>_colors.json``.
    """
    png = Decoder('{}.png'.format(file_name))

    rows = []
    for scanline in png.decode():
        rows.append(create_pixels(png.Pixel, scanline, png.bytes_per_pixel))

    classifier = Classifier()
    color_lines = [
        [classifier.classify(Point(px.red, px.green, px.blue, px.alpha)) for px in row]
        for row in rows
    ]

    with open('{}_colors.json'.format(file_name), 'w') as out:
        out.write(json.dumps(color_lines))
Exemplo n.º 14
0
def decodeAndSolve(image, showSolution=False):
    """Decode a sudoku from ``image``, solve it and optionally display the solution.

    :param image: passed to ``Decoder``
    :param showSolution: when true, the solved digits of every cell not
        listed in ``d.numberLocations`` are drawn in green on a colour copy
        of ``d.puzzleImage`` and shown in a window until a key is pressed
    :return: None
    """
    d = Decoder(image)
    d.decode()
    solution = SuDoKu(d.puzzle).solution()
    if not showSolution:
        return

    # Text origin of cell (col, row) is (col * 100 + 30, row * 100 + 70).
    # NOTE(review): presumably puzzleImage is a 900x900 grid of 100 px cells
    # - confirm against Decoder.
    cell, x_offset, y_offset = 100, 30, 70

    img = cv2.cvtColor(copy(d.puzzleImage), cv2.COLOR_GRAY2BGR)
    # Iterate integer grid indices directly: deriving them back from pixel
    # coordinates with "/" yields floats on Python 3, which cannot index the
    # solution lists.
    for col in range(9):
        for row in range(9):
            if (col, row) in d.numberLocations:
                continue
            cv2.putText(
                img,
                str(solution[col][row]),
                (col * cell + x_offset, row * cell + y_offset),
                cv2.FONT_HERSHEY_PLAIN,
                4,
                (0, 150, 0),
                thickness=6,
            )
    cv2.imshow("Solution - Press any key to exit.", img)
    cv2.waitKey(0)
    def simulate(self, mode=Decoder.SUM_PROD):
        """
        Run ``self.iterations`` encode/transmit/decode rounds per variance level.

        For every entry of ``self.variance_levels`` a fresh ``Decoder`` is
        created; each round builds a new codeword, transmits it at that
        variance and decodes the transmission.  Results are stored as one
        inner list per variance level in ``self.codewords``,
        ``self.decoded`` and ``self.transmissions``.

        :param mode: The algorithm (sum-prod vs max-prod) to use in the decoder simulations
        :return: None; results are stored on the instance
        """
        self.mode = mode
        self.codewords = []
        self.decoded = []
        # Reset together with the other result lists so that repeated calls do
        # not leave stale transmissions from an earlier run in front of the
        # new ones (and so the attribute exists on the first call).
        self.transmissions = []
        for var in self.variance_levels:
            codewords = []
            decoded = []
            transmissions = []
            decoder = Decoder(var, self.mode)
            for _ in range(self.iterations):
                code = codeword.Codeword()
                codewords.append(code.codeword)
                decoded.append(decoder.decode(code.transmit(var)))
                # Read after transmit() has been called on the codeword.
                transmissions.append(code.transmission)
            self.codewords.append(codewords)
            self.decoded.append(decoded)
            self.transmissions.append(transmissions)
Exemplo n.º 16
0
 def _predict_processing(self , predict_path , output_path) :
     """Segment every instance read from predict_path and write one line per instance.

     output_path may be an open file object, the string "stdout", or a path
     that is opened for writing.  The output is closed at the end unless it
     is sys.stdout.
     """
     if isinstance(output_path , file) :
         output_f = output_path
     elif output_path == "stdout" :
         output_f = sys.stdout
     else :
         output_f = open(output_path , "w")
     logging.info("set output %s " %(output_f.name))
     logging.info("reading instance from %s . predicting ." %(predict_path))
     for instance , separator_data in DatasetHandler.read_predict_data(predict_path) :
         # The constraint data has to be in place before decoding the instance.
         self.constrain.set_constrain_data(separator_data)
         predict_tags = Decoder.decode_for_predict(self.extractor , self.model , self.constrain , instance)
         segmented_line = self._processing_unigrams_and_tags2segmented_line(instance,predict_tags)
         output_f.write(segmented_line + os.linesep)
     if output_f is not sys.stdout :
         output_f.close()
     logging.info("predicting done.")
Exemplo n.º 17
0
 def _4training_evaluate_processing(self , dev_path) :
     """Evaluate the current model on the development data and return the F value.

     For every instance the gold and predicted tag sequences are converted
     to word coordinate sequences and compared.  The accumulated counts are
     turned into p, r and f, a summary is printed to stderr, and f is
     returned.
     """
     totals = [0 , 0 , 0] # gold words , predicted words , correctly predicted words
     for instance in DatasetHandler.read_dev_data(dev_path) :
         unigrams , gold_tags = Segmentor._processing_one_segmented_WSAtom_instance2unigrams_and_tags(instance)
         predict_tags = Decoder.decode_for_predict(self.extractor , self.model , self.constrain , unigrams)
         gold_coor_seq = self.__innerfunc_4evaluate_generate_word_coordinate_sequence_from_tags(gold_tags)
         predict_coor_seq = self.__innerfunc_4evaluate_generate_word_coordinate_sequence_from_tags(predict_tags)
         cur_gold , cur_predicted , cur_right = (
                 self.__innerfunc_4evaluate_get_nr_gold_and_processing_and_processing_right(gold_coor_seq , predict_coor_seq)
             )
         totals[0] += cur_gold
         totals[1] += cur_predicted
         totals[2] += cur_right
     nr_gold , nr_processing , nr_processing_right = totals
     p , r , f = self.__innerfunc_4evaluate_calculate_prf(nr_gold , nr_processing , nr_processing_right)
     report = ("Eval result :\np : %.2f%% r : %.2f%% f : %.2f%%\n"
            "total word num : %d total predict word num : %d predict right num : %d ")%(
             p * 100 , r * 100, f * 100 , nr_gold , nr_processing , nr_processing_right
             )
     print >>sys.stderr , report
     return f
    def encode_decode(self, k):

        print "\nTesting encoding and then decoding with k = %s" % k

        md5 = hashlib.md5()

        with FileChunker(k, SYMBOLSIZE, DEFAULT_FILE) as chunker:
            chunk = chunker.chunk()
            while chunk:
                padding = chunk.padding

                symbols = [(i, chunk[i]) for i in xrange(k)]
                encoder = Encoder(k, symbols)
                symbols = []

                # Start at k/2 and produce 1.25k more symbols to get a mix
                # of parity and source symbols
                for i in xrange(k * 2):
                    symbols.append(encoder.next())

                encoder = None
                decoder = Decoder(k)
                for tup in symbols:
                    decoder.append(tup)

                decoder.decode()
                decoded = bytearray()
                for i in xrange(k):
                    esi, s = decoder.next()
                    decoded += s.tostring()
                decoder = None

                if padding:
                    padding = 0 - padding
                    print "Removing padding", padding, "bytes"
                    decoded = decoded[:padding]

                md5.update(decoded)
                # Continue on to the next chunk
                chunk = chunker.chunk()

        print "Original digest:", self.original_digest
        print "Decoded digest:", md5.hexdigest()
        return self.original_digest == md5.hexdigest()
Exemplo n.º 19
0
def main(infile, outfile, fromline=0):
    """Turn the 'OK' packet lines of a log file into a script of SQL INSERT statements.

    :param infile: path of the log file to read
    :param outfile: path of the SQL script to write; it starts with a
        ``USE <database>;`` line
    :param fromline: number of leading lines of ``infile`` to skip

    Each remaining line containing 'OK' is split by fixed columns into a date
    (0-10), a time (11-19) and a packet (28 onwards).  The packet is decoded
    and, when it yields data for a node id present in ``nodes``, one INSERT
    for that node's table is written.  The total number of lines read is
    written to ``<log_path>/db_progress``.

    All three files are managed by a ``with`` statement so they are closed
    even if decoding or a lookup raises.
    """
    with open(infile, 'r') as fin, \
            open(outfile, 'w') as fout, \
            open(log_path + '/db_progress', 'w') as fseek:
        fout.write("USE %s;\n" % database)

        line_count = 0  # stays 0 for an empty input file
        for line_count, line in enumerate(fin, 1):
            if line_count <= fromline:
                continue  # skip lines before fromline
            if 'OK' not in line:
                continue
            timestamp = "'" + line[0:10] + ' ' + line[11:19] + "'"
            packet = line[28:].strip()
            nid, data = Decoder.decode(packet)
            if not data or nid not in nodes:
                continue
            table = nodes[nid]['db']['table']
            fieldstring = ', '.join(table['fields'])
            # NOTE(review): values are quoted but not escaped; acceptable
            # only if decoded packet values can never contain a quote.
            stringdata = [timestamp] + ["'" + str(d) + "'" for d in data]
            valstring = ', '.join(stringdata)
            fout.write("INSERT INTO %s (%s) VALUES (%s);\n"
                       % (table['name'], fieldstring, valstring))

        fseek.write(str(line_count))  # write last line to file
Exemplo n.º 20
0
 def __init__(self, N=50):
     """Run Decoder's initialiser on this instance, then store N.

     :param N: stored unchanged as ``self.N`` (default 50); its meaning is
         not visible in this block
     """
     # Explicit base-initialiser call; presumably the enclosing class derives
     # from Decoder - confirm against the class statement.
     Decoder.__init__(self)
     self.N = N
Exemplo n.º 21
0
class AttentiveNP():
    """
    The Attentive Neural Process model.

    Bundles a deterministic encoder, a latent encoder and a decoder, plus a
    single Adam optimiser over the parameters of all three networks.
    """
    def __init__(self, x_size, y_size, r_size, det_encoder_hidden_size, det_encoder_n_hidden,
                 lat_encoder_hidden_size, lat_encoder_n_hidden, decoder_hidden_size,
                 decoder_n_hidden, lr, attention_type):
        """
        :param x_size: An integer describing the dimensionality of the input x
        :param y_size: An integer describing the dimensionality of the target variable y
        :param r_size: An integer describing the dimensionality of the embedding / context
                       vector r
        :param det_encoder_hidden_size: An integer describing the number of nodes per hidden
                                    layer in the deterministic encoder NN
        :param det_encoder_n_hidden: An integer describing the number of hidden layers in the
                                 deterministic encoder neural network
        :param lat_encoder_hidden_size: An integer describing the number of nodes per hidden
                                    layer in the latent encoder neural NN
        :param lat_encoder_n_hidden: An integer describing the number of hidden layers in the
                                 latent encoder neural network
        :param decoder_hidden_size: An integer describing the number of nodes per hidden
                                    layer in the decoder neural network
        :param decoder_n_hidden: An integer describing the number of hidden layers in the
                                 decoder neural network
        :param lr: The optimiser learning rate.
        :param attention_type: The type of attention to be used. A string, either "multihead",
                                "laplace", "uniform", "dot_product"
        """

        self.r_size = r_size
        self.det_encoder = DeterministicEncoder(x_size, y_size, r_size, det_encoder_n_hidden,
                                                det_encoder_hidden_size, self_att=True,
                                                cross_att=True, attention_type=attention_type)
        self.lat_encoder = LatentEncoder((x_size + y_size), r_size, lat_encoder_n_hidden,
                                         lat_encoder_hidden_size, self_att=True)
        # The decoder input size is x_size + 2 * r_size: the target x, the
        # deterministic embedding r and the latent variable z (of size r_size).
        self.decoder = Decoder((x_size + r_size + r_size), y_size, decoder_n_hidden,
                               decoder_hidden_size)
        # One optimiser updates all three networks jointly.
        self.optimiser = optim.Adam(list(self.det_encoder.parameters()) +
                                    list(self.lat_encoder.parameters()) +
                                    list(self.decoder.parameters()), lr=lr)

    def train(self, x_trains, y_trains, x_tests, y_tests, x_scalers, y_scalers, batch_size,
              iterations, testing, plotting, dataname, print_freq):
        """
        Run ``iterations`` optimisation steps, each on one randomly chosen function.

        :param x_trains: A np.array with dimensions [N_functions, [N_train, x_size]]
                         containing the training data (x values)
        :param y_trains: A np.array with dimensions [N_functions, [N_train, y_size]]
                         containing the training data (y values)
        :param x_tests: A tensor with dimensions [N_functions, [N_test, x_size]]
                        containing the test data (x values)
        :param y_tests: A tensor with dimensions [N_functions, [N_test, y_size]]
                        containing the test data (y values)
        :param x_scalers: The standard scaler used when testing == True to convert the
                         x values back to the correct scale.
        :param y_scalers: The standard scaler used when testing == True to convert the predicted
                         y values back to the correct scale.
        :param batch_size: An integer describing the number of times we should
                           sample the set of context points used to form the
                           aggregated embedding during training, given the number
                           of context points to be sampled N_context. When testing
                           this is set to 1
        :param iterations: An integer, describing the number of iterations. In this case it
                           also corresponds to the number of times we sample the number of
                           context points N_context
        :param testing: A Boolean object; if set to be True, then every print_freq
                        iterations metrics_calculator is called with the train and
                        test data
        :param print_freq: The loss is printed whenever iteration % print_freq == 0
        :param dataname: Passed through unchanged to metrics_calculator
        :param plotting: Passed through unchanged to metrics_calculator
        :return: None. The most recent loss tensor is kept in self.losslogger.
        """
        # NOTE(review): the sampled slices are combined with torch.stack below,
        # so the per-function entries presumably have to be torch tensors
        # rather than np.arrays - confirm against the caller.

        n_functions = len(x_trains)

        for iteration in range(iterations):
            self.optimiser.zero_grad()

            # Sample the function from the set of functions
            idx_function = np.random.randint(n_functions)

            x_train = x_trains[idx_function]
            y_train = y_trains[idx_function]

            max_target = x_train.shape[0]

            # During training, we sample n_target points from the function, and
            # randomly select n_context points to condition on.

            # torch.randint excludes `high`: num_target is in [5, max_target)
            # and num_context is in [3, num_target).
            num_target = torch.randint(low=5, high=int(max_target), size=(1,))
            num_context = torch.randint(low=3, high=int(num_target), size=(1,))

            # One independent random permutation per batch element; the context
            # indices are the first num_context of that element's target indices.
            idx = [np.random.permutation(x_train.shape[0])[:num_target] for i in
                   range(batch_size)]
            idx_context = [idx[i][:num_context] for i in range(batch_size)]

            x_target = [x_train[idx[i], :] for i in range(batch_size)]
            y_target = [y_train[idx[i], :] for i in range(batch_size)]
            x_context = [x_train[idx_context[i], :] for i in range(batch_size)]
            y_context = [y_train[idx_context[i], :] for i in range(batch_size)]

            x_target = torch.stack(x_target)
            y_target = torch.stack(y_target)
            x_context = torch.stack(x_context)
            y_context = torch.stack(y_context)

            # The deterministic encoder outputs the deterministic embedding r.
            r = self.det_encoder.forward(x_context, y_context, x_target)
            # [batch_size, N_target, r_size]

            # The latent encoder outputs a prior distribution over the
            # latent embedding z (conditioned only on the context points).
            # The posterior is obtained by encoding the target points instead.
            z_priors, _, _ = self.lat_encoder.forward(x_context, y_context)
            z_posteriors, _, _ = self.lat_encoder.forward(x_target, y_target)

            # Sample z from the prior distribution.
            zs = [dist.rsample() for dist in z_priors]      # [batch_size, r_size]
            z = torch.cat(zs)
            z = z.view(-1, self.r_size)

            # The input to the decoder is the concatenation of the target x values,
            # the deterministic embedding r and the latent variable z
            # the output is the predicted target y for each value of x.
            dists, _, _ = self.decoder.forward(x_target.float(), r.float(), z.float())

            # Calculate the loss
            log_ps = [dist.log_prob(y_target[i, ...].float()) for i, dist in enumerate(dists)]
            log_ps = torch.cat(log_ps)

            kl_div = [kl_divergence(z_posterior, z_prior).float() for z_posterior, z_prior
                      in zip(z_posteriors, z_priors)]
            kl_div = torch.cat(kl_div)

            # Negative of (mean log-likelihood minus mean KL divergence).
            loss = -(torch.mean(log_ps) - torch.mean(kl_div))
            self.losslogger = loss

            # The loss should generally decrease with number of iterations, though it is not
            # guaranteed to decrease monotonically because at each iteration the set of
            # context points changes randomly.
            if iteration % print_freq == 0:
                print("Iteration " + str(iteration) + ":, Loss = {:.3f}".format(loss.item()))
                # We can set testing = True if we want to check that we are not over-fitting.
                if testing:
                    metrics_calculator(x_trains, y_trains, x_tests, y_tests, x_scalers,
                                       y_scalers, self.predict, dataname, plotting, iteration)
            # The gradient step happens after the optional reporting above.
            loss.backward()
            self.optimiser.step()

    def predict(self, x_context, y_context, x_target):
        """
        Predict the output distribution at x_target given the context points.

        :param x_context: A tensor of dimensions [batch_size, N_context, x_size].
                          When training N_context is randomly sampled between 3 and N_train;
                          when testing N_context = N_train
        :param y_context: A tensor of dimensions [batch_size, N_context, y_size]
        :param x_target: A tensor of dimensions [N_target, x_size]
                         (NOTE(review): train() passes the encoders and decoder a
                         batched [batch_size, N_target, x_size] tensor - confirm
                         which shape is expected here)
        :return dist: The distributions over the predicted outputs y_target
        :return mu: A tensor of dimensionality [batch_size, N_target, output_size]
                    describing the means
                    of the normal distribution.
        :return sigma: The third value returned by the decoder, a tensor of
                     dimensionality [batch_size, N_target, output_size].
                     NOTE(review): previously documented as the variances;
                     whether this is a variance or a standard deviation is not
                     visible here - confirm against Decoder.
        """

        r = self.det_encoder.forward(x_context, y_context, x_target)
        # The latent encoder outputs a distribution over the latent embedding z.
        dists_z, _, _ = self.lat_encoder.forward(x_context, y_context)
        # sample() is used here, whereas train() uses rsample().
        zs = [dist.sample() for dist in dists_z]  # [batch_size, r_size]
        z = torch.cat(zs)
        z = z.view(-1, self.r_size)

        # The input to the decoder is the concatenation of the target x values,
        # the deterministic embedding r and the latent variable z
        # the output is the predicted target y for each value of x.
        dist, mu, sigma = self.decoder.forward(x_target.float(), r.float(), z.float())
        return dist, mu, sigma
Exemplo n.º 22
0
    encoder = Encoder(args.d_input * args.LFR_m,
                      args.n_layers_enc,
                      args.n_head,
                      args.d_k,
                      args.d_v,
                      args.d_model,
                      args.d_inner,
                      dropout=args.dropout,
                      pe_maxlen=args.pe_maxlen)
    decoder = Decoder(
        sos_id,
        eos_id,
        vocab_size,
        args.d_word_vec,
        args.n_layers_dec,
        args.n_head,
        args.d_k,
        args.d_v,
        args.d_model,
        args.d_inner,
        dropout=args.dropout,
        tgt_emb_prj_weight_sharing=args.tgt_emb_prj_weight_sharing,
        pe_maxlen=args.pe_maxlen)
    model = Transformer(encoder, decoder)

    for i in range(3):
        print("\n***** Utt", i + 1)
        Ti = i + 20
        input = torch.randn(Ti, D)
        length = torch.tensor([Ti], dtype=torch.int)
        nbest_hyps = model.recognize(input, length, char_list, args)
Exemplo n.º 23
0
def train(log_dir, n_epochs, network_dict, index2token, **kwargs):
    """Build the training/validation graphs and run the training loop.

    The function builds, in order: the encoder graph, the permutation of the
    encoder outputs, the decoder graph, the annealed training cost with a
    gradient-clipped Adam step, a weight-sharing (``reuse=True``) validation
    graph, a prior-sampling graph and an importance-weighted evaluation.
    It then iterates over shuffled training batches for ``n_epochs`` epochs,
    logging to ``log_dir + "vaelog.txt"`` and writing TensorBoard summaries
    to ``log_dir``.

    Fixes relative to the previous revision:
      * the validation ``logsig`` tensor (not the training one) now gets its
        static shape set before being permuted;
      * batch indices are materialised as a list before shuffling, because
        ``np.random.shuffle`` cannot shuffle a ``range`` object in place;
      * the random validation batch index is drawn from the number of
        validation batches (axis 0) instead of the vocabulary axis.

    :param log_dir: Prefix for the log file and directory for summaries.
                    The log file name is built by plain concatenation, so
                    the value is expected to end with a path separator.
    :param n_epochs: Number of passes over the training batches.
    :param network_dict: Keyword arguments for ``encoder.Encoder``; this
                         dict is mutated (``'input_size'`` is added).
    :param index2token: Index -> token lookup used when logging predictions.
    :param kwargs: Must contain ``onehot_words``, ``word_pos``,
                   ``sentence_lens_nchars``, ``sentence_lens_nwords``,
                   ``vocabulary_size``, ``max_char_len``, their ``*_val``
                   counterparts, ``batch_size``, ``hidden_size``,
                   ``decoder_dim`` and ``decoder_units_p3``.
    :return: None.
    """
    onehot_words = kwargs['onehot_words']
    word_pos = kwargs['word_pos']
    sentence_lens_nchars = kwargs['sentence_lens_nchars']
    sentence_lens_nwords = kwargs['sentence_lens_nwords']
    vocabulary_size = kwargs['vocabulary_size']
    max_char_len = kwargs['max_char_len']
    onehot_words_val = kwargs['onehot_words_val']
    word_pos_val = kwargs['word_pos_val']
    sentence_lens_nchars_val = kwargs['sentence_lens_nchars_val']
    sentence_lens_nwords_val = kwargs['sentence_lens_nwords_val']

    batch_size = kwargs['batch_size']
    input_size = vocabulary_size
    hidden_size = kwargs['hidden_size']
    decoder_dim = kwargs['decoder_dim']
    decoder_units_p3 = kwargs['decoder_units_p3']
    num_batches = len(onehot_words) // batch_size
    network_dict['input_size'] = input_size

    max_word_len = np.max(sentence_lens_nwords)

    encoder_k = encoder.Encoder(**network_dict)

    #onehot_words,word_pos,vocabulary_size = encoder_k.run_preprocess()
    #prepping permutation matrix for all instances seperately
    perm_mat, max_lat_word_len, lat_sent_len_list = prep_perm_matrix(
        batch_size=batch_size,
        word_pos_matrix=word_pos,
        max_char_len=max_char_len)

    #placeholders
    mask_kl_pl = tf.placeholder(name='kl_pl_mask',
                                dtype=tf.float32,
                                shape=[batch_size, max_lat_word_len])
    sent_word_len_list_pl = tf.placeholder(name='word_lens',
                                           dtype=tf.int32,
                                           shape=[batch_size])
    perm_mat_pl = tf.placeholder(name='perm_mat_pl',
                                 dtype=tf.int32,
                                 shape=[batch_size, max_lat_word_len])
    onehot_words_pl = tf.placeholder(
        name='onehot_words',
        dtype=tf.float32,
        shape=[batch_size, max_char_len, vocabulary_size])
    word_pos_pl = tf.placeholder(name='word_pos',
                                 dtype=tf.float32,
                                 shape=[batch_size, max_char_len])
    sent_char_len_list_pl = tf.placeholder(name='sent_char_len_list',
                                           dtype=tf.float32,
                                           shape=[batch_size])
    #decoder
    arg_dict = {
        'decoder_p3_units': decoder_units_p3,
        'encoder_dim': hidden_size,
        'lat_word_dim': hidden_size,
        'sentence_lens': None,
        'global_lat_dim': hidden_size,
        'batch_size': batch_size,
        'max_num_lat_words': max_lat_word_len,
        'decoder_units': decoder_dim,
        'num_sentence_characters': max_char_len,
        'dict_length': vocabulary_size
    }
    decoder = Decoder(**arg_dict)

    #step counter
    global_step = tf.Variable(0, name='global_step', trainable=False)

    word_state_out, mean_state_out, logsig_state_out = encoder_k.run_encoder(
        sentence_lens=sent_char_len_list_pl,
        train=True,
        inputs=onehot_words_pl,
        word_pos=word_pos_pl,
        reuse=None)

    #picking out our words
    #why do these all start at 0?
    # replace 0's possibly with len+1
    ## RELYING ON THERE BEING NOTHING AT ZEROS
    #indice 0 problem?
    word_state_out.set_shape([max_char_len, batch_size, hidden_size])
    mean_state_out.set_shape([max_char_len, batch_size, hidden_size])
    logsig_state_out.set_shape([max_char_len, batch_size, hidden_size])
    word_state_out_p = permute_encoder_output(encoder_out=word_state_out,
                                              perm_mat=perm_mat_pl,
                                              batch_size=batch_size,
                                              max_word_len=max_lat_word_len)
    mean_state_out_p = permute_encoder_output(encoder_out=mean_state_out,
                                              perm_mat=perm_mat_pl,
                                              batch_size=batch_size,
                                              max_word_len=max_lat_word_len)
    logsig_state_out_p = permute_encoder_output(encoder_out=logsig_state_out,
                                                perm_mat=perm_mat_pl,
                                                batch_size=batch_size,
                                                max_word_len=max_lat_word_len)
    #Initialize decoder
    ##Note to self: need to input sentence lengths vector, also check to make sure all the placeholders flow into my class and tensorflow with ease

    out_o, global_latent_o, global_logsig_o, global_mu_o = decoder.run_decoder(
        word_sequence_length=sent_word_len_list_pl,
        train=True,
        reuse=None,
        units_lstm_decoder=decoder_dim,
        lat_words=word_state_out_p,
        units_dense_global=decoder_dim,
        char_sequence_length=tf.cast(sent_char_len_list_pl, dtype=tf.int32))

    # shaping for batching
    #reshape problem

    onehot_words = np.reshape(
        onehot_words, newshape=[-1, batch_size, max_char_len, vocabulary_size])
    word_pos = np.reshape(word_pos, newshape=[-1, batch_size, max_char_len])
    # making word masks for kl term
    kl_mask = []
    print(sentence_lens_nwords)
    for word_len in np.reshape(lat_sent_len_list, -1):
        vec = np.zeros([max_lat_word_len], dtype=np.float32)
        vec[0:word_len] = np.ones(shape=word_len, dtype=np.float32)
        kl_mask.append(vec)
    kl_mask = np.asarray(kl_mask)
    kl_mask = np.reshape(kl_mask, newshape=[-1, batch_size, max_lat_word_len])
    sentence_lens_nwords = np.reshape(sentence_lens_nwords,
                                      newshape=[-1, batch_size])
    sentence_lens_nchars = np.reshape(sentence_lens_nchars,
                                      newshape=[-1, batch_size])
    lat_sent_len_list = np.reshape(lat_sent_len_list, [-1, batch_size])

    #shaping for validation set
    batch_size_val = batch_size
    n_valid = np.shape(onehot_words_val)[0]

    # Drop the trailing examples that do not fill a whole validation batch.
    r = n_valid % batch_size_val
    n_valid_use = n_valid - r
    #might have to fix this before reporting results
    onehot_words_val = np.reshape(
        onehot_words_val[0:n_valid_use, ...],
        newshape=[-1, batch_size_val, max_char_len, vocabulary_size])
    word_pos_val = np.reshape(word_pos_val[0:n_valid_use, ...],
                              newshape=[-1, batch_size_val, max_char_len])
    #sentence_lens_nwords_val = np.reshape(sentence_lens_nwords_val[0:n_valid_use],newshape=[-1,batch_size_val])
    sentence_lens_nchars_val = np.reshape(
        sentence_lens_nchars_val[0:n_valid_use], newshape=[-1, batch_size_val])

    ###KL annealing parameters
    shift = 5000
    total_steps = np.round(np.true_divide(n_epochs, 20) *
                           np.shape(onehot_words)[0],
                           decimals=0)

    ####
    cost, reconstruction, kl_p3, kl_p1, kl_global, kl_p2, anneal, _ = decoder.calc_cost(
        eow_mask=None,
        mask_kl=mask_kl_pl,
        kl=True,
        sentence_word_lens=sent_word_len_list_pl,
        shift=shift,
        total_steps=total_steps,
        global_step=global_step,
        global_latent_sample=global_latent_o,
        global_logsig=global_logsig_o,
        global_mu=global_mu_o,
        predictions=out_o,
        true_input=onehot_words_pl,
        posterior_logsig=logsig_state_out_p,
        posterior_mu=mean_state_out_p,
        post_samples=word_state_out_p,
        reuse=None)

    ######
    # Train Step

    # clipping gradients
    ######
    lr = 1e-4
    opt = tf.train.AdamOptimizer(lr)
    grads_t, vars_t = zip(*opt.compute_gradients(cost))
    clipped_grads_t, grad_norm_t = tf.clip_by_global_norm(grads_t,
                                                          clip_norm=5.0)
    train_step = opt.apply_gradients(zip(clipped_grads_t, vars_t),
                                     global_step=global_step)
    regex = re.compile('[^a-zA-Z]')
    #sum_grad_hist = [tf.summary.histogram(name=regex.sub('',str(j)),values=i) for i,j in zip(clipped_grads_t,vars_t)]
    norm_grad = tf.summary.scalar(name='grad_norm', tensor=grad_norm_t)

    ######
    #testing stuff
    #testing pls
    sent_word_len_list_pl_val = tf.placeholder(name='word_lens_val',
                                               dtype=tf.int32,
                                               shape=[batch_size])
    perm_mat_pl_val = tf.placeholder(name='perm_mat_val',
                                     dtype=tf.int32,
                                     shape=[batch_size, max_lat_word_len])
    onehot_words_pl_val = tf.placeholder(
        name='onehot_words_val',
        dtype=tf.float32,
        shape=[batch_size, max_char_len, vocabulary_size])
    word_pos_pl_val = tf.placeholder(name='word_pos_val',
                                     dtype=tf.float32,
                                     shape=[batch_size, max_char_len])
    sent_char_len_list_pl_val = tf.placeholder(name='sent_char_len_list_val',
                                               dtype=tf.float32,
                                               shape=[batch_size])

    #testing graph
    word_state_out_val, mean_state_out_val, logsig_state_out_val = encoder_k.run_encoder(
        sentence_lens=sent_char_len_list_pl_val,
        train=False,
        inputs=onehot_words_pl_val,
        word_pos=word_pos_pl_val,
        reuse=True)
    # NOTE(review): word_pos_val is already reshaped to 3-D here, whereas the
    # training call above received the un-reshaped matrix -- confirm that
    # prep_perm_matrix accepts both layouts.
    perm_mat_val, _, lat_sent_len_list_val = prep_perm_matrix(
        batch_size=batch_size_val,
        word_pos_matrix=word_pos_val,
        max_char_len=max_char_len,
        max_word_len=max_lat_word_len)
    kl_mask_val = []
    for word_len in np.reshape(lat_sent_len_list_val, -1):
        vec = np.zeros([max_lat_word_len], dtype=np.float32)
        vec[0:word_len] = np.ones(shape=word_len, dtype=np.float32)
        kl_mask_val.append(vec)
    kl_mask_val = np.asarray(kl_mask_val)
    kl_mask_val = np.reshape(kl_mask_val,
                             newshape=[-1, batch_size, max_lat_word_len])

    lat_sent_len_list_val = np.reshape(np.reshape(lat_sent_len_list_val,
                                                  -1)[0:n_valid_use],
                                       newshape=[-1, batch_size_val])
    word_state_out_val.set_shape([max_char_len, batch_size_val, hidden_size])
    mean_state_out_val.set_shape([max_char_len, batch_size_val, hidden_size])
    # Fixed: this previously re-set the shape of the *training* tensor
    # (logsig_state_out), leaving the validation tensor without static shape.
    logsig_state_out_val.set_shape([max_char_len, batch_size_val, hidden_size])
    word_state_out_p_val = permute_encoder_output(
        encoder_out=word_state_out_val,
        perm_mat=perm_mat_pl_val,
        batch_size=batch_size_val,
        max_word_len=max_lat_word_len)
    mean_state_out_p_val = permute_encoder_output(
        encoder_out=mean_state_out_val,
        perm_mat=perm_mat_pl_val,
        batch_size=batch_size_val,
        max_word_len=max_lat_word_len)
    logsig_state_out_p_val = permute_encoder_output(
        encoder_out=logsig_state_out_val,
        perm_mat=perm_mat_pl_val,
        batch_size=batch_size_val,
        max_word_len=max_lat_word_len)
    out_o_val, global_latent_o_val, global_logsig_o_val, global_mu_o_val = decoder.run_decoder(
        word_sequence_length=sent_word_len_list_pl_val,
        train=False,
        reuse=True,
        units_lstm_decoder=decoder_dim,
        lat_words=mean_state_out_p_val,
        units_dense_global=decoder.global_lat_dim,
        char_sequence_length=tf.cast(sent_char_len_list_pl_val,
                                     dtype=tf.int32))
    #test cost
    # The training KL-mask placeholder is reused here; it is fed with the
    # validation mask in the validation sess.run below.
    test_cost = decoder.test_calc_cost(
        mask_kl=mask_kl_pl,
        sentence_word_lens=sent_word_len_list_pl_val,
        posterior_logsig=logsig_state_out_p_val,
        post_samples=word_state_out_p_val,
        global_mu=global_mu_o_val,
        global_logsig=global_logsig_o_val,
        global_latent_sample=global_latent_o_val,
        posterior_mu=mean_state_out_p_val,
        true_input=onehot_words_pl_val,
        predictions=out_o_val)

    ######

    ######
    #prior sampling
    samples = np.random.normal(size=[batch_size, decoder.global_lat_dim])
    gen_samples = decoder.generation(samples=samples)

    sess = tf.InteractiveSession()
    sess.run(tf.global_variables_initializer())

    ### IW eval
    NLL, bpc = n_samples_IW(n_samples=10,
                            encoder=encoder_k,
                            decoder=decoder,
                            decoder_dim=decoder_dim,
                            sent_char_len_list_pl=sent_char_len_list_pl_val,
                            true_output=onehot_words_pl_val,
                            onehot_words_pl=onehot_words_pl_val,
                            word_pos_pl=word_pos_pl_val,
                            perm_mat_pl=perm_mat_pl_val,
                            batch_size=batch_size,
                            max_lat_word_len=max_lat_word_len,
                            sent_word_len_list_pl=sent_word_len_list_pl_val)
    sum_NLL = tf.summary.scalar(tensor=NLL, name='10sample_IWAE_LL')
    sum_bpc = tf.summary.scalar(tensor=bpc, name='10sample_IWAE_BPC')
    ###

    ######
    #tensorboard stuff
    summary_inf_train = tf.summary.merge([
        norm_grad, decoder.kls_hist, decoder.global_kl_scalar,
        decoder.rec_scalar, decoder.cost_scalar, decoder.full_kl_scalar,
        decoder.sum_all_activ_hist, decoder.sum_global_activ_hist
    ])
    summary_inf_test = tf.summary.merge(
        [sum_NLL, sum_bpc, decoder.sum_rec_val, decoder.sum_kl_val])
    summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
    ######
    log_file = log_dir + "vaelog.txt"
    logger = logging.getLogger('mVAE_log')
    hdlr = logging.FileHandler(log_file)
    formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
    hdlr.setFormatter(formatter)
    logger.addHandler(hdlr)
    logger.setLevel(logging.DEBUG)
    for epoch in range(n_epochs):
        # Fixed: np.random.shuffle needs a mutable sequence; a bare range
        # object cannot be shuffled in place on Python 3.
        inds = list(range(np.shape(onehot_words)[0]))
        np.random.shuffle(inds)
        for count, batch in enumerate(inds):
            anneal_c_o, train_predictions_o_np, train_cost_o_np, _, global_step_o_np, train_rec_cost_o_np, _, _, _, _, summary_inf_train_o = sess.run(
                [
                    anneal, out_o, cost, train_step, global_step,
                    reconstruction, kl_p3, kl_p1, kl_global, kl_p2,
                    summary_inf_train
                ],
                feed_dict={
                    mask_kl_pl: kl_mask[batch],
                    onehot_words_pl: onehot_words[batch],
                    word_pos_pl: word_pos[batch],
                    perm_mat_pl: perm_mat[batch],
                    sent_word_len_list_pl: lat_sent_len_list[batch],
                    sent_char_len_list_pl: sentence_lens_nchars[batch]
                })
            #logger.debug('anneal const {}'.format(anneal_c))
            #logger.debug('ground truth {}'.format(get_output_sentences(index2token, ground_truth[0:10])))
            if global_step_o_np % 1 == 0:
                # testing on the validation set
                # Fixed: rind indexes the leading (batch) axis of the
                # validation arrays, so it must be drawn from axis 0 rather
                # than from the last (vocabulary) axis.
                rind = np.random.randint(low=0,
                                         high=np.shape(onehot_words_val)[0])
                val_predictions_o_np, val_cost_o_np, summary_inf_test_o = sess.run(
                    [out_o_val, test_cost, summary_inf_test],
                    feed_dict={
                        mask_kl_pl: kl_mask_val[rind],
                        onehot_words_pl_val: onehot_words_val[rind],
                        word_pos_pl_val: word_pos_val[rind],
                        perm_mat_pl_val: perm_mat_val[rind],
                        sent_word_len_list_pl_val: lat_sent_len_list_val[rind],
                        sent_char_len_list_pl_val:
                        sentence_lens_nchars_val[rind]
                    })

                predictions = np.argmax(train_predictions_o_np[0:10], axis=-1)
                ground_truth = np.argmax(onehot_words[batch][0:10], axis=-1)
                val_predictions = np.argmax(val_predictions_o_np, axis=-1)
                true = np.argmax(onehot_words_val[rind], -1)
                # Character-level accuracy over the non-padded prefix of each
                # validation sentence.
                num = np.sum([
                    np.sum(val_predictions[j][0:i] == true[j][0:i])
                    for j, i in enumerate(sentence_lens_nchars_val[rind])
                ])

                denom = np.sum(sentence_lens_nchars_val[rind])
                accuracy = np.true_divide(num, denom) * 100
                logger.debug(
                    'accuracy on random val batch {}'.format(accuracy))
                logger.debug('predictions {}'.format(
                    [[index2token[j] for j in i]
                     for i in predictions[0:10, 0:50]]))
                logger.debug('ground truth {}'.format(
                    [[index2token[j] for j in i]
                     for i in ground_truth[0:10, 0:50]]))
                logger.debug(
                    'global step: {} Epoch: {} count: {} anneal:{}'.format(
                        global_step_o_np, epoch, count, anneal_c_o))
                logger.debug('train cost: {}'.format(train_cost_o_np))
                logger.debug('validation cost {}'.format(val_cost_o_np))
                logger.debug('validation predictions {}'.format(
                    [[index2token[j] for j in i]
                     for i in val_predictions[0:10, 0:50]]))
                summary_writer.add_summary(summary_inf_test_o,
                                           global_step_o_np)
                summary_writer.flush()
            if global_step_o_np % 1000 == 0:
                # testing on the generative model
                # sess.run on a one-element list returns a one-element list,
                # hence the extra [0] when indexing gen_pred below.
                gen_o_np = sess.run([gen_samples])
                gen_pred = np.argmax(gen_o_np[0:10], axis=-1)
                logger.debug('GEN predictions {}'.format(
                    [[index2token[j] for j in i]
                     for i in gen_pred[0][0:10, 0:50]]))

            summary_writer.add_summary(summary_inf_train_o, global_step_o_np)
            summary_writer.flush()
Exemplo n.º 24
0
        OP_SUB: 3,
        OP_MUL: 3,
        OP_DIV: 3,
        OP_MOD: 3,
        OP_AND: 3,
        OP_OR: 3,
        OP_LT: 3,
        OP_GT: 3,
        OP_EQ: 3,
        OP_NOT: 1,
        OP_JMP: 2,
        OP_STORE: 2,
        OP_ENDGA: 1,
        OP_NOP: 1
    }


if __name__ == "__main__":
    # Project-local helpers are imported lazily: they are only needed when
    # this module is executed as a script.
    from binary_reader import BinaryReader
    from decoder import Decoder
    from memory import Memory

    # Read the instruction words and dump each of them in hexadecimal.
    bin_instructions = BinaryReader.read_instructions(
        "../../sdvu/cfg/adding.6.out.2")
    for hex_text in map(hex, bin_instructions):
        print(hex_text)

    # Build the configuration memory first, then the core around a decoder
    # for the loaded instructions, and finally run the program.
    memory = Memory(128, 0x22221111333333332222222200000001)
    instruction_decoder = Decoder(bin_instructions)
    simulator = Core(instruction_decoder, 2, 128)
    simulator.setup_cfg_memory(memory)
    simulator.process_instructions()
Exemplo n.º 25
0
class Simulation:
    """Fit a per-letter touch model from recorded typing data and score it.

    ``run`` loads recorded trials (``input``), fits and pickles the touch
    model (``calc_letter_distribution``), then asks the ``Decoder`` to
    predict each correctly typed word and prints the top-5 rank accuracy.
    """

    def __init__(self):
        self.init_corpus()

    def init_corpus(self):
        """Load up to the first 20000 ``word priority`` lines of corpus.txt.

        Each entry of ``self.corpus`` is ``[word, float(priority)]``. The
        file is read from the current working directory.
        """
        self.corpus = []
        # Context manager so the handle is closed deterministically.
        with open('corpus.txt') as corpus_file:
            lines = corpus_file.readlines()
        # Slicing (rather than indexing a fixed range) tolerates a corpus
        # shorter than 20000 lines instead of raising IndexError.
        for line in lines[:20000]:
            tags = line.split(' ')
            self.corpus.append([tags[0], float(tags[1])])

    def calc_letter_distribution(self, **kwargs):
        """Estimate per-letter, per-finger touch distributions and save them.

        Keyword arguments:
            data_list: one sequence of touch records per trial.
            task_list: the target text of each trial; ``task[i]`` is the
                letter intended by ``data[i]``.

        Populates ``self.letter_positions`` (QWERTY column/row per letter),
        ``self.letter_fingers`` (26x10 finger usage, normalised per letter)
        and ``self.letter_distributions`` (per letter/finger
        ``[xc, yc, std_x2, std_y2, std_xy, p]``), pickles the three to
        ``models/touch.model`` and finally creates ``self.decoder``.
        """
        data_list = kwargs['data_list']
        task_list = kwargs['task_list']
        assert (len(data_list) == len(task_list))

        QWERTY = ['QWERTYUIOP', 'ASDFGHJKL', 'ZXCVBNM']
        self.letter_positions = [[-1, -1] for _ in range(26)]
        self.letter_fingers = np.zeros((26, 10))
        self.letter_distributions = [
            [[-1, -1, 0.1, 0.1, 0.1, 0] for finger in range(10)]
            for alpha in range(26)
        ]  # Formal = [xc, yc, std_x2, std_y2, std_xy, p]
        for r, line in enumerate(QWERTY):
            for c, ch in enumerate(line):
                self.letter_positions[ord(ch) - ord('A')] = [c, r]

        # features[alpha][finger] collects the touch features observed for
        # that letter typed with that finger.
        features = [[[] for finger in range(10)] for alpha in range(26)]
        for data, task in zip(data_list, task_list):
            assert (len(data) == len(task))
            for record, letter in zip(data, task):
                if letter.isalpha():
                    # NOTE(review): assumes lowercase ASCII task text; any
                    # other alphabetic character yields an out-of-range index.
                    alpha = ord(letter) - ord('a')
                    feature = Decoder.get_feature(record)
                    finger = Decoder.get_finger(record)
                    # finger = 0 # If not using fingering model
                    features[alpha][finger].append(feature)

        for alpha in range(26):
            for finger in range(10):
                points = np.array(features[alpha][finger])
                if len(points) >= 1:
                    self.letter_fingers[alpha][finger] += len(points)
                    X = points[:, 0]
                    Y = points[:, 1]

                    if len(points) >= 5:  # Remove > 3_std
                        n_std = 3
                        xc, x_std = np.mean(X), np.std(X)
                        yc, y_std = np.mean(Y), np.std(Y)
                        inliers = ((np.abs(X - xc) <= n_std * x_std)
                                   & (np.abs(Y - yc) <= n_std * y_std))
                        X = X[inliers]
                        Y = Y[inliers]

                    xc = np.mean(X)
                    yc = np.mean(Y)

                    #plt.scatter(X, Y, color=('C'+str(alpha)), s = 5)
                    #plt.scatter(xc, yc, color='red', s = 10)

                    # Too few samples for a covariance estimate: fall back
                    # to a fixed isotropic covariance.
                    cov = np.array([[0.1, 0], [0, 0.1]])
                    if len(points) >= 5:
                        cov = np.cov(np.array([X, Y]))

                    std_x2 = cov[0, 0]
                    std_y2 = cov[1, 1]
                    std_xy = (std_x2**0.5) * (std_y2**0.5)
                    p = cov[0, 1] / std_xy
                    assert (not (np.isnan(std_x2) or np.isnan(std_y2)
                                 or np.isnan(std_xy)))
                    self.letter_distributions[alpha][finger] = [
                        xc, yc, std_x2, std_y2, std_xy, p
                    ]

            if sum(self.letter_fingers[alpha]) != 0:
                self.letter_fingers[alpha] /= sum(self.letter_fingers[alpha])
                std_fingering = np.argmax(self.letter_fingers[alpha])
                for finger in range(10):
                    # Fingers never observed for this letter inherit the
                    # distribution of the most frequent finger and get a
                    # small floor probability.
                    if self.letter_fingers[alpha][finger] == 0:
                        self.letter_distributions[alpha][
                            finger] = self.letter_distributions[alpha][
                                std_fingering].copy()
                    self.letter_fingers[alpha][finger] = max(
                        self.letter_fingers[alpha][finger], 0.001)

        #plt.show()
        # Close the model file before Decoder() is constructed so the data
        # is fully written to disk at that point.
        with open('models/touch.model', 'wb') as model_file:
            pickle.dump([
                self.letter_positions, self.letter_fingers,
                self.letter_distributions
            ], model_file)
        self.decoder = Decoder()

    def input(self):
        """Load recorded trials selected by ``sys.argv[1]`` (``USER-SESSION``).

        A non-numeric user or session part selects the built-in default
        list for that part. Missing trial files are skipped.

        Returns:
            ``(task_list, inputted_list, data_list)``, three parallel lists.
        """
        nums = sys.argv[1].split('-')
        assert (len(nums) == 2)
        if nums[0].isdigit():
            users = [int(nums[0])]
        else:
            users = [1, 2, 3, 4, 5, 6, 8, 9, 10, 12]  #range(1, 13)
        if nums[1].isdigit():
            sessions = [int(nums[1])]
        else:
            sessions = range(1, 6)

        N = 20
        task_list = []
        inputted_list = []
        data_list = []

        for user in users:
            for session in sessions:
                folder_path = 'data-study1/' + str(user) + '-' + str(
                    session) + '/'
                for i in range(N):
                    file_path = folder_path + str(i) + '.pickle'
                    if os.path.exists(file_path):
                        # SECURITY NOTE: pickle.load executes arbitrary code
                        # from the file; only use trusted recordings.
                        with open(file_path, 'rb') as trial_file:
                            [task, inputted, data] = pickle.load(trial_file)
                        # The old check compared len(data) with itself; the
                        # three sequences must all be the same length, which
                        # calc_letter_distribution also relies on.
                        assert (len(task) == len(inputted)
                                and len(inputted) == len(data))
                        task_list.append(task)
                        inputted_list.append(inputted)
                        data_list.append(data)

        return task_list, inputted_list, data_list

    def run(self):
        """Fit the touch model, decode every correctly typed word and report.

        Prints the fraction of predictions at ranks 1..5 (ranks equal to -1
        are excluded from the denominator) and returns the rank-1 fraction.
        """
        task_list, inputted_list, data_list = self.input()
        self.calc_letter_distribution(data_list=data_list, task_list=task_list)

        ranks = []
        for task, inputted, data in zip(task_list, inputted_list, data_list):
            begin = 0
            for word in task.split():
                end = begin + len(word)

                enter = inputted[begin:end]
                word_data = data[begin:end]
                # Only words the participant typed correctly are evaluated.
                if enter == word:
                    _, rank = self.decoder.predict(word_data, task[:end],
                                                   word)
                    ranks.append(rank)

                # Skip the separator that follows the word.
                begin = end + 1

        print('=====   Top-5 accuracy   =====')
        ranks = np.array(ranks)
        # Loop-invariant denominator: number of predictions with a rank.
        n_ranked = sum(ranks != -1)
        top_1 = None
        for k in range(1, 6):
            #prob = sum(ranks == k) / len(ranks)
            prob = sum(ranks == k) / n_ranked
            print('Rank %d = %f' % (k, prob))
            if k == 1:
                top_1 = prob

        return top_1
Exemplo n.º 26
0
            BUFFER_SIZE = len(d_tensor_train)
            steps_per_epoch = len(d_tensor_train) // BATCH_SIZE + 1
            vocab_size = len(tokenizer.word_index) + 1

            # create tf.dataset
            dataset = tf.data.Dataset.from_tensor_slices(
                (dia_train, res_train, sid_train,
                 aid_train)).shuffle(BUFFER_SIZE)
            dataset = dataset.batch(BATCH_SIZE, drop_remainder=False)
            example_input_batch, example_target_batch, example_sid_batch, example_aid_batch = next(
                iter(dataset))
            print("Create dataset done.")

            encoder = Encoder(HIDDEN_SIZE, vocab_size, embedding_dim,
                              NUM_LAYER, BATCH_SIZE)
            decoder = Decoder(HIDDEN_SIZE, vocab_size, embedding_dim,
                              speaker_dim, NUM_LAYER)
            optimizer = tf.keras.optimizers.Adam()

            # train_nn_s = Train(encoder, decoder, optimizer, tokenizer)      # speaker mode
            train_nn_sa = Train(encoder, decoder, optimizer,
                                tokenizer)  # speaker-addressee mode

            # cp = tf.train.Checkpoint(optimizer=optimizer,
            #                          encoder=encoder,
            #                          decoder=decoder)
            # status = cp.restore("persona_checkpoint/speaker-ckpt-5")
            # print(status)
            # train_nn_s = Train(encoder, decoder, optimizer, tokenizer)  # speaker mode restore

            print("Start training")
            #################################### test Way ###########################################
Exemplo n.º 27
0
    if parsed_args.port == []:
        parsed_args.port = [DEFAULT_TCP_PORT + i for i in range(num_bundles)]

    elif len(parsed_args.port) == 1 and 1 < num_bundles:
        initial_port = parsed_args.port[0]
        parsed_args.port = [initial_port + i for i in range(num_bundles)]

    elif len(parsed_args.port) != num_bundles:
        sys.stderr.write('ERROR: TCP ports specified incorrectly.\n')
        cli_parser.print_help()
        sys.exit(2)

    return parsed_args


# Expose the translation endpoint; the target language code is taken from
# the URL path.
api.add_resource(TranslationEngine,
                 '/joshua/translate/<string:target_lang_code>')

if __name__ == '__main__':

    args = handle_cli_args(sys.argv)
    # One decoder server per (bundle directory, port) pair, registered under
    # the language pair found at the same position in the CLI arguments.
    for idx, (bundle, port) in enumerate(zip(args.bundle_dir, args.port)):
        decoder = Decoder(bundle, port)
        decoder.start_decoder_server()
        decoders[(args.source_lang[idx], args.target_lang[idx])] = decoder

    # NOTE(review): debug mode is switched on here -- confirm this entry
    # point is never used for a production deployment.
    app.run(debug=True, use_reloader=False)
    #app.run()
Exemplo n.º 28
0
# Tear down the client connection, wait for the server's listening thread to
# finish, then close the server socket.
cli.connexion.close()
serv.th_Listen.join()
serv.mySocket.close()

# Give the message associated with the closing of the client connection
# enough time to be printed correctly.
sleep(0.1)

log.debug("Thread d'écoute du serveur supprimé.")
log.debug("Serveur supprimé.\n\n")


# # # -------------------------DATA DECODING TO IMAGE-------------------------- # # #


dec = Decoder()

# bitstream (received data) --> RLE frame
dec_rle_data = dec.decode_bitstream_RLE(received_data)
# Each stage's duration is measured from the end timestamp of the previous
# stage.
t_fin_conversion_bitstream_recu_RLE = time()
duree_conversion_bitstream_recu_RLE = t_fin_conversion_bitstream_recu_RLE - t_fin_conversion_RLE_bitstream_et_passage_reseau

# Logs whether the decoded RLE data equals the RLE data that was sent.
log.debug(f"Transmission réseau réussie : {str(rle_data == dec_rle_data).upper()}\n")

# RLE frame --> YUV frame
dec_yuv_data = dec.recompose_frame_via_iDTT(dec_rle_data, img_size, macroblock_size, P, S)
t_fin_conversion_RLE_YUV = time()
duree_conversion_RLE_YUV = t_fin_conversion_RLE_YUV - t_fin_conversion_bitstream_recu_RLE

# YUV frame --> RGB frame
dec_rgb_data = dec.YUV_to_RGB(np.array(dec_yuv_data, dtype=float))
Exemplo n.º 29
0
# Run the encoder once on an example batch and check its output shapes.
sample_output, sample_hidden = ans_sent_encoder(example_ans_sent_batch,
                                                training=True)
expected_output_shape = (BATCH_SIZE, max_length_inp, units)
tf.debugging.assert_shapes([(sample_output, expected_output_shape)])

# With more than one layer the hidden state carries a leading layer axis.
expected_hidden_shape = ((BATCH_SIZE, units) if layer == 1 else
                         (layer, BATCH_SIZE, units))
tf.debugging.assert_shapes([(sample_hidden, expected_hidden_shape)])

# Keyword configuration of the decoder, kept apart from the positional
# arguments for readability.
decoder_options = dict(
    attention_type='luong',
    max_length_inp=max_length_inp,
    max_length_targ=max_length_targ,
    embedding_matrix=targ_embedding_matrix,
    pretraine_embeddings=pretrained,
    num_layers=layer,
    dropout=dropout,
)
decoder = Decoder(vocab_tar_size, embedding_dim, units, BATCH_SIZE,
                  targ_tokenizer.word_index['<start>'],
                  targ_tokenizer.word_index['<end>'],
                  **decoder_options)

# Feed a random target-shaped batch through the decoder and verify the shape
# of the produced logits.
sample_x = tf.random.uniform((BATCH_SIZE, max_length_targ),
                             dtype=tf.dtypes.float32)
decoder.attention_mechanism.setup_memory(sample_output)
initial_state = decoder.build_initial_state(BATCH_SIZE, sample_hidden,
                                            tf.float32)
sample_decoder_outputs = decoder(sample_x, initial_state, training=True)
expected_logits_shape = (BATCH_SIZE, max_length_targ - 1, vocab_tar_size)
tf.debugging.assert_shapes([(sample_decoder_outputs.rnn_output,
                             expected_logits_shape)])
Exemplo n.º 30
0
 def __init__(self):
     """Create the msgpack unpacker and the decoder used by this object."""
     # Maps are unpacked as OrderedDict so that key order is preserved.
     stream_unpacker = msgpack.Unpacker(object_pairs_hook=OrderedDict)
     self.decoder = Decoder()
     self.unpacker = stream_unpacker
Exemplo n.º 31
0
 def __init__(self):
     """Create the Decoder instance owned by this object."""
     self.decoder = Decoder()
Exemplo n.º 32
0
from kaitai.photon import Photon
from decoder import Decoder
from scapy.all import sniff, raw
from py2neo import Graph, Node, Relationship
from ogm import get_item, get_character
from datetime import datetime
from utility import convert_to_ts, alert
from items import item_dict
import json

# Module-level Decoder instance (class imported from the local decoder module).
decoder = Decoder()
# py2neo graph handle.
# NOTE(review): the password literal looks masked -- confirm real credentials
# come from a secure source rather than being hard-coded here.
graph = Graph(password='******')


def preprocess_message(m):
    """Parse one raw JSON message into Python objects.

    The previous implementation rewrote ``true``/``false``/``null`` with a
    byte-wise replace and passed the result to ``eval``. That had two
    problems:

    * the replace also hit text *inside* string values (``"untrue"`` became
      ``"unTrue"``), silently corrupting data;
    * ``eval`` executes arbitrary expressions, and the messages handled here
      originate from captured network traffic.

    ``json.loads`` maps the JSON literals to ``True``/``False``/``None``
    without touching string contents and never executes code.

    :param m: JSON document as ``bytes`` (or ``str``).
    :return: The decoded Python object (dict, list, str, number, bool, None).
    :raises ValueError: if ``m`` is not valid JSON (``json.JSONDecodeError``
        is a ``ValueError`` subclass).
    """
    return json.loads(m)


def handle_messages(messages):
    for m in messages:
        m    = preprocess_message(m)
        item = get_item(m)
        char = get_character(m)

        msg = Relationship(
	        char,
	        m['AuctionType'],
	        item,
Exemplo n.º 33
0
	def __init__(self,wavfile,transcription):
		"""Keep a reference to ``wavfile`` and run ``Decoder.__init__``.

		Both arguments are forwarded unchanged to ``Decoder.__init__``.
		"""
		self.wavfile = wavfile
		# Explicit base-class call, made after self.wavfile is set.
		Decoder.__init__(self,wavfile,transcription)
Exemplo n.º 34
0
def main(args):
    """Build a Seq2Seq model whose encoder comes from a CTC checkpoint, then train it.

    The encoder weights are copied from ``args.ctc_model`` (checkpoint keys
    have ``'lstm'`` replaced by ``'encoder'``) and frozen; the model is moved
    to the GPU and handed to ``Solver`` together with the chosen optimizer.

    Args:
        args: Parsed options; the attributes read here are the dataset paths
            and limits, the encoder/decoder sizes, ``ctc_model``, ``optimizer``
            and the optimizer hyper-parameters.

    Returns:
        None. Returns early, after printing a message, when ``args.optimizer``
        is neither ``'sgd'`` nor ``'adam'``.
    """
    # --- data -------------------------------------------------------------
    train_set = AudioDataset(args.train_json, args.batch_size,
                             args.maxlen_in, args.maxlen_out)
    valid_set = AudioDataset(args.valid_json, args.batch_size,
                             args.maxlen_in, args.maxlen_out)
    train_loader = AudioDataLoader(train_set,
                                   batch_size=1,
                                   num_workers=args.num_workers)
    valid_loader = AudioDataLoader(valid_set,
                                   batch_size=1,
                                   num_workers=args.num_workers)
    # The dictionary yields the character list plus the sos/eos ids.
    char_list, sos_id, eos_id = process_dict(args.dict)
    vocab_size = len(char_list)
    data = {'tr_loader': train_loader, 'cv_loader': valid_loader}

    # --- model ------------------------------------------------------------
    encoder = Encoder(args.einput,
                      args.ehidden,
                      args.elayer,
                      vocab_size,
                      dropout=args.edropout,
                      bidirectional=args.ebidirectional,
                      rnn_type=args.etype,
                      ctc_las=True)
    decoder = Decoder(vocab_size,
                      args.dembed,
                      sos_id,
                      eos_id,
                      args.dhidden,
                      args.dlayer,
                      bidirectional_encoder=args.ebidirectional)
    model = Seq2Seq(encoder, decoder, ctc_las=True)
    model_dict = model.state_dict()
    print(model)

    # Copy over every checkpoint tensor whose renamed key exists in the model.
    checkpoint = torch.load(args.ctc_model)
    transferred = {}
    for old_key, tensor in checkpoint['state_dict'].items():
        new_key = old_key.replace('lstm', 'encoder')
        if new_key in model_dict:
            transferred[new_key] = tensor
    model_dict.update(transferred)
    model.load_state_dict(model_dict)

    # Freeze the encoder parameters, printing each frozen name.
    for name, param in model.named_parameters():
        if name.startswith("encoder"):
            print(name)
            param.requires_grad = False
    model.cuda()

    # --- optimizer --------------------------------------------------------
    if args.optimizer not in ('sgd', 'adam'):
        print("Not support optimizer")
        return
    if args.optimizer == 'sgd':
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.l2)
    else:
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=args.lr,
                                     weight_decay=args.l2)

    # --- solver -----------------------------------------------------------
    ctc = 0
    Solver(data, model, optimizer, ctc, args).train()
Exemplo n.º 35
0
 def __init__(self):
     """Create the Generator and Decoder instances held by this object."""
     self.generator = Generator()
     self.decoder = Decoder()
Exemplo n.º 36
0
def test():
    '''
    Evaluate the saved encoder-decoder network on ``testLoader``.

    Uses the module-level names ``args``, ``encoder_params``,
    ``decoder_params``, ``testLoader`` and ``SaveVis``. For every batch the
    prediction is visualised through ``SaveVis`` and a class-weighted
    cross-entropy loss is recorded.

    Returns:
        The average (``np.average``) of the recorded per-batch losses. The
        value used to be computed and then discarded.

    Raises:
        Exception: if ``args.model_path`` does not exist.
    '''
    # Pick the device first so everything below follows the same CPU
    # fallback; the unconditional .cuda() calls used before would fail on a
    # machine without CUDA even though a fallback device was selected.
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    encoder = Encoder(encoder_params[0], encoder_params[1]).to(device)
    decoder = Decoder(decoder_params[0], decoder_params[1]).to(device)
    net = ED(encoder, decoder)

    if torch.cuda.device_count() > 1:
        net = nn.DataParallel(net)
    net.to(device)

    # Load the model to be tested.
    if not os.path.exists(args.model_path):
        raise Exception("Invalid model path!")
    print('==> loading existing model ' + args.model_path)
    # map_location lets a checkpoint saved on GPU be loaded on the chosen device.
    model_info = torch.load(args.model_path, map_location=device)
    net.load_state_dict(model_info['state_dict'])
    model_dir = args.model_path.split('/')[-2]

    # Create the directory that stores the visualisation images.
    os.makedirs(args.vis_dir, exist_ok=True)

    class_weights = torch.FloatTensor([1.0, 15.0]).to(device)
    lossfunction = nn.CrossEntropyLoss(weight=class_weights).to(device)
    # Per-batch test losses collected during the run.
    test_losses = []

    ######################
    # test the model #
    ######################
    with torch.no_grad():
        net.eval()  # eval mode only affects dropout and batch-norm layers
        # tqdm progress bar
        t = tqdm(testLoader, total=len(testLoader))
        for i, (seq_len, scan_seq, label_seq, mask_seq,
                label_id) in enumerate(t):
            # Sequence length is not fixed: at least the first 2 frames are
            # used as input and the last 3 frames are always predicted.
            inputs = torch.cat((scan_seq, mask_seq.float()),
                               dim=2).to(device)[:, :-3, ...]  # B,S,C,H,W
            label = mask_seq.to(device)[:, (seq_len - 3):, ...]  # B,S,C,H,W
            pred = net(inputs)
            SaveVis(model_dir, i, scan_seq.to(device), mask_seq.to(device),
                    pred)
            _, _, channels, height, width = pred.size()
            pred = pred.reshape(-1, channels, height,
                                width)  # reshape to B*S,C,H,W
            _, _, _, height, width = label.size()
            label = label.reshape(-1, height, width)  # reshape to B*S,H,W
            label = label.to(device=device, dtype=torch.long)
            loss = lossfunction(pred, label)
            loss_aver = loss.item() / (label.shape[0])
            # record test loss
            test_losses.append(loss_aver)
            t.set_postfix({
                'test_loss': '{:.6f}'.format(loss_aver),
                'cnt': '{:02d}'.format(i)
            })
            # args.sample (when positive) limits how many samples are tested.
            if i >= args.sample and args.sample > 0:
                break

    if device.type == "cuda":
        torch.cuda.empty_cache()

    # Average loss over the whole run.
    return np.average(test_losses)
Exemplo n.º 37
0
    if hyperP['load_pretrain_code_embed']:
        model.decoder.embed[0].load_state_dict(torch.load('./pretrain_code_lm/embedding-1556211835.t7'))
        if hyperP['freeze_embed']:
            for param in model.decoder.embed[0].parameters():
                param.requires_grad = False

    model_type = ""
    if args.atten:
        model_type = "atten"
    else:
        model_type = "simple"
    model_path = "./models/models_%s_%s"%(args.data_mode, model_type)
    model.load(os.path.join(model_path, "model_100.t7"))
    # model.load('model_100.t7')
    beam_decoder = Decoder(model, model_type=model_type)
    if is_cuda:
        model.to(device)
        # beam_decoder.to(device)
    model.eval()

    # input('check gpu location')
    sos = special_symbols['code_sos']
    eos = special_symbols['code_eos']
    unk = special_symbols['code_unk']
    idx2code = code_intent_pair.idx2code
    intent2idx = code_intent_pair.intent2idx

    dummy_code_list = []
    true_code_list = []
Exemplo n.º 38
0
class PointerNetwork(object):
    """Encoder/decoder pair driven through per-step TensorFlow placeholders."""

    def __init__(self, max_len, batch_size, num_units=32, input_size=1):
        """Create the encoder, decoder and all input/target placeholders.

        Args:
            max_len: Number of encoder steps; the decoder gets ``max_len + 1``.
            batch_size: First dimension of every placeholder.
            num_units: Passed to both ``Encoder`` and ``Decoder``.
            input_size: Second dimension of the input placeholders.
        """
        self.encoder = Encoder(num_units=num_units)
        self.decoder = Decoder(num_units=num_units)

        self.input_size = input_size
        self.batch_size = batch_size

        self.encoder_inputs = [
            tf.placeholder(tf.float32, [batch_size, input_size],
                           name="EncoderInput%d" % step)
            for step in range(max_len)
        ]

        self.decoder_inputs = []
        self.decoder_targets = []
        self.target_weights = []
        for step in range(max_len + 1):
            dec_input = tf.placeholder(tf.float32, [batch_size, input_size],
                                       name="DecoderInput%d" % step)
            self.decoder_inputs.append(dec_input)
            dec_target = tf.placeholder(tf.int32, [batch_size, 1],
                                        name="DecoderTarget%d" % step)
            self.decoder_targets.append(dec_target)
            weight = tf.placeholder(tf.float32, [batch_size, 1],
                                    name="TargetWeight%d" % step)
            self.target_weights.append(weight)

    def create_feed_dict(self, encoder_input_data, decoder_input_data,
                         decoder_target_data):
        """Map every placeholder to its data; target weights are all ones.

        Each data sequence is paired step by step with the matching
        placeholder list (pairing stops at the shorter of the two).

        Returns:
            A dict from placeholder to the value to feed.
        """
        feed_dict = dict(zip(self.encoder_inputs, encoder_input_data))
        feed_dict.update(zip(self.decoder_inputs, decoder_input_data))
        feed_dict.update(zip(self.decoder_targets, decoder_target_data))
        feed_dict.update((weight_ph, np.ones([self.batch_size, 1]))
                         for weight_ph in self.target_weights)
        return feed_dict

    def build(self, feed_prev=False):
        """Assemble the graph and return loss, accuracy, predictions, targets.

        Args:
            feed_prev: When true, every decoder step is given the first
                decoder input placeholder instead of its own; the flag is
                also forwarded to ``Decoder.decode``.

        Returns:
            ``(loss, correct_predict_ratio, predicts, targets)``.
        """
        encoder_outputs, final_state = self.encoder.encode(self.encoder_inputs)
        # A zero tensor is placed in front of the encoder inputs.
        encoder_inputs = [tf.zeros([self.batch_size, 1])] + self.encoder_inputs

        if feed_prev:
            decoder_inputs = [self.decoder_inputs[0]] * len(self.decoder_inputs)
        else:
            decoder_inputs = self.decoder_inputs

        outputs, _states, _inps = self.decoder.decode(decoder_inputs,
                                                      final_state,
                                                      encoder_outputs,
                                                      encoder_inputs,
                                                      feed_prev)

        # Stack the per-step outputs along a new axis 1.
        outputs = tf.concat([tf.expand_dims(step_out, 1)
                             for step_out in outputs], 1)
        targets = tf.concat(self.decoder_targets, 1)
        weights = tf.concat(self.target_weights, 1)

        print(outputs, targets, weights)
        per_example = melt.seq2seq.sequence_loss_by_example(
            outputs, targets, weights)
        loss = tf.reduce_mean(per_example)

        predicts = tf.to_int32(tf.argmax(outputs, 2))

        matches = melt.sequence_equal(predicts, targets)
        correct_predict_ratio = tf.reduce_mean(tf.to_float(matches))

        return loss, correct_predict_ratio, predicts, targets
Exemplo n.º 39
0
    def calc_letter_distribution(self, **kwargs):
        """Fit per-letter, per-finger touch distributions and save them.

        For every letter in the tasks, the feature and finger of the matching
        data item are obtained from ``Decoder.get_feature`` /
        ``Decoder.get_finger``. For each (letter, finger) pair with samples,
        the mean and covariance of the first two feature columns are stored.
        The three resulting tables are pickled to ``models/touch.model`` and a
        fresh ``Decoder`` is assigned to ``self.decoder``.

        Keyword Args:
            data_list: Sequence of data sequences, one per task.
            task_list: Sequence of tasks; each task is indexed per character
                and must be as long as its data sequence.

        Raises:
            AssertionError: if the lists (or a data/task pair) differ in
                length, or a fitted variance is NaN.
        """
        data_list = kwargs['data_list']
        task_list = kwargs['task_list']
        assert (len(data_list) == len(task_list))

        QWERTY = ['QWERTYUIOP', 'ASDFGHJKL', 'ZXCVBNM']
        self.letter_positions = [[-1, -1] for _ in range(26)]
        self.letter_fingers = np.zeros((26, 10))
        self.letter_distributions = [
            [[-1, -1, 0.1, 0.1, 0.1, 0] for _ in range(10)]
            for _ in range(26)
        ]  # Format = [xc, yc, std_x2, std_y2, std_xy, p]
        # Record the (column, row) of every letter on the QWERTY layout.
        for r, line in enumerate(QWERTY):
            for c, ch in enumerate(line):
                self.letter_positions[ord(ch) - ord('A')] = [c, r]

        features = [[[] for _ in range(10)] for _ in range(26)]
        for data, task in zip(data_list, task_list):
            assert (len(data) == len(task))
            for i in range(len(task)):
                letter = task[i]
                if letter.isalpha():
                    # NOTE(review): the index assumes lowercase a-z letters.
                    alpha = ord(letter) - ord('a')
                    feature = Decoder.get_feature(data[i])
                    finger = Decoder.get_finger(data[i])
                    # finger = 0 # If not using fingering model
                    features[alpha][finger].append(feature)

        for alpha in range(26):
            for finger in range(10):
                points = np.array(features[alpha][finger])
                if len(points) >= 1:
                    self.letter_fingers[alpha][finger] += len(points)
                    X = points[:, 0]
                    Y = points[:, 1]

                    if len(points) >= 5:  # Remove > 3_std
                        n_std = 3
                        xc, x_std = np.mean(X), np.std(X)
                        yc, y_std = np.mean(Y), np.std(Y)
                        pack = zip(X.copy(), Y.copy())
                        X = []
                        Y = []
                        for x, y in pack:
                            if abs(x - xc) <= n_std * x_std and abs(
                                    y - yc) <= n_std * y_std:
                                X.append(x)
                                Y.append(y)

                    xc = np.mean(X)
                    yc = np.mean(Y)

                    #plt.scatter(X, Y, color=('C'+str(alpha)), s = 5)
                    #plt.scatter(xc, yc, color='red', s = 10)

                    # Default covariance when there are too few samples to
                    # estimate one.
                    cov = np.array([[0.1, 0], [0, 0.1]])
                    if len(points) >= 5:
                        cov = np.cov(np.array([X, Y]))

                    std_x2 = cov[0, 0]
                    std_y2 = cov[1, 1]
                    std_xy = (std_x2**0.5) * (std_y2**0.5)
                    # NOTE(review): p is not covered by the assert below and
                    # is not finite when either variance is zero.
                    p = cov[0, 1] / std_xy
                    assert (not (np.isnan(std_x2) or np.isnan(std_y2)
                                 or np.isnan(std_xy)))
                    self.letter_distributions[alpha][finger] = [
                        xc, yc, std_x2, std_y2, std_xy, p
                    ]

            if sum(self.letter_fingers[alpha]) != 0:
                # Turn the counts into frequencies, then give fingers that
                # were never observed the most frequent finger's distribution
                # and a small floor frequency.
                self.letter_fingers[alpha] /= sum(self.letter_fingers[alpha])
                std_fingering = np.argmax(self.letter_fingers[alpha])
                for finger in range(10):
                    if self.letter_fingers[alpha][finger] == 0:
                        self.letter_distributions[alpha][
                            finger] = self.letter_distributions[alpha][
                                std_fingering].copy()
                    self.letter_fingers[alpha][finger] = max(
                        self.letter_fingers[alpha][finger], 0.001)

        #plt.show()
        # The context manager closes (and flushes) the model file even if
        # pickling fails; the bare open() used before left the handle open.
        with open('models/touch.model', 'wb') as model_file:
            pickle.dump([
                self.letter_positions, self.letter_fingers,
                self.letter_distributions
            ], model_file)
        self.decoder = Decoder()
Exemplo n.º 40
0
    # Replace train_idxs with this one to order the batches by sentence length
    ordered_batch_idxs = np.argsort(np.array([np.count_nonzero(s) for s in c.train_src_idxs]) * -1)
    xv = c.train_src_idxs.view([('w%d' % i, 'i4') for i in range(32)])
    similar_batch_idxs = np.argsort(xv, axis=0, order=[('w%d' % i) for i in range(32)]).flatten()

    # Process validation data
    if conf.do_validate:
        no_val_improvement = 0
        early_stopped = False
        y_val_strings = [c.trg_idx_to_sent(s) for s in c.valid_trg_idxs]
        X_val, y_val = next(cstm_model.batch_iterator(c.valid_src_idxs, c.valid_trg_idxs, c.valid_src_idxs.shape[0], len(c.trg_vocab)))
        logging.info("Will validate on (%s) %d sentences." % (conf.valid_prefix, c.valid_src_idxs.shape[0]))
        logging.info("Training will stop after %d validations without improvement." % conf.max_patience)
        scorer = MultiBleuScorer()
        decoder = Decoder(c.trg_vocab["</s>"], c.trg_vocab["<unk>"], conf.beam_size, generate_unk=False)
        best_val_bleu = BLEUScore()

        # Validation prediction placeholder
        y_pred_val = np.ndarray((X_val.shape[0], c.train_src_idxs.shape[1], len(c.trg_vocab))).astype(np.float32)

    # Process test data
    if conf.do_test:
        y_test_strings = [c.trg_idx_to_sent(s) for s in c.test_trg_idxs]
        X_test, y_test = next(cstm_model.batch_iterator(c.test_src_idxs, c.test_trg_idxs, c.test_src_idxs.shape[0], len(c.trg_vocab)))
        y_pred_test = np.ndarray((X_test.shape[0], c.train_src_idxs.shape[1], len(c.trg_vocab))).astype(np.float32)
        logging.info("Will test on (%s) %d sentences." % (conf.test_prefix, c.test_src_idxs.shape[0]))


    # Create the model
    logging.info("Creating model...")
Exemplo n.º 41
0
 def __init__(self, encoder_weights_path):
     """Create the encoder, decoder and SAMod sub-objects.

     Args:
         encoder_weights_path: Passed unchanged to ``Encoder``.
     """
     self.encoder = Encoder(encoder_weights_path)
     self.decoder = Decoder()
     # NOTE(review): 512 is presumably the feature/channel width SAMod
     # expects from the encoder — confirm against SAMod's definition.
     self.SAModule = SAMod(512)
Exemplo n.º 42
0
def translate(sentence):
	"""Run a sentence through pre_process, a fresh Decoder, and post_process.

	Returns:
		The value returned by ``post_process`` for the decoded sentence.
	"""
	prepared = pre_process(sentence)
	decoded = Decoder().decode(prepared)
	return post_process(decoded)
Exemplo n.º 43
0
 def test_mixed_chars(self):
     # NOTE(review): only the Decoder construction is visible here; the
     # checks suggested by the test name are not part of this snippet.
     decoder = Decoder()
Exemplo n.º 44
0
class Citrocan(App):
    """Kivy app that feeds received CAN frames to a ``Decoder`` and shows them.

    Frames come from a recorded file, a Bluetooth socket or a serial port
    (chosen in ``get_candata``) and are read on a background thread. The
    ``d_*`` properties hold the values pushed by the decoder through
    ``prop_set``.
    """

    dec = None  # Decoder instance, created in build()
    update = False  # set by the receiver thread, consumed by visualize()
    stop_ev = threading.Event()  # set in on_stop() to end the receiver loops
    d_time = StringProperty()
    d_date = StringProperty()
    d_temp = StringProperty()
    d_vol = StringProperty()
    d_band = StringProperty()
    d_name = StringProperty()
    d_info = StringProperty()
    d_title = StringProperty()
    d_memch = StringProperty()
    d_dx = StringProperty()
    d_rds = StringProperty()
    d_rds_ok = BooleanProperty()
    d_ta = StringProperty()
    d_ta_ok = BooleanProperty()
    d_pty = StringProperty()
    d_pty_ok = BooleanProperty()
    d_ptyname = StringProperty()
    d_rdtxt_rnd = StringProperty()
    d_reg = StringProperty()
    d_loud = StringProperty()
    d_icon = StringProperty("icon")
    d_volbar = NumericProperty()
    d_alert = StringProperty()
    d_debug = StringProperty()

    def build(self):
        """Set the window size, create the decoder, start timers and the receiver thread."""
        Window.size = (1024, 520)
        self.dec = Decoder(self.prop_set)
        Clock.schedule_interval(self.update_time, .5)
        Clock.schedule_interval(self.visualize, .4)
        thr = threading.Thread(target=self.get_candata)
        # The daemon attribute replaces the deprecated Thread.setDaemon().
        thr.daemon = True
        thr.start()

    def update_time(self, *_):
        """Refresh the clock and date strings.

        The time separator alternates between ':' and ' ' on successive calls.
        """
        self.d_time = time.strftime("%H %M" if ':' in self.d_time else "%H:%M")
        self.d_date = time.strftime("%a %d/%m/%Y")

    def visualize(self, *_):
        """Ask the decoder to visualize, but only when an update is pending."""
        if self.dec and self.update:
            self.update = False
            self.dec.visualize()

    def prop_set(self, var, val):
        """Assign ``val`` to the ``d_<var>`` property when the value differs."""
        name = "d_" + var
        if getattr(self, name) != val:
            setattr(self, name, val)

    def file_receiver(self, on_recv, fname):
        """Replay a recorded log, pacing frames by their timestamps.

        Each non-empty line is ``<timestamp> <frame>``; frames starting with
        'R' or 'S' are passed to ``on_recv``.

        Args:
            on_recv: Callback taking the frame text.
            fname: Path of the log file to replay.
        """
        old_tm = .0
        # The context manager closes the file even if on_recv or parsing raises.
        with open(fname, "r") as sp:
            for ln in sp:
                if self.stop_ev.is_set():
                    break
                buf = ln.strip()
                # print("got:", buf)
                if not buf:
                    continue
                tm, _, b = buf.partition(' ')
                stamp = float(tm)
                if old_tm:
                    # Clamp at zero: time.sleep() raises ValueError for a
                    # negative delay (timestamps going backwards).
                    time.sleep(max(0.0, stamp - old_tm))
                old_tm = stamp
                # b[:1] is '' for a timestamp-only line, where b[0] would
                # raise IndexError.
                if b[:1] in ('R', 'S'):
                    on_recv(b)
        print("EOF, stop playing.")

    def serial_receiver(self, on_recv):
        """Read frames from the serial port until ``stop_ev`` is set.

        The port is (re)opened whenever it is not open, "i0" is written until
        a line starting with 'I' has been received, and every received line
        starting with 'R' or 'S' is passed to ``on_recv``.

        Args:
            on_recv: Callback taking the frame text.
        """
        sp = None
        while not self.stop_ev.is_set():
            if not sp:
                buf = []
                ready = False
                try:
                    sp = serial.Serial(port=Port, baudrate=460800, timeout=1)
                except (ValueError, serial.SerialException) as e:
                    print("can't open serial:", e)
                    if self.dec.connected:
                        self.dec.connected = False
                        self.update = True

            if sp and not ready:
                try:
                    sp.write("i0\r\n".encode())
                except serial.SerialTimeoutException as e:
                    print("can't write to serial:", e)
                    time.sleep(1)

            if sp:
                while not self.stop_ev.is_set():
                    try:
                        r = sp.read(1)
                    except serial.SerialException:
                        sp.close()
                        sp = None
                        r = None
                    # An empty read (timeout) or a read error drops back to
                    # the outer loop.
                    if not r:
                        break
                    if r == b'\n':
                        # print("got:", ''.join(buf))
                        if len(buf):
                            if buf[0] in ('R', 'S'):
                                on_recv(''.join(buf))
                            elif buf[0] == 'I':
                                ready = True
                            buf = []
                    elif r >= b' ':
                        # Bytes below space (control characters) are dropped.
                        buf.append(r.decode())
            else:
                time.sleep(1)

        if sp:
            sp.close()

    def bt_receiver(self, on_recv):
        """Read frames from a paired Bluetooth device until ``stop_ev`` is set.

        Connects to the bonded device whose name equals ``BtName``, writes
        "i0" until a line starting with 'I' has been received, and passes
        every received line starting with 'R' or 'S' to ``on_recv``.

        Args:
            on_recv: Callback taking the frame text.
        """
        BluetoothAdapter = autoclass('android.bluetooth.BluetoothAdapter')
        UUID = autoclass('java.util.UUID')

        sock = None
        while not self.stop_ev.is_set():
            if not sock:
                buf = []
                send = None
                recv = None
                ready = False
                if self.dec.connected:
                    self.dec.connected = False
                    self.update = True
                paired = BluetoothAdapter.getDefaultAdapter().getBondedDevices().toArray()
                for dev in paired:
                    if dev.getName() == BtName:
                        sock = dev.createRfcommSocketToServiceRecord(UUID.fromString("00001101-0000-1000-8000-00805F9B34FB"))
                        recv = sock.getInputStream()
                        send = sock.getOutputStream()
                        print("wait for connection")
                        try:
                            sock.connect()
                        except Exception as e:
                            sock.close()
                            sock = None
                            print("can't connect bluetooth:", e)
                        break

            if sock and not ready:
                print("sending init")
                send.write("i0\r\n")
                send.flush()

            if sock:
                while not self.stop_ev.is_set():
                    try:
                        r = recv.read()
                    except Exception as e:
                        print("can't read from bluetooth:", e)
                        sock.close()
                        sock = None
                        r = None
                    if not r:
                        break
                    if r == 13:  # carriage return ends a line
                        # print("got:", ''.join(buf))
                        if len(buf):
                            if buf[0] in ('R', 'S'):
                                on_recv(''.join(buf))
                            elif buf[0] == 'I':
                                ready = True
                            buf = []
                    elif r >= 32:
                        # Codes below 32 (control characters) are dropped.
                        buf.append(chr(r))

            else:
                time.sleep(1)

        if sock:
            sock.close()

    def get_candata(self):
        """Receiver-thread entry point: pick a source and decode its frames.

        Uses the file source when ``FromFile`` is truthy, the Bluetooth
        source when ``autoclass`` is truthy, and the serial port otherwise.
        """
        self.dec.connected = False
        self.update = True

        def on_recv(buf):
            """Parse ``<tag> <hex id> <len> <hex byte>...`` and hand it to the decoder."""
            # print("recv:", buf)
            try:
                flds = buf.split()
                cid = int(flds[1], 16)
                clen = int(flds[2])
                cflds = []
                for n in range(clen):
                    cflds.append(int(flds[n + 3], 16))
                if self.dec and self.dec.decode(cid, clen, cflds):
                    self.dec.connected = True
                    self.update = True
            except (TypeError, ValueError, IndexError) as e:
                print("can't decode:", buf, e)

        if FromFile:
            self.file_receiver(on_recv, FromFile)
        elif autoclass:
            self.bt_receiver(on_recv)
        else:
            self.serial_receiver(on_recv)

    def on_pause(self):
        """Return True in response to the app's pause event."""
        return True

    def on_resume(self):
        """Nothing to do on resume."""
        pass

    def on_stop(self):
        """Signal the receiver loops to finish."""
        self.stop_ev.set()
Exemplo n.º 45
0
 def test_mixed_case(self):
     # NOTE(review): only the Decoder construction is visible here; the
     # checks suggested by the test name are not part of this snippet.
     decoder = Decoder()
Exemplo n.º 46
0
 def __init__(self, decoder=None):
     """Keep the given decoder, or create a default ``Decoder`` when it is None.

     Args:
         decoder: Decoder to use; ``None`` means construct ``Decoder()``.
     """
     self.decoder = Decoder() if decoder is None else decoder
Exemplo n.º 47
0
 def test_all_lower(self):
     # NOTE(review): only the Decoder construction is visible here; the
     # checks suggested by the test name are not part of this snippet.
     decoder = Decoder()
Exemplo n.º 48
0
class AttentiveNP():
    """
    The Attentive Neural Process model.
    """
    def __init__(self, x_size, y_size, r_size, det_encoder_hidden_size,
                 det_encoder_n_hidden, lat_encoder_hidden_size,
                 lat_encoder_n_hidden, decoder_hidden_size, decoder_n_hidden,
                 attention_type):
        """
        Build the deterministic encoder, latent encoder, decoder and a single
        Adam optimiser over the parameters of all three.

        :param x_size: An integer describing the dimensionality of the input x
        :param y_size: An integer describing the dimensionality of the target variable y
        :param r_size: An integer describing the dimensionality of the embedding / context
                       vector r
        :param det_encoder_hidden_size: An integer describing the number of nodes per hidden
                                    layer in the deterministic encoder NN
        :param det_encoder_n_hidden: An integer describing the number of hidden layers in the
                                 deterministic encoder neural network
        :param lat_encoder_hidden_size: An integer describing the number of nodes per hidden
                                    layer in the latent encoder NN
        :param lat_encoder_n_hidden: An integer describing the number of hidden layers in the
                                 latent encoder neural network
        :param decoder_hidden_size: An integer describing the number of nodes per hidden
                                    layer in the decoder neural network
        :param decoder_n_hidden: An integer describing the number of hidden layers in the
                                 decoder neural network
        :param attention_type: The type of attention to be used. A string, either "multihead",
                                "laplace", "uniform", "dot_product". It is forwarded to the
                                deterministic encoder.
        """

        self.x_size = x_size
        self.y_size = y_size
        self.r_size = r_size
        # The caller's attention_type is forwarded here; it used to be
        # accepted and documented but replaced by a hard-coded "multihead".
        self.det_encoder = DeterministicEncoder(x_size,
                                                y_size,
                                                r_size,
                                                det_encoder_n_hidden,
                                                det_encoder_hidden_size,
                                                self_att=True,
                                                cross_att=True,
                                                attention_type=attention_type)
        self.lat_encoder = LatentEncoder((x_size + y_size), r_size,
                                         lat_encoder_n_hidden,
                                         lat_encoder_hidden_size)
        # The decoder input size is x_size plus two r_size-wide blocks (the
        # deterministic embedding r and the latent z that train() passes in).
        self.decoder = Decoder((x_size + r_size + r_size), y_size,
                               decoder_n_hidden, decoder_hidden_size)
        self.optimiser = optim.Adam(
            list(self.det_encoder.parameters()) +
            list(self.lat_encoder.parameters()) +
            list(self.decoder.parameters()))

    def train(self, x_train, y_train, x_test, y_test, x_scaler, y_scaler,
              batch_size, lr, iterations, testing, plotting):
        """
        :param x_train: A tensor with dimensions [N_train, x_size] containing the training
                        data (x values)
        :param y_train: A tensor with dimensions [N_train, y_size] containing the training
                        data (y values)
        :param x_test: A tensor with dimensions [N_test, x_size] containing the test data
                       (x values)
        :param y_test: A tensor with dimensions [N_test, y_size] containing the test data
                       (y values)
        :param x_scaler: The standard scaler used when testing == True to convert the
                         x values back to the correct scale.
        :param y_scaler: The standard scaler used when testing == True to convert the predicted
                         y values back to the correct scale.
        :param batch_size: An integer describing the number of times we should
                                    sample the set of context points used to form the
                                    aggregated embedding during training, given the number
                                    of context points to be sampled N_context. When testing
                                    this is set to 1
        :param lr: A float number, describing the optimiser's learning rate
        :param iterations: An integer, describing the number of iterations. In this case it
                           also corresponds to the number of times we sample the number of
                           context points N_context
        :param testing: A Boolean object; if set to be True, then every 30 iterations the
                        R^2 score and RMSE values will be calculated and printed for
                        both the train and test data
        :return:
        """
        self.gp_sampler = GPSampler(data=(x_train, y_train))
        self.batch_size = batch_size
        self._max_num_context = x_train.shape[0]
        self.iterations = iterations

        #Convert the data for use in PyTorch.
        x_train = torch.from_numpy(x_train).float()
        y_train = torch.from_numpy(y_train).float()
        x_test = torch.from_numpy(x_test).float()
        y_test = torch.from_numpy(y_test).float()

        # At prediction time the context points comprise the entire training set.
        x_tot_context = torch.unsqueeze(x_train, dim=0)
        y_tot_context = torch.unsqueeze(y_train, dim=0)

        for iteration in range(iterations):
            self.optimiser.zero_grad()

            # Randomly select the number of context points N_context (uniformly from 3 to
            # N_train)
            num_context = np.random.randint(low=1, high=self._max_num_context)

            # Randomly select N_context context points from the training data, a total of
            # batch_size times.
            x_context, y_context, x_target, y_target = self.gp_sampler.sample(
                batch_size=self.batch_size,
                train_size=50,
                num_context=num_context,
                x_min=-4,
                x_max=4)

            x_context = torch.from_numpy(x_context).float()
            y_context = torch.from_numpy(y_context).float()
            x_target = torch.from_numpy(x_target).float()
            y_target = torch.from_numpy(y_target).float()

            # The input to both the deterministic and latent encoder is (x, y)_i for all data points in the set of context
            # points.
            input_context = torch.cat((x_context, y_context), dim=2)
            input_target = torch.cat((x_target, y_target), dim=2)

            #The deterministic encoder outputs the deterministic embedding r.
            r = self.det_encoder.forward(
                x_context, y_context,
                x_target)  #[batch_size, N_target, r_size]

            # The latent encoder outputs a prior distribution over the latent embedding z (conditioned only on the context points).
            z_priors, mu_prior, sigma_prior = self.lat_encoder.forward(
                x_context, y_context)

            if y_target is not None:
                z_posteriors, mu_posterior, sigma_posterior = self.lat_encoder.forward(
                    x_target, y_target)
                zs = [dist.sample()
                      for dist in z_posteriors]  #[batch_size, r_size]

            else:
                zs = [dist.sample()
                      for dist in z_priors]  #[batch_size, r_size]

            z = torch.cat(zs)
            z = z.view(-1, self.r_size)

            # The input to the decoder is the concatenation of the target x values, the deterministic embedding r and the latent variable z
            # the output is the predicted target y for each value of x.
            dists_y, _, _ = self.decoder.forward(x_target.float(), r.float(),
                                                 z.float())

            # Calculate the loss
            log_ps = [
                dist.log_prob(y_target[i, ...].float())
                for i, dist in enumerate(dists_y)
            ]
            log_ps = torch.cat(log_ps)

            kl_div = [
                kl_divergence(z_posterior, z_prior).float()
                for z_posterior, z_prior in zip(z_posteriors, z_priors)
            ]
            kl_div = torch.tensor(kl_div)

            loss = -(torch.mean(log_ps) - torch.mean(kl_div))
            self.losslogger = loss

            # The loss should generally decrease with number of iterations, though it is not
            # guaranteed to decrease monotonically because at each iteration the set of
            # context points changes randomly.
            if iteration % 200 == 0:
                print("Iteration " + str(iteration) +
                      ":, Loss = {:.3f}".format(loss.item()))
                # We can set testing = True if we want to check that we are not overfitting.
                if testing:

                    r2_train_list = []
                    rmse_train_list = []
                    nlpd_train_list = []
                    r2_test_list = []
                    rmse_test_list = []
                    nlpd_test_list = []

                    #Useful for determining uncertainty due to sampling z.
                    for j in range(10):
                        _, predict_train_mean, predict_train_var = self.predict(
                            x_tot_context, y_tot_context, x_tot_context)
                        predict_train_mean = np.squeeze(
                            predict_train_mean.data.numpy(), axis=0)
                        predict_train_var = np.squeeze(
                            predict_train_var.data.numpy(), axis=0)

                        # We transform the standardised predicted and actual y values back to the original data
                        # space
                        y_train_mean_pred = y_scaler.inverse_transform(
                            predict_train_mean)
                        y_train_var_pred = y_scaler.var_ * predict_train_var
                        y_train_untransformed = y_scaler.inverse_transform(
                            y_train)

                        r2_train = r2_score(y_train_untransformed,
                                            y_train_mean_pred)
                        nlpd_train = nlpd(y_train_mean_pred, y_train_var_pred,
                                          y_train_untransformed)
                        rmse_train = np.sqrt(
                            mean_squared_error(y_train_untransformed,
                                               y_train_mean_pred))
                        r2_train_list.append(r2_train)
                        rmse_train_list.append(rmse_train)
                        nlpd_train_list.append(nlpd_train)

                        x_test = torch.unsqueeze(x_test, dim=0)
                        _, predict_test_mean, predict_test_var = self.predict(
                            x_tot_context, y_tot_context, x_test)
                        x_test = torch.squeeze(x_test, dim=0)
                        predict_test_mean = np.squeeze(
                            predict_test_mean.data.numpy(), axis=0)
                        predict_test_var = np.squeeze(
                            predict_test_var.data.numpy(), axis=0)

                        # We transform the standardised predicted and actual y values back to the original data
                        # space
                        y_test_mean_pred = y_scaler.inverse_transform(
                            predict_test_mean)
                        y_test_var_pred = y_scaler.var_ * predict_test_var
                        y_test_untransformed = y_scaler.inverse_transform(
                            y_test)

                        indices = np.random.permutation(
                            y_test_untransformed.shape[0])[0:20]
                        r2_test = r2_score(y_test_untransformed[indices, 0],
                                           y_test_mean_pred[indices, 0])
                        rmse_test = np.sqrt(
                            mean_squared_error(
                                y_test_untransformed[indices, 0],
                                y_test_mean_pred[indices, 0]))
                        nlpd_test = nlpd(y_test_mean_pred[indices, 0],
                                         y_test_var_pred[indices, 0],
                                         y_test_untransformed[indices, 0])

                        r2_test_list.append(r2_test)
                        rmse_test_list.append(rmse_test)
                        nlpd_test_list.append(nlpd_test)

                    r2_train_list = np.array(r2_train_list)
                    rmse_train_list = np.array(rmse_train_list)
                    nlpd_train_list = np.array(nlpd_train_list)
                    r2_test_list = np.array(r2_test_list)
                    rmse_test_list = np.array(rmse_test_list)
                    nlpd_test_list = np.array(nlpd_test_list)

                    print("\nR^2 score (train): {:.3f} +- {:.3f}".format(
                        np.mean(r2_train_list),
                        np.std(r2_train_list) / np.sqrt(len(r2_train_list))))
                    #print("RMSE (train): {:.3f} +- {:.3f}".format(np.mean(rmse_train_list) / np.sqrt(
                    #len(rmse_train_list))))
                    print("NLPD (train): {:.3f} +- {:.3f}".format(
                        np.mean(nlpd_train_list),
                        np.std(nlpd_train_list) /
                        np.sqrt(len(nlpd_train_list))))
                    print("R^2 score (test): {:.3f} +- {:.3f}".format(
                        np.mean(r2_test_list),
                        np.std(r2_test_list) / np.sqrt(len(r2_test_list))))
                    #print("RMSE (test): {:.3f} +- {:.3f}".format(np.mean(rmse_test_list),
                    #np.std(rmse_test_list) / np.sqrt(len(rmse_test_list))))
                    print("NLPD (test): {:.3f} +- {:.3f}\n".format(
                        np.mean(nlpd_test_list),
                        np.std(nlpd_test_list) / np.sqrt(len(nlpd_test_list))))

                    if iteration % 1000 == 0:
                        if plotting:
                            x_c = x_scaler.inverse_transform(np.array(x_train))
                            y_c = y_train_untransformed
                            x_t = x_scaler.inverse_transform(np.array(x_test))
                            y_t = x_t**3

                            plt.figure(figsize=(7, 7))
                            plt.scatter(x_c,
                                        y_c,
                                        color='red',
                                        s=15,
                                        marker='o',
                                        label="Context points")
                            plt.plot(x_t,
                                     y_t,
                                     linewidth=1,
                                     color='red',
                                     label="Ground truth")
                            plt.plot(x_t,
                                     y_test_mean_pred,
                                     color='darkcyan',
                                     linewidth=1,
                                     label='Mean prediction')
                            plt.plot(x_t[:, 0],
                                     y_test_mean_pred[:, 0] -
                                     1.96 * np.sqrt(y_test_var_pred[:, 0]),
                                     linestyle='-.',
                                     marker=None,
                                     color='darkcyan',
                                     linewidth=0.5)
                            plt.plot(x_t[:, 0],
                                     y_test_mean_pred[:, 0] +
                                     1.96 * np.sqrt(y_test_var_pred[:, 0]),
                                     linestyle='-.',
                                     marker=None,
                                     color='darkcyan',
                                     linewidth=0.5,
                                     label='Two standard deviations')
                            plt.fill_between(
                                x_t[:, 0],
                                y_test_mean_pred[:, 0] -
                                1.96 * np.sqrt(y_test_var_pred[:, 0]),
                                y_test_mean_pred[:, 0] +
                                1.96 * np.sqrt(y_test_var_pred[:, 0]),
                                color='cyan',
                                alpha=0.2)
                            plt.title('Predictive distribution')
                            plt.ylabel('f(x)')
                            plt.yticks([-80, -60, -40, -20, 0, 20, 40, 60, 80])
                            plt.ylim(-80, 80)
                            plt.xlim(-4, 4)
                            plt.xlabel('x')
                            plt.xticks([-4, -2, 0, 2, 4])
                            plt.legend()
                            plt.savefig('results/anp_1dreg_crossatt_2selfatt' +
                                        str(iteration) + '.png')

            loss.backward()
            self.optimiser.step()

    def predict(self, x_context, y_context, x_target):
        """
        Predict the output distribution at the target inputs from a context set.

        :param x_context: A tensor of dimensions [batch_size, N_context, x_size].
                          When training N_context is randomly sampled between 3 and N_train;
                          when testing N_context = N_train
        :param y_context: A tensor of dimensions [batch_size, N_context, y_size]
        :param x_target: A tensor of dimensions [N_target, x_size]
                         NOTE(review): callers visible in this file pass a tensor
                         with a leading batch dimension -- confirm the expected shape.
        :return dist: The distributions over the predicted outputs y_target
        :return mu: A tensor of dimensionality [batch_size, N_target, output_size]
                    describing the means
                    of the normal distribution.
        :return var: A tensor of dimensionality [batch_size, N_target, output_size]
                     describing the variances of the normal distribution.
                     NOTE(review): this is the third value returned by the decoder,
                     named ``sigma`` locally -- confirm it is a variance, not a std.
        """
        # Deterministic path: embedding r computed from the context and the targets.
        r = self.det_encoder.forward(x_context, y_context, x_target)

        # Latent path: draw one sample per distribution returned by the latent
        # encoder (conditioned on the context only) and stack them as rows of
        # width r_size.
        dists_z, _, _ = self.lat_encoder.forward(x_context, y_context)
        z = torch.cat([dist_z.sample() for dist_z in dists_z])
        z = z.view(-1, self.r_size)

        # Decode exactly once. The previous version ran the same decoder call
        # twice and threw the first result away, doubling the cost of inference.
        dist, mu, sigma = self.decoder.forward(x_target.float(), r.float(),
                                               z.float())

        return dist, mu, sigma
Exemplo n.º 49
0
    microsecond = arg[19:]
    microsecond += (6-len(microsecond))*'0'
    microsecond = int(microsecond)
    

    new_pulses.append(datetime(year,month,day,hour,minute,second,microsecond))

last = None
for p in new_pulses:
    if last is not None:
        if show_deltas:
            print p - last
    last = p
    

decoder = Decoder()

frame_decoder = FrameDecoder()

new_symbols = decoder.decode(new_pulses,debug=False)

#for ns in new_symbols:
#    print ns

new_data = frame_decoder.decode(new_symbols)

for d in new_data:
    print d.name,"=",d.value,"@",d.timeStamp
    

Exemplo n.º 50
0
def test():
    '''
    Run inference on the MovingMNIST test split and pickle the predictions.

    Reads the module-level ``args`` for the data/model options. If
    ``<save_dir>/cmd_args.txt`` exists, ``args`` is overwritten in place with the
    JSON stored there (and ``is_train`` is forced to False) before the network
    is built. The weights are loaded from ``args.checkpoint``; at most 3000
    batches are evaluated and the list of per-batch predictions is written to
    ``preds.pkl`` in the current directory. No loss is computed.

    Calls ``exit()`` when the save directory does not exist.
    '''
    testFolder = MovingMNIST(is_train=False,
                             root='../data/npy-064/',
                             mode='test',
                             n_frames_input=args.frames_input,
                             n_frames_output=args.frames_output,
                             num_objects=[3])
    testLoader = torch.utils.data.DataLoader(testFolder,
                                             batch_size=args.batch_size,
                                             shuffle=False)

    # The second test used to be a separate `if`, so its `else` branch always
    # replaced the ConvLSTM parameters with the ConvGRU ones.
    if args.convlstm:
        encoder_params = convlstm_encoder_params
        decoder_params = convlstm_decoder_params
    elif args.convgru:
        encoder_params = convgru_encoder_params
        decoder_params = convgru_decoder_params
    else:
        # Neither flag given: ConvGRU is the default architecture.
        encoder_params = convgru_encoder_params
        decoder_params = convgru_decoder_params

    # Capture these before `args` is (possibly) replaced by the saved arguments.
    CHECKPOINT = args.checkpoint
    TIMESTAMP = args.timestamp
    save_dir = './save_model/' + TIMESTAMP

    # Restore the arguments the model was trained with, when they were saved.
    args_path = os.path.join(save_dir, 'cmd_args.txt')
    if os.path.exists(args_path):
        with open(args_path, 'r') as f:
            args.__dict__ = json.load(f)
            args.is_train = False

    # Pick the device first and build everything on it; the unconditional
    # .cuda() calls made the CPU fallback below unreachable.
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    encoder = Encoder(encoder_params[0], encoder_params[1]).to(device)
    decoder = Decoder(decoder_params[0], decoder_params[1],
                      args.frames_output).to(device)
    net = ED(encoder, decoder)

    if torch.cuda.device_count() > 1:
        net = nn.DataParallel(net)
    net.to(device)

    if os.path.exists(save_dir):
        # load existing model
        print('==> loading existing model')
        # map_location lets a checkpoint saved on GPU load on a CPU-only host.
        model_info = torch.load(CHECKPOINT, map_location=device)
        net.load_state_dict(model_info['state_dict'])
        # The optimizer state stored in the checkpoint is not needed for
        # inference, so it is no longer restored.
    else:
        print('there is no such checkpoint in', save_dir)
        exit()

    preds = []
    ####################
    # test the model   #
    ####################
    with torch.no_grad():
        net.eval()
        t = tqdm(testLoader, leave=False, total=len(testLoader))
        for i, (_, _, inputVar, _, _) in enumerate(t):
            # Hard cap on the number of evaluated batches.
            if i == 3000:
                break
            inputs = inputVar.to(device)
            pred = net(inputs)
            preds.append(pred)

    torch.cuda.empty_cache()

    import pickle
    with open("preds.pkl", "wb") as fp:
        pickle.dump(preds, fp)
Exemplo n.º 51
0
 def test_all_caps(self):
     # Only constructs a default Decoder; no assertions are present in this snippet.
     decoder = Decoder()
Exemplo n.º 52
0
class Keyboard:
    """Typing-task front end: draws a 10x3 letter grid and records typed input.

    The instance keeps the list of task phrases, the text and raw input data
    entered for the current phrase, a ``Decoder`` used for word correction and
    key positions, and the pygame window/sounds used for feedback.
    """

    GRID = 50  # edge length, in pixels, of one grid cell
    TASK_NUM = 20  # number of phrases kept after shuffling the task file
    CORPUS_NUM = 20000  # not referenced inside this class

    # Accepted values for VISABLE_FEEDBACK.
    VISABLE_NO = 0
    VISABLE_TOUCH = 1
    VISABLE_ALWAYS = 2
    # Accepted values for WORD_CORRECTION.
    CORRECT_NO = 0
    CORRECT_WORD = 1
    CORRECT_LETTER = 2

    def __init__(self,
                 VISABLE_FEEDBACK=VISABLE_ALWAYS,
                 WORD_CORRECTION=CORRECT_WORD):
        """Store the feedback/correction modes and run every ``init_*`` step.

        :param VISABLE_FEEDBACK: one of VISABLE_NO, VISABLE_TOUCH, VISABLE_ALWAYS.
        :param WORD_CORRECTION: one of CORRECT_NO, CORRECT_WORD, CORRECT_LETTER.
        """
        self.VISABLE_FEEDBACK = VISABLE_FEEDBACK
        self.WORD_CORRECTION = WORD_CORRECTION
        self.init_letter_info()
        self.init_task_list('phrases.txt')
        self.init_decoder()
        self.init_inputted_data()
        self.init_display()
        self.init_sound()

    @staticmethod
    def _now():
        """Return a timestamp in seconds for measuring elapsed time.

        ``time.clock`` was removed in Python 3.8; prefer ``time.perf_counter``
        and fall back to ``time.clock`` only where it is the sole option.
        """
        timer = getattr(time, 'perf_counter', None)
        if timer is None:
            timer = time.clock
        return timer()

    def init_letter_info(self):
        """Build ``self.letter_colors``: one background colour per letter A-Z.

        Letters are grouped into five strings; every letter gets the colour of
        the first group that contains it.
        """
        FINGERS = ['QAZ|P', 'WSX|OL', 'EDC|IK', 'RFV|TGB', 'YHN|UJM']
        COLORS = [(0, 64, 0), (64, 0, 64), (64, 64, 0), (0, 64, 64),
                  (0, 0, 64)]
        self.letter_colors = []
        for alpha in range(26):
            ch = chr(alpha + ord('A'))
            for (finger, color) in zip(FINGERS, COLORS):
                if ch in finger:
                    self.letter_colors.append(color)
                    break

    def init_task_list(self, path):
        """Load the phrases in ``path``, shuffle them and keep ``TASK_NUM``.

        Each line is lower-cased and stripped of its trailing newline. The
        first phrase after shuffling becomes the current task.

        :param path: text file with one phrase per line.
        :raises IndexError: if the file contains no lines.
        """
        self.task_list = []
        self.curr_task_id = 0

        # Context manager so the handle is closed even if reading fails
        # (the file used to be opened and never closed).
        with open(path) as task_file:
            for line in task_file:
                self.task_list.append(line.lower().strip('\n'))

        random.shuffle(self.task_list)
        self.task_list = self.task_list[:self.TASK_NUM]
        self.task = self.task_list[self.curr_task_id]

    def init_decoder(self):
        """Create the ``Decoder`` used for key positions and word prediction."""
        self.decoder = Decoder()

    def init_inputted_data(self):
        """Start with an empty input buffer for the current phrase."""
        self.redo_phrase()

    def init_display(self):
        """Open the pygame window and clear the four highlight coordinates."""
        self.screen = pygame.display.set_mode(
            (10 * self.GRID + 1, 4 * self.GRID + 1))
        pygame.display.set_caption('Qwerty Watch')
        # Highlight-line coordinates; None means "draw no line".
        self.L_row = None
        self.L_col = None
        self.R_row = None
        self.R_col = None

    def init_sound(self):
        """Load the feedback sounds and set their volumes."""
        self.sound_do = pygame.mixer.Sound("sound/do.wav")
        self.sound_do.set_volume(0.2)
        self.sound_re = pygame.mixer.Sound("sound/re.wav")
        self.sound_re.set_volume(0.2)
        self.sound_mi = pygame.mixer.Sound("sound/mi.wav")
        self.sound_mi.set_volume(0.2)
        self.sound_type = pygame.mixer.Sound("sound/type.wav")
        self.sound_type.set_volume(1.0)

    def draw(self):
        """Render the task text, the typed text, the keys and the feedback."""
        GRID = self.GRID
        image = np.zeros((4 * GRID + 1, 10 * GRID + 1, 3), np.uint8)

        cv2.rectangle(image, (0, 0), (10 * GRID, GRID - 1), (0, 0, 0), -1)

        # Draw task and inputted text
        cv2.putText(image, self.task, (int(GRID * 0.5), int(GRID * 0.4)),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 1)
        cv2.putText(image, self.inputted_text + '_',
                    (int(GRID * 0.5), int(GRID * 0.8)),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 1)

        # Draw the keyboard layout: a filled cell, a white border and the letter.
        for i in range(26):
            ch = chr(i + ord('A'))
            pos = self.decoder.positions[i]
            bg_color = self.letter_colors[i]
            # Cells are shifted down by one grid row to leave room for the text.
            top_left = (int(pos[0] * GRID), int((pos[1] + 1) * GRID))
            bottom_right = (int((pos[0] + 1) * GRID),
                            int((pos[1] + 2) * GRID))
            cv2.rectangle(image, top_left, bottom_right, bg_color, -1)
            cv2.rectangle(image, top_left, bottom_right, (255, 255, 255), 1)
            cv2.putText(image, ch,
                        (int(pos[0] * GRID) + 15,
                         int((pos[1] + 2) * GRID) - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)

        # Visable feedback
        if self.VISABLE_FEEDBACK == self.VISABLE_ALWAYS:
            # Each highlight is a 5-pixel band whose pixel values are doubled.
            if self.L_row is not None:
                row = max(0.5, min(3.5, self.L_row))
                row_pixel = int((row - 0.5 + 1) * GRID)
                image[row_pixel - 2:row_pixel + 3, :5 * GRID] *= 2
            if self.R_row is not None:
                row = max(0.5, min(3.5, self.R_row))
                row_pixel = int((row - 0.5 + 1) * GRID)
                image[row_pixel - 2:row_pixel + 3, 5 * GRID:] *= 2
            if self.L_col is not None:
                col = max(0.5, min(2.5, self.L_col))
                col_pixel = int((2.5 + col) * GRID)
                image[1 * GRID:4 * GRID, col_pixel - 2:col_pixel + 3] *= 2
            if self.R_col is not None:
                col = max(0.5, min(2.5, self.R_col))
                col_pixel = int((7.5 - col) * GRID)
                image[1 * GRID:4 * GRID, col_pixel - 2:col_pixel + 3] *= 2
        elif self.VISABLE_FEEDBACK == self.VISABLE_TOUCH:
            # Brighten an 11x11 patch at the last touch; the added brightness
            # falls linearly to zero over DURATION seconds.
            DURATION = 0.5
            if self._now() - self.last_touch_time < DURATION and len(
                    self.inputted_data) > 0:
                [col, row] = self.get_position(self.inputted_data[-1])
                row_pixel = int((row - 0.5 + 2) * GRID)
                col_pixel = int((col - 0.5 + 1) * GRID)
                schedule = (self._now() - self.last_touch_time) / DURATION
                image[row_pixel - 5:row_pixel + 6,
                      col_pixel - 5:col_pixel + 6] = cv2.add(
                          image[row_pixel - 5:row_pixel + 6,
                                col_pixel - 5:col_pixel + 6],
                          int(255 * (1 - schedule)))
        elif self.VISABLE_FEEDBACK == self.VISABLE_NO:
            pass

        pg_img = pygame.surfarray.make_surface(cv2.transpose(image))
        self.screen.blit(pg_img, (0, 0))
        pygame.display.flip()

    def next_phrase(self):
        """Advance to the next task phrase and clear the input buffer.

        :return: True if another phrase was selected; False once the list is
                 exhausted, in which case the task index wraps to 0 and
                 ``self.task`` is left unchanged.
        """
        self.curr_task_id += 1
        print('Phase = %d' % (self.curr_task_id))
        self.redo_phrase()
        if self.curr_task_id >= len(self.task_list):
            self.curr_task_id = 0
            return False
        self.task = self.task_list[self.curr_task_id]
        return True

    def redo_phrase(self):
        """Reset everything recorded for the current phrase."""
        self.inputted_space_cnt = 0
        self.inputted_text = ''
        self.inputted_data = []
        self.last_touch_time = -1

    def enter_a_letter(self, input_data, input_letter):
        """Append one letter (and its raw input data) to the current input.

        In CORRECT_LETTER mode the letter of the task at the current position
        is used instead of ``input_letter``, and nothing is entered when that
        position holds a space. Nothing is entered once the input is as long
        as the task.

        :param input_data: raw data recorded for this touch.
        :param input_letter: the letter recognised for this touch.
        :return: the letter that was appended, or '' if nothing was appended.
        """
        self.sound_type.play()
        i = len(self.inputted_text)
        letter = ''
        if i < len(self.task):
            if self.WORD_CORRECTION == self.CORRECT_LETTER:
                # can not enter space by inputting letter, when CORRECT_LETTER
                if self.task[i] == ' ':
                    return ''
                letter = self.task[i]
            else:
                letter = input_letter
            self.inputted_text += letter
            self.inputted_data.append(input_data)
            self.last_touch_time = self._now()
        return letter

    def enter_a_space(self, input_data):
        """Append a space; in CORRECT_WORD mode first correct the last word.

        The last word is replaced by ``self.decoder.predict(...)`` unless the
        prediction is ''. A leading space or two consecutive spaces are
        rejected.

        :param input_data: raw data recorded for this touch.
        """
        self.sound_type.play()
        i = len(self.inputted_text)
        if i == 0 or self.inputted_text[-1] == ' ':  # can not enter two spaces
            return
        if self.WORD_CORRECTION == self.CORRECT_WORD:
            tags = self.inputted_text.split(' ')
            if len(tags) > 0 and tags[-1] != '':
                # Pass the raw data of the last word plus the task prefix of
                # the same length as the text typed so far.
                word = self.decoder.predict(
                    self.inputted_data[-len(tags[-1]):],
                    self.task[:len(self.inputted_text)])
                if word != '':  # '' means no match
                    tags[-1] = word
                self.inputted_text = ' '.join(tags)
        if i < len(self.task):
            self.inputted_space_cnt += 1
            self.inputted_text += ' '
            self.inputted_data.append(input_data)

    def delete_a_letter(self):
        """Remove the last character and its raw data, if there is any.

        The space counter is only reset when the text becomes empty.
        """
        self.sound_type.play()
        if len(self.inputted_text) > 0:
            self.inputted_text = self.inputted_text[:-1]
            self.inputted_data = self.inputted_data[:-1]
            if self.inputted_text == '':
                self.inputted_space_cnt = 0

    def get_position(self, data):  # get position from inputted data
        """Convert one raw input record into a ``[col, row]`` grid position.

        :param data: sequence whose first four items are
                     ``side`` ('L' or 'R'), ``index``, ``highlight_row`` and
                     ``highlight_col``.
        :return: ``[col, row]``; row is clamped to [-0.5, 2.5]. For index 1 the
                 column is derived from the clamped highlight column, otherwise
                 from the index alone. For a side other than 'L'/'R' the
                 clamped highlight column is returned unchanged.
        """
        [side, index, highlight_row, highlight_col] = data[:4]
        row = max(0 - 0.5, min(2 + 0.5, highlight_row - 1))
        col = max(0 - 0.5, min(1 + 0.5, highlight_col - 1))
        if side == 'L':
            if index == 1:
                col = 3 + col
            else:
                col = 3 - (index - 1)
        if side == 'R':
            if index == 1:
                col = 6 - col
            else:
                col = 6 + (index - 1)
        return [col, row]
Exemplo n.º 53
0
def main():
    import sys
    # prepare training data for language model and translation model
    sys.path.append('../../clir/')
    from aligned_parser import GizaReader
    data = GizaReader('../../data/giza/', 'alignment-en-fr')
    
    # instantiate translation model 
    tm = TranslationModel(data)
    
    # n for n-gram language model
    n = 2
    # instantiate language model
    lm = LanguageModel(n, data)
    
    # alpha for reordering model
    alpha = 0.75
    # instantiate reordering model
    rm = ReorderingModel(alpha)
    
    # decoder_stack_threshold for decoder stacks
    decoder_stack_threshold = 5
    
    # the number of results will be produced to output
    num_results = 10
    
    translated_src_mask = None
    last_translated_index = None
    # interactive command line input
    print fill("Please enter the full source sentence:", 79)
    src_sent = to_unicode_or_bust(raw_input(), sys.stdin.encoding).split()
    for i, word in enumerate(src_sent):
        print word.encode(sys.stdout.encoding), "(%d)" % i, 
    print
    
    print fill("Please enter the partial translation:", 79)
    partial_tgt_sent = to_unicode_or_bust(raw_input(), sys.stdin.encoding).split()
    
    if partial_tgt_sent != []:
    
        print fill("Which source words were translated?", 79)
        translated = list(eval(raw_input()))
        translated_src_mask = [ i in translated for i in range(len(src_sent))]
        print translated_src_mask
        
        print fill("Which source word is aligned with the last translated word \"%s\"?" 
                                % (partial_tgt_sent[-1]), 79)    
        last_translated_index = eval(raw_input())
    else:
        partial_tgt_sent = None
    
    # instantiate a decoder with all the input data collected  
    decoder = Decoder(lm, rm, tm,
                      src_sent,
                      decoder_stack_threshold,
                      translated_src_mask, 
                      last_translated_index,
                      partial_tgt_sent           )
    
    # invoke decode() method to complete decoding
    decoder.decode()
    
    # DEBUG ONLY
#    print len(decoder._decoder_stacks[-1].decompose())
#    for hypo in decoder.decoder_stacks[-1]:
#        print hypo.last_n_targets(len(src_sent)), hypo.partial_score

    # print results
    print
    print fill("Translation suggestions:", 79)
    decoder.print_ranked_results(num_results);
Exemplo n.º 54
0
 def init_decoder(self):
     """Store a new default-constructed Decoder on the instance."""
     self.decoder = Decoder()
Exemplo n.º 55
0
class TextCleanser(object):
    """Normalise noisy text by generating candidates and decoding a word mesh.

    A ``Generator`` proposes candidate replacements for the tokens of the
    input and builds a word mesh from them; a ``Decoder`` then picks the
    cleaned sentence from that mesh.
    """

    def __init__(self):
        """Constructor"""
        self.generator = Generator()
        self.decoder = Decoder()

    def heuristic_cleanse(self, text, gen_off_by_ones=False, ssk=False):
        """Accept noisy text, run through cleanser described in Gouws et al. 2011, and
        return the cleansed text.

        If gen_off_by_ones==True, generate spelling variants (1 edit distance away).
        If ssk==True the generator's SSK similarity is used, otherwise its IBM
        similarity.

        Returns a tuple ``(cleantext, error, replacements)`` where ``error`` is
        the second value returned by the decoder and ``replacements`` is the
        result of :meth:`get_replacements`. When ``error`` is truthy the mesh,
        the clean text and the error are printed for debugging.
        """
        gen = self.generator
        string_sim_func = gen.SSK_SIM if ssk else gen.IBM_SIM
        # The first value returned by the generator is not used here; the
        # replacements are recomputed from the decoded text below.
        _, old_tokens, candidates = gen.sent_generate_candidates(
            text, string_sim_func, gen_off_by_ones)
        word_mesh = gen.generate_word_mesh(candidates)
        cleantext, error = self.decoder.decode(word_mesh)
        if error:
            # Single-argument print() calls give the same output on Python 2
            # and 3 (the two spaces reproduce the original
            # `print "mesh: ", word_mesh`).
            print("mesh:  %s" % (word_mesh,))
            print(cleantext)
            print(error)
        replacements = self.get_replacements(cleantext, old_tokens)
        return cleantext, error, replacements

    def phonetic_ED_cleanse(self, text, gen_off_by_ones=True):
        """Cleanse ``text`` using the generator's phonetic edit-distance similarity.

        Returns ``(cleantext, error, replacements)`` like
        :meth:`heuristic_cleanse`, but never prints debugging output.
        """
        gen = self.generator
        _, old_tokens, candidates = gen.sent_generate_candidates(
            text, gen.PHONETIC_ED_SIM, gen_off_by_ones)
        word_mesh = gen.generate_word_mesh(candidates)
        cleantext, error = self.decoder.decode(word_mesh)
        replacements = self.get_replacements(cleantext, old_tokens)
        return cleantext, error, replacements

    def ssk_cleanse(self, text, gen_off_by_ones=False):
        "Use subsequence overlap similarity function"
        return self.heuristic_cleanse(text, gen_off_by_ones, ssk=True)

    def log_oovs(self, text):
        """return a list of all out-of-vocabulary words for pre-processing purposes"""
        # NotImplemented is a comparison sentinel, not an exception class:
        # calling it raised TypeError instead of signalling "not implemented".
        raise NotImplementedError("Not yet implemented")

    def get_replacements(self, cleantext, old_tokens):
        """return the token replacements that were made

        ``cleantext`` is split on whitespace and passed through the
        generator's ``fix_bad_tokenisation``. Returns -1 when that yields more
        tokens than ``old_tokens`` (the two cannot be aligned); otherwise a
        list of ``(old_token, new_token)`` pairs for every position where the
        new token differs from the lower-cased old token.
        """
        new_tokens = self.generator.fix_bad_tokenisation(cleantext.split())
        # if new_tokens contain more tokens than old_tokens then alignment is screwed
        if len(new_tokens) > len(old_tokens):
            return -1
        # zip stops at the shorter sequence (new_tokens), so no bounds check
        # is needed.
        return [(old_tok, new_tok)
                for old_tok, new_tok in zip(old_tokens, new_tokens)
                if new_tok != old_tok.lower()]
Exemplo n.º 56
0
 def __init__(self):
     """Create the default-constructed Encoder and Decoder held by this object."""
     self.encoder = Encoder()
     self.decoder = Decoder()
Exemplo n.º 57
0
class Disassembler(object):
    """Turn decoded instructions into printable assembly lines.

    The object passes itself to ``Decoder``; any ``do_<name>`` attribute
    looked up on it resolves to the plain string ``<name>``.
    """

    def __init__(self):
        self.decoder = Decoder(self)

    def __getattr__(self, name):
        """Resolve ``do_<mnemonic>`` lookups to the mnemonic string.

        :raises AttributeError: for any other missing attribute.
        """
        if not name.startswith('do_'):
            raise AttributeError("Disassembler has no attribute '%s'" % name)

        return name[3:]

    def disassemble(self, pc, mem, limit_addr=sys.maxsize, is_trace=False):
        """Yield ``(address, text)`` pairs for the instructions starting at ``pc``.

        :param pc: address of the first instruction to decode.
        :param mem: memory image; decoding stops at ``len(mem)``.
        :param limit_addr: exclusive upper bound for ``pc``. The default is
            ``sys.maxsize``, which exists on both Python 2 and 3 (the former
            ``sys.maxint`` default does not exist on Python 3).
        :param is_trace: when true, jump targets are not resolved to absolute
            addresses and decoding does not stop after a return or ``jmp``.

        If decoding raises, a single ``(pc, 'Failed to disassemble.')`` pair is
        yielded and the generator ends.
        """
        try:
            while pc < limit_addr and pc < len(mem):
                name, is_byte_insn, args, size = self.decoder.decode(pc, mem)
                # Must be checked before emulation rewrites name/args.
                is_ret = self.is_ret(name, args)
                name, args = self.try_emulate_insn(name, args)
                full_name = name
                if name[0] == 'j':
                    # Jumps: signed hex offset, plus the absolute target
                    # unless tracing.
                    arg_str = '$%+x' % args[0]
                    if not is_trace:
                        arg_str += ' [%x]' % (args[0] + pc)
                else:
                    if is_byte_insn:
                        full_name += '.b'
                    arg_str = (', '.join(map(self.pretty_addr, args)))
                yield pc, '%s\t' % full_name + arg_str
                pc += size
                if (is_ret or name == 'jmp') and not is_trace:
                    break
        except Exception:
            # `except Exception` rather than a bare `except`: the bare form
            # also caught GeneratorExit raised at the yield above (making
            # close() fail with "generator ignored GeneratorExit") and
            # swallowed KeyboardInterrupt.
            yield pc, 'Failed to disassemble.'

    # Names printed for registers 0-3; the rest print as r<n>.
    reg_names = ['pc', 'sp', 'sr', 'cg']

    @staticmethod
    def is_ret(name, args):
        """Return True for ``mov`` from ``Address(3, 1, None)`` to ``Address(0, 0, None)``."""
        return name == 'mov' and args[0] == Address(3, 1, None) and \
                args[1] == Address(0, 0, None)

    @staticmethod
    def pretty_reg(n):
        """Return the display name of register number ``n``."""
        if n < 4:
            return Disassembler.reg_names[n]
        return 'r%d' % n

    @staticmethod
    def pretty_addr(addr):
        """Format an operand (an object with ``loc``, ``mode``, ``data``) as text."""
        # Special cases first: loc 2 and loc 3 encode absolute addresses and
        # constants for some modes; loc 0 with mode 3 is an immediate.
        if addr.loc == 2:
            if addr.mode == 1:
                return '&%04x' % addr.data
            elif addr.mode in [2, 3]:
                return '#%x' % (1 << addr.mode)
        elif addr.loc == 3:
            if addr.mode == 3:
                return '#-1'
            else:
                return '#%x' % addr.mode
        elif addr.mode == 3 and addr.loc == 0:
            return '#%04x' % addr.data

        # Generic forms: register, indexed, indirect, indirect with '+'.
        if addr.mode == 0:
            return Disassembler.pretty_reg(addr.loc)
        elif addr.mode == 1:
            return '%x(%s)' % (addr.data, Disassembler.pretty_reg(addr.loc))
        elif addr.mode == 2:
            # NOTE(review): this branch prints 'r<n>' even for registers 0-3,
            # unlike the other branches which use pretty_reg -- confirm intent.
            return '@r%d' % addr.loc
        else:
            return '@%s+' % Disassembler.pretty_reg(addr.loc)

    @staticmethod
    def try_emulate_insn(name, args):
        """Rewrite two ``mov`` patterns as the emulated ``ret`` / ``br`` forms.

        :return: ``(name, args)`` -- rewritten when a pattern matches,
                 otherwise the inputs unchanged.
        """
        if Disassembler.is_ret(name, args):
            return 'ret', []
        elif name == 'mov' and args[1] == Address(0, 0, None):
            return 'br', [args[0]]
        return name, args
Exemplo n.º 58
0
 def __init__(self):
     """Create the Decoder, handing it this object as its only argument."""
     self.decoder = Decoder(self)
Exemplo n.º 59
0
"""
Python program that implements simple
steganography, covering both the
encoding and the decoding parts.

:Author: Manthan C S
:GitHub: mnthnx64
"""

from coder import Coder
from decoder import Decoder

if __name__ == '__main__':
    # Demo round trip: encode a sample paragraph, then decode and print it.
    message = "In all the examples so far, the elements of a are provided by the iterator one at a time, because all the looping logic is internal to the iterator. While this is simple and convenient, it is not very efficient. A better approach is to move the one-dimensional innermost loop into your code, external to the iterator. This way, NumPy’s vectorized operations can be used on larger chunks of the elements being visited."
    Coder(message).encode()
    print(Decoder().decode())
Exemplo n.º 60
0
 def test_empty_string(self):
     """A Decoder built from '' returns '' from decode_message for 3, 14 and 26."""
     decoder = Decoder('')
     for argument in (3, 14, 26):
         self.assertEqual(decoder.decode_message(argument), '')