def main():
    """Interactively decode reviews from batches of the test Amazon data.

    Installs a NervanaGPU backend as ``NervanaObject.be``, builds a
    ``DiskDataIterator`` over the gzipped test review file and, for every
    batch it yields, prompts for a review index and prints that review
    decoded from its one-hot representation.

    Raises:
        ValueError: if the text typed at the prompt is not an integer.
    """
    # Imported locally, as in the original, so the GPU backend is only
    # required when this demo actually runs.
    from neon.backends.nervanagpu import NervanaGPU

    # NOTE: a disabled AmazonBatchWriter(amzn_path, h5file).run() step used to
    # sit here; its path variables were otherwise unused and have been dropped.

    nvocab = 67  # one-hot depth; also used to reshape each flattened review

    ng = NervanaGPU(0, device_id=1)
    NervanaObject.be = ng
    ng.bsz = 128

    # The 128 passed below presumably mirrors ng.bsz (batch size) -- confirm
    # against DiskDataIterator's signature before merging the two literals.
    train_set = DiskDataIterator(
        lambda: batcher(load_data('/root/data/amazon/test_amazon.json.gz')),
        3000, 128, nvocab=nvocab)

    # Let the user inspect an arbitrary example from each batch.
    for bidx, (X_batch, _y_batch) in enumerate(train_set):
        print("Batch {}:".format(bidx))
        # Python 2 input() would eval() the typed text; read it as a plain
        # string and convert explicitly so only integers are accepted.
        reviewnum = int(raw_input("Pick review index to fetch and decode: "))
        review = from_one_hot(X_batch.get().T[reviewnum].reshape(nvocab, -1))
        # Decoded characters are printed in reverse order; presumably the
        # encoder stores reviews back-to-front -- confirm.
        print(''.join(review)[::-1])
def main():
    """Interactively decode reviews from batches of the test Amazon data.

    Installs a NervanaGPU backend as ``NervanaObject.be``, builds a
    ``DiskDataIterator`` over the gzipped test review file and, for every
    batch it yields, prompts for a review index and prints that review
    decoded from its one-hot representation.

    Raises:
        ValueError: if the text typed at the prompt is not an integer.
    """
    # Imported locally, as in the original, so the GPU backend is only
    # required when this demo actually runs.
    from neon.backends.nervanagpu import NervanaGPU

    # NOTE: a disabled AmazonBatchWriter(amzn_path, h5file).run() step used to
    # sit here; its path variables were otherwise unused and have been dropped.

    nvocab = 67  # one-hot depth; also used to reshape each flattened review

    ng = NervanaGPU(0, device_id=1)
    NervanaObject.be = ng
    ng.bsz = 128

    # The 128 passed below presumably mirrors ng.bsz (batch size) -- confirm
    # against DiskDataIterator's signature before merging the two literals.
    train_set = DiskDataIterator(
        lambda: batcher(load_data('/root/data/amazon/test_amazon.json.gz')),
        3000, 128, nvocab=nvocab)

    # Let the user inspect an arbitrary example from each batch.
    for bidx, (X_batch, _y_batch) in enumerate(train_set):
        print("Batch {}:".format(bidx))
        # Python 2 input() would eval() the typed text; read it as a plain
        # string and convert explicitly so only integers are accepted.
        reviewnum = int(raw_input("Pick review index to fetch and decode: "))
        review = from_one_hot(X_batch.get().T[reviewnum].reshape(nvocab, -1))
        # Decoded characters are printed in reverse order; presumably the
        # encoder stores reviews back-to-front -- confirm.
        print(''.join(review)[::-1])
np.set_printoptions(threshold=np.nan) print "Batch properties:" print "Shape (data): {}".format(data.shape) print "Shape (label): {}".format(label.shape) print "Type: {}".format(type(data)) print print "First record of first batch:" print "Type (1 level in): {}".format(type(data[0])) print "Type of record (2 levels in): {}".format(type(data[0,0])) print data[0,0] print "Sentiment label: {}".format(label[0,0]) print "Data in numpy format:" oh = data_utils.to_one_hot(data[0,0]) print np.array_str(np.argmax(oh,axis=0)) print "Translated back into characters:\n" print ''.join(data_utils.from_one_hot(oh)) # demo balanced batching amz_balanced_batcher = batch_data(amz_train,balance_labels=True) balanced_batch = amz_balanced_batcher.next() print 'Balanced batch:' balanced_label_counts = {} for idx in range(balanced_batch[1].shape[0]): label = balanced_batch[1][idx,0] balanced_label_counts[label] = balanced_label_counts.get(label, 0) + 1 print balanced_label_counts # Demo iterator utility classes # iterate multiple times over same data if args.iterator_demo: # Demo dataIterator class
transformer_fun=None) am_test_batch = batch_data.batch_data(amte, normalizer_fun=None,transformer_fun=None) # Spit out some sample data next_batch = am_train_batch.next() data, label = next_batch np.set_printoptions(threshold=np.nan) print "Batch properties:" print "Length: {}".format(len(data)) print "Type: {}".format(type(data)) print print "First record of first batch:" print "Type (1 level in): {}".format(type(data[0])) print "Type of record (2 levels in): {}".format(type(data[0,0])) print data[0,0] print "Sentiment label: {}".format(label[0]) print "In numpy format:" oh = data_utils.to_one_hot(data[0,0]) print np.array_str(np.argmax(oh,axis=0)) print "Translated back into characters:\n" print data_utils.from_one_hot(oh) # dimension checks second_batch_data, second_batch_label = second_batch = am_train_batch.next() second_batch = list(second_batch) print len(second_batch) print "Data object type: ", type(second_batch_data) print second_batch_data.shape