def testTrimTrack(self):
    """
    Test correct trimming on real (albeit contrived) data.

    Loads the pickled subsamples from ``dummyVoice.p``, segments them as a
    single voice track (leading/trailing silences removed) and checks that
    the resulting track has minimum 45 and maximum 104.
    """
    # Use a context manager so the fixture file is closed even if
    # unpickling fails (the bare open() call leaked the handle).
    with open('dummyVoice.p') as fixture:
        voicesubsamples = CP.load(fixture)

    voicetrack = Segmentor.segmentVoice(voicesubsamples)[0]

    # Two separate equality assertions report which bound is wrong and what
    # value was actually produced, unlike a single combined boolean check.
    self.assertEqual(min(voicetrack), 45)
    self.assertEqual(max(voicetrack), 104)
def doTapeFlip(self):
    """
    Handle the flip of a 2-sided tape.

    Stops the current recording, keeps the segmented voice span of it as
    ``self.sideone``, then starts a fresh stereo 44100 Hz recording.
    """
    self.status.config(text='Flip tape, then play...')

    # --- Save side one ---
    self.recsound.stop()
    subrate = 20000
    mono_samples = Segmentor.getMonoAmpSamples(self.recsound, subrate)
    first_track = Segmentor.segmentVoice(mono_samples)[0]
    # Scale the track bounds by subrate before copying
    # (presumably subsample index -> sample offset; confirm with Segmentor).
    tstart = min(first_track) * subrate
    tend = max(first_track) * subrate
    self.sideone = tkSnack.Sound()
    self.sideone.copy(self.recsound, start=tstart, end=tend)

    # --- Restart recording ---
    self.recsound = tkSnack.Sound()
    self.recsound.configure(channels="Stereo")
    self.recsound.configure(frequency=44100)
    self.recsound.record()
def testSplitTracks(self):
    """
    Test correct track-splitting on real (albeit contrived) data.

    Segments the pickled subsamples from ``dummyTrack.p`` and compares the
    resulting track assignment, track by track, with the expected
    assignment stored in ``trackCorrect.p``.
    """
    # Context managers close both fixture files deterministically.
    with open('dummyTrack.p') as fixture:
        tracksubsamples = CP.load(fixture)
    with open('trackCorrect.p') as fixture:
        truth = CP.load(fixture)

    assign = Segmentor.segmentTracks(tracksubsamples)

    # zip() stops at the shorter sequence, so without this check a missing
    # or extra track (or an empty result) would pass the comparison below.
    self.assertEqual(len(assign), len(truth))
    self.assertTrue(all([a == t for (a, t) in zip(assign, truth)]))
def testToyData(self):
    """
    Track splitting on synthetic data must find exactly three tracks.
    """
    # Synthetic recording described as (kind, length) runs.
    testruns = [
        ('gap', 20),
        ('track', 50),                              # Track 1
        ('gap', 10),
        ('track', 30), ('gap', 3), ('track', 20),   # Track 2
        ('gap', 15),
        ('track', 70),                              # Track 3
        ('gap', 10), ('track', 1), ('gap', 10),
    ]
    data = ToyData.generateData(testruns)
    labels = ToyData.generateLabels(testruns)

    # Run the segmentation under test.
    assign = Segmentor.segmentTracks(data)

    # Expect 3 tracks: the fake gap and the fake track should be ignored.
    track_count = len(assign)
    self.assert_(track_count == 3)
def VegetationClassification(Img):
    '''
    Classify the green vegetation in a GSV image and return its share of
    the image as a percentage.

    The image is first segmented with ``Segmentor.segment`` (the original
    implementation used pymeanshift's mean-shift segmentation here), then
    pixels are classified using colour-band thresholds combined with an
    excess-green (ExG) threshold obtained from ``graythresh`` (described by
    the original author as Otsu automatic thresholding).

    Img: the numpy array image, eg. Img = np.array(Image.open(StringIO(response.content)))

    return the percentage (0-100) of green vegetation pixels in the image;
    0.0 for an image with no pixels.

    By Xiaojiang Li
    '''
    import numpy as np
    import Segmentor

    segmented_image = Segmentor.segment(Img)

    # Scale the bands by 1/255 (presumably 8-bit input -> [0, 1]).
    I = segmented_image / 255.0
    red = I[:, :, 0]
    green = I[:, :, 1]
    blue = I[:, :, 2]

    # Excess green: how much the green band exceeds the other two bands.
    ExG = (green - red) + (green - blue)

    # Band masks; on boolean arrays "*" acts as logical AND.
    greenImg1 = (red < 0.6) * (blue < 0.6) * (green < 0.9)
    greenImgShadow1 = (red < 0.3) * (green < 0.3) * (blue < 0.3)
    del red, green, blue, I

    # Clamp the automatic threshold into the range [0.05, 0.1].
    threshold = graythresh(ExG, 0.1)
    if threshold > 0.1:
        threshold = 0.1
    elif threshold < 0.05:
        threshold = 0.05

    greenImg2 = ExG > threshold
    greenImgShadow2 = ExG > 0.05
    del ExG

    # Vegetation = regular green pixels OR shadowed green pixels
    # ("+" on boolean arrays acts as logical OR).
    greenImg = greenImg1 * greenImg2 + greenImgShadow2 * greenImgShadow1
    del greenImg1, greenImg2, greenImgShadow1, greenImgShadow2

    # Divide by the real pixel count instead of a hard-coded 400*400 so the
    # percentage is correct for any image size (identical for 400x400).
    totalPxlNum = np.size(greenImg)
    if totalPxlNum == 0:
        return 0.0
    greenPxlNum = np.count_nonzero(greenImg)
    greenPercent = greenPxlNum / float(totalPxlNum) * 100
    return greenPercent
def main(): ''' Read in CIFAR10 data (limited to 2 classes), initialize your model, and train and test your model for a number of epochs. We recommend that you train for 10 epochs and at most 25 epochs. For CS2470 students, you must train within 10 epochs. You should receive a final accuracy on the testing examples for cat and dog of >=70%. :return: None ''' # Use the titan os.environ["CUDA_VISIBLE_DEVICES"] = "1" # Segmentor or Classifier (0 or 1) choose = 1 # Initialize the model if choose: model = Classifier.Model() else: model = Segmentor.Model() # For saving/loading models # Get the date and time in a string now = datetime.now() if choose: mod_str = '_classifier' else: mod_str = '_segmentor' dt_string = now.strftime("%d.%m.%Y_%H.%M") + mod_str checkpoint_dir = dt_string checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt") checkpoint = tf.train.Checkpoint(model=model) manager = tf.train.CheckpointManager(checkpoint, checkpoint_dir, max_to_keep=80) print(manager.checkpoints) print(manager.latest_checkpoint) # Restore latest checkpoint # checkpoint.restore('./checkpoints_data/ckpt-672') checkpoint.restore(manager.latest_checkpoint) # Train it if choose: train_class(model, '/media/user1/My4TBHD1/Lung/processed_data', import_excel('./Lung-PET.xlsx', 'Lung-PENN'), manager, 0, checkpoint_dir) else: train_seg(model, '/media/user1/My4TBHD1/Lung/processed_data', manager, 0, checkpoint_dir) for i in range(10): train_seg(model, '/media/user1/My4TBHD1/Lung/processed_data', manager, 0, checkpoint_dir) manager.save() # Graph loss fig = plt.figure() ax = plt.axes() loss_graph = np.load(checkpoint_dir + '/loss.npy') print(len(loss_graph)) x = list(range(len(loss_graph))) ax.plot(x, loss_graph) plt.show() # Test it if choose: print('hi') else: patients = sorted(os.listdir('./processed_data/')) for patient in patients: print('Current patient: ', patient) test_model(model, './processed_data/' + patient)
import datetime
import json
import pprint
import copy
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.decomposition import NMF, LatentDirichletAllocation
import random
import gensim
import jieba
import jieba.posseg as pseg
from re import compile as _Re
from gensim.models.word2vec import Word2Vec
from gensim.summarization import keywords
from Segmentor import *

# Shared word segmenter and part-of-speech tagger
# (presumably provided by the star import above; confirm).
segmenter = Segmentor()
tagger = POSTagger()

import re
from elasticsearch import Elasticsearch

# Elasticsearch client for the node at 192.168.2.10:9200.
es = Elasticsearch([{'host': '192.168.2.10', 'port': 9200}])


def content_segmentor(article):
    """
    Split an article into sentences, segment each sentence into words and
    return the words of every non-empty sentence joined by single spaces.

    article: the raw article text to segment.

    return: the concatenated segmented text ('' when nothing is produced).
    """
    # Accumulate into a separate list: the previous code reset `article`
    # to "" before tokenizing it, so the input text was never processed.
    segmented_parts = []
    for sent in Tokenizer.ToSents(article):
        # Segment the sentence into words
        words = segmenter.segment(sent)
        if words != []:
            segmented_parts.append(' '.join(words))
    # NOTE(review): sentences are concatenated without a separator, exactly
    # as the original `+=` did - confirm whether a space is wanted between
    # the last word of one sentence and the first word of the next.
    return ''.join(segmented_parts)
def vegetation_classification(img):
    """
    Classify the green vegetation in a GSV image and return its share of
    the image as a percentage.

    The image is first segmented with ``Segmentor.segment`` (the original
    implementation used pymeanshift's mean-shift segmentation here), then
    pixels are classified using colour-band thresholds combined with an
    excess-green threshold obtained from ``graythresh`` (described by the
    original author as Otsu automatic thresholding).

    img: the numpy array image, eg. img = numpy.array(Image.open(StringIO(response.content)))

    return the percentage (0-100) of green vegetation pixels in the image;
    0.0 for an image with no pixels.

    By Xiaojiang Li
    """
    import Segmentor

    segmented_image = Segmentor.segment(img)

    # Scale the bands by 1/255 (presumably 8-bit input -> [0, 1]).
    image_norm = segmented_image / 255.0
    red = image_norm[:, :, 0]
    green = image_norm[:, :, 1]
    blue = image_norm[:, :, 2]

    # Excess green: how much the green band exceeds the other two bands.
    exg = (green - red) + (green - blue)

    # Band masks; on boolean arrays "*" acts as logical AND.
    green_img1 = (red < 0.6) * (blue < 0.6) * (green < 0.9)
    green_img_shadow1 = (red < 0.3) * (green < 0.3) * (blue < 0.3)
    del red, green, blue, image_norm

    # Clamp the automatic threshold into the range [0.05, 0.1].
    threshold = graythresh(exg, 0.1)
    if threshold > 0.1:
        threshold = 0.1
    elif threshold < 0.05:
        threshold = 0.05

    green_img2 = exg > threshold
    green_img_shadow2 = exg > 0.05
    del exg

    # Vegetation = regular green pixels OR shadowed green pixels
    # ("+" on boolean arrays acts as logical OR).
    green_img = green_img1 * green_img2 + green_img_shadow2 * green_img_shadow1
    del green_img1, green_img2, green_img_shadow1, green_img_shadow2

    # Divide by the real pixel count instead of a hard-coded 400*400 so the
    # percentage is correct for any image size (identical for 400x400).
    total_pxl_num = numpy.size(green_img)
    if total_pxl_num == 0:
        return 0.0
    green_pxl_num = numpy.count_nonzero(green_img)
    green_percent = green_pxl_num / float(total_pxl_num) * 100
    return green_percent
# Train a decision tree on a random split of the training data and report
# its training and validation error rates.
VALIDATION_SIZE = 1000
# NOTE(review): presumably the number of rows in data['training_data'];
# confirm it matches the loaded data set.
TRAIN_SIZE = 5172

X = data['training_data']
Y = data['training_labels'].T.ravel()

# Random permutation of all TRAIN_SIZE indices (sampling without replacement).
randomIndex = np.random.choice(TRAIN_SIZE, TRAIN_SIZE, replace=False)

# Split Data: the last VALIDATION_SIZE shuffled indices are held out.
xTrain = X[randomIndex[:-VALIDATION_SIZE]]
yTrain = Y[randomIndex[:-VALIDATION_SIZE]]
xValidate = X[randomIndex[-VALIDATION_SIZE:]]
yValidate = Y[randomIndex[-VALIDATION_SIZE:]]
xTest = data['test_data']

segmentor = Segmentor()

# Single-argument print(...) calls behave identically on Python 2 and 3.
print("============= Decision Tree ==========")
tree = DTree(Impurity.impurity, segmentor, depth=20)
tree.train(xTrain, yTrain)
labels = tree.predict(xValidate)

# Error rate = 1 - fraction of correct predictions. Taking the mean of the
# boolean comparison avoids the IndexError that np.bincount(...)[True]
# raised whenever no prediction was correct (bincount then has length 1).
error = 1.0 - np.mean(tree.predict(xTrain) == yTrain)
print("Training Error: %f" % (error))

error = 1.0 - np.mean(labels == yValidate)
print("Validation Error: %f" % (error))
def recordSound(self):
    """
    Advance the record / process / burn workflow by one step.

    The action depends on ``self.state``:

    * 0 - start a stereo 44100 Hz recording and move to state 1.
    * 1 - stop recording, segment the audio (single voice track when
      ``self.tracksplit`` is 0, individual music tracks when it is 2),
      write the tracks to ``datadir`` as ``track<N>.wav`` and move to
      state 2.
    * 2 - burn all ``track<N>.wav`` files in ``datadir`` to CD with
      ``cdrecord``; on success delete them and return to state 0.

    Raises ValueError in state 1 when ``self.tracksplit`` holds a value
    other than 0 or 2.
    """
    if self.state == 0:
        #
        # Start recording
        #
        self.recsound = tkSnack.Sound()
        self.recsound.configure(channels="Stereo")
        self.recsound.configure(frequency=44100)
        self.recsound.record()
        self.status.config(text='Recording...')
        self.record_sound.config(text="STOP", bg="red")
        self.state = 1
    elif self.state == 1:
        #
        # Stop recording, write tracks out to disk for burning
        #
        self.recsound.stop()
        # Convert to mono (take max amplitude over channels) and subsample
        subrate = 20000
        samps = Segmentor.getMonoAmpSamples(self.recsound, subrate)
        # Process subsampled audio
        self.status.config(text='Processing audio...')
        mode = self.tracksplit.get()
        if mode == 0:
            # VOICE: just find single track
            trackAssign = Segmentor.segmentVoice(samps)
        elif mode == 2:
            # MUSIC: segment recording into individual tracks
            trackAssign = Segmentor.segmentTracks(samps)
        else:
            # Previously this fell through and failed later with an
            # UnboundLocalError on trackAssign; fail with a clear message.
            raise ValueError('Unsupported track split mode: %r' % (mode,))
        # Write these tracks out to disk
        if self.sideone is not None:
            # We have a previous side-one track, write it out
            fn = os.path.join(datadir, 'track%d.wav' % 0)
            self.sideone.write(fn)
            # Then write out the current track
            (tstart, tend) = (min(trackAssign[0]) * subrate,
                              max(trackAssign[0]) * subrate)
            newTrack = tkSnack.Sound()
            newTrack.copy(self.recsound, start=tstart, end=tend)
            fn = os.path.join(datadir, 'track%d.wav' % 1)
            newTrack.write(fn)
        else:
            # No side-one, just write these tracks out to disk
            for (tracknum, ta) in enumerate(trackAssign):
                (tstart, tend) = (min(ta) * subrate, max(ta) * subrate)
                newTrack = tkSnack.Sound()
                newTrack.copy(self.recsound, start=tstart, end=tend)
                fn = os.path.join(datadir, 'track%d.wav' % tracknum)
                newTrack.write(fn)
        self.status.config(text='Ready to burn!')
        self.recsound = None
        self.record_sound.config(text="BURN", bg='red')
        self.state = 2
    elif self.state == 2:
        #
        # Burn tracks from disk to a blank CD
        #
        # Raw string avoids invalid-escape warnings; the trailing "$" keeps
        # files such as "track1.wav.bak" from being burned and deleted.
        track_re = re.compile(r'track(\d+)\.wav$')
        tracks = [fn for fn in os.listdir(datadir) if track_re.match(fn)]
        # Numeric order (track10 after track9). A key function works on
        # both Python 2 and 3, unlike a positional cmp function.
        tracks.sort(key=lambda fn: int(track_re.match(fn).group(1)))
        # Construct cdrecord command for CD burning
        cmd = 'cdrecord fs=4096k -v -useinfo speed=1 '
        cmd += '-dao -eject -pad -audio '
        cmd += '-dev=%s ' % (self.dev)
        cmd += ''.join('"%s" ' % os.path.join(datadir, track)
                       for track in tracks)
        self.status.config(text='Burning CD...')
        if os.system(cmd) != 0:
            # Keep the track files and stay in the burn state so the user
            # can retry; previously a failed burn still deleted the tracks
            # and reported success.
            self.status.config(text='Burn failed - tracks kept, try again')
            return
        self.status.config(text='Done - CD completed')
        # Now that we're done, cleanup by deleting temp track files
        for track in tracks:
            os.remove(os.path.join(datadir, track))
        # Reset state
        # NOTE(review): self.sideone and the button label are not reset
        # here - confirm whether that is handled elsewhere.
        self.state = 0