Пример #1
0
 def testTrimTrack(self):
     """
     Test correct trimming on real (albeit contrived) data
     (assume a single track - remove leading/trailing silences).

     Loads the pickled sub-samples from 'dummyVoice.p', runs
     Segmentor.segmentVoice on them and checks that the first returned
     track starts at index 45 and ends at index 104.
     """
     # Use a context manager so the fixture file is closed even if
     # unpickling raises.
     with open('dummyVoice.p') as fixture:
         voicesubsamples = CP.load(fixture)
     voicetrack = Segmentor.segmentVoice(voicesubsamples)[0]
     # Two separate equality assertions report which bound is wrong and
     # what value was actually produced.
     self.assertEqual(min(voicetrack), 45)
     self.assertEqual(max(voicetrack), 104)
Пример #2
0
 def doTapeFlip(self):
     """ Keep side one of a 2-sided tape, then begin recording side two """
     self.status.config(text='Flip tape, then play...')

     # --- Side one: stop, locate the voice track, keep only that span ---
     self.recsound.stop()
     sample_rate = 44100
     subsample_rate = 20000
     mono_samples = Segmentor.getMonoAmpSamples(self.recsound, subsample_rate)
     first_track = Segmentor.segmentVoice(mono_samples)[0]
     # Scale sub-sample indices by the subsample rate to get copy bounds
     begin = min(first_track) * subsample_rate
     finish = max(first_track) * subsample_rate
     self.sideone = tkSnack.Sound()
     self.sideone.copy(self.recsound, start=begin, end=finish)

     # --- Side two: start a fresh stereo recording ---
     self.recsound = tkSnack.Sound()
     self.recsound.configure(channels="Stereo")
     self.recsound.configure(frequency=sample_rate)
     self.recsound.record()
Пример #3
0
 def testSplitTracks(self):
     """
     Test correct track-splitting on real (albeit contrived) data.

     Segments the pickled sub-samples from 'dummyTrack.p' with
     Segmentor.segmentTracks and compares the result, track by track,
     with the expected assignment stored in 'trackCorrect.p'.
     """
     # Context managers close the fixture files deterministically.
     with open('dummyTrack.p') as fixture:
         tracksubsamples = CP.load(fixture)
     assign = Segmentor.segmentTracks(tracksubsamples)
     with open('trackCorrect.p') as fixture:
         truth = CP.load(fixture)
     # zip() stops at the shorter sequence, so without this check a result
     # with missing (or zero) tracks would pass the comparison below.
     self.assertEqual(len(assign), len(truth))
     self.assertTrue(all([a == t for (a, t) in zip(assign, truth)]))
Пример #4
0
 def testToyData(self):
     """ Segmenting the synthetic recording must yield exactly 3 tracks """
     # Synthetic recording described as (kind, length) runs
     runs = [
         ('gap', 20),
         ('track', 50),                              # Track 1
         ('gap', 10),
         ('track', 30), ('gap', 3), ('track', 20),   # Track 2
         ('gap', 15),
         ('track', 70),                              # Track 3
         ('gap', 10), ('track', 1), ('gap', 10),
     ]
     samples = ToyData.generateData(runs)
     # Labels are generated alongside the data but not checked here
     unused_labels = ToyData.generateLabels(runs)

     found = Segmentor.segmentTracks(samples)

     # The short gap inside track 2 and the length-1 track near the end
     # are fakes that should be ignored, leaving three real tracks
     self.assert_(len(found) == 3)
Пример #5
0
def VegetationClassification(Img):
    '''
    Estimate the percentage of green-vegetation pixels in a GSV image.

    The image is segmented with Segmentor.segment, divided by 255.0 and
    split into red/green/blue bands (last axis, channels 0/1/2).  A pixel
    counts as vegetation when either
      * it is not too bright (red < 0.6, blue < 0.6, green < 0.9) and its
        excess-green value (G - R) + (G - B) exceeds a threshold obtained
        from graythresh and clamped to [0.05, 0.1], or
      * it is dark in all bands (each < 0.3) and its excess-green value
        exceeds 0.05.
    According to the original author's notes this is an object-based,
    Otsu-style automatic thresholding method.

        Img: the numpy array image, eg. Img = np.array(Image.open(StringIO(response.content)))
        return the percentage (0-100) of green vegetation pixels in the
        image; 0.0 for an image with no pixels

    By Xiaojiang Li
    '''

    import numpy as np
    import Segmentor

    segmented_image = Segmentor.segment(Img)
    bands = segmented_image / 255.0

    red = bands[:, :, 0]
    green = bands[:, :, 1]
    blue = bands[:, :, 2]

    # Excess-green index: difference of green against both other bands
    ExG = (green - red) + (green - blue)

    # Keep the automatic threshold inside the [0.05, 0.1] band
    threshold = graythresh(ExG, 0.1)
    if threshold > 0.1:
        threshold = 0.1
    elif threshold < 0.05:
        threshold = 0.05

    # Boolean masks: `&` / `|` are the element-wise and / or that the
    # original expressed as `*` / `+` on boolean arrays
    litCandidate = (red < 0.6) & (blue < 0.6) & (green < 0.9)
    shadowCandidate = (red < 0.3) & (green < 0.3) & (blue < 0.3)
    greenImg = (litCandidate & (ExG > threshold)) | \
               (shadowCandidate & (ExG > 0.05))

    # Normalise by the actual number of pixels rather than assuming a
    # 400x400 image, so other image sizes give a correct percentage
    totalPxlNum = greenImg.size
    if totalPxlNum == 0:
        return 0.0
    greenPxlNum = np.count_nonzero(greenImg)
    greenPercent = greenPxlNum / float(totalPxlNum) * 100

    return greenPercent
Пример #6
0
def main(choose=1):
    '''
    Train either the classifier or the segmentor model, plot the recorded
    loss, then run the test step.

    Steps performed:
      1. Restrict CUDA to device "1".
      2. Build Classifier.Model() or Segmentor.Model().
      3. Create a checkpoint manager in a directory named after the
         current date/time plus '_classifier' / '_segmentor', print the
         known checkpoints and restore the latest one.
      4. Train: train_class once for the classifier; for the segmentor,
         train_seg once followed by ten more passes and a manager.save().
      5. Plot the loss curve stored in '<checkpoint_dir>/loss.npy'.
      6. For the segmentor, call test_model on every entry of
         './processed_data/' (the classifier branch is a placeholder).

    :param choose: truthy -> classifier, falsy -> segmentor (default 1,
        the value that used to be hard-coded)
    :return: None
    '''

    # Use the titan
    os.environ["CUDA_VISIBLE_DEVICES"] = "1"

    # Training data location shared by both training routines
    data_dir = '/media/user1/My4TBHD1/Lung/processed_data'

    # Initialize the model and the suffix identifying it on disk
    if choose:
        model = Classifier.Model()
        mod_str = '_classifier'
    else:
        model = Segmentor.Model()
        mod_str = '_segmentor'

    # For saving/loading models: directory named by date/time + model kind
    checkpoint_dir = datetime.now().strftime("%d.%m.%Y_%H.%M") + mod_str
    checkpoint = tf.train.Checkpoint(model=model)
    manager = tf.train.CheckpointManager(checkpoint,
                                         checkpoint_dir,
                                         max_to_keep=80)

    print(manager.checkpoints)
    print(manager.latest_checkpoint)

    # Restore latest checkpoint
    # NOTE(review): the directory name changes every minute, so
    # latest_checkpoint is presumably None on a fresh run - confirm that
    # restoring from None is the intended no-op.
    checkpoint.restore(manager.latest_checkpoint)

    # Train it
    if choose:
        train_class(model, data_dir,
                    import_excel('./Lung-PET.xlsx', 'Lung-PENN'), manager, 0,
                    checkpoint_dir)
    else:
        # NOTE(review): one initial pass plus ten more (11 in total) -
        # confirm this is intended.
        train_seg(model, data_dir, manager, 0, checkpoint_dir)
        for _ in range(10):
            train_seg(model, data_dir, manager, 0, checkpoint_dir)
        manager.save()

    # Graph loss
    plt.figure()
    ax = plt.axes()
    loss_graph = np.load(checkpoint_dir + '/loss.npy')
    print(len(loss_graph))
    x = list(range(len(loss_graph)))
    ax.plot(x, loss_graph)
    plt.show()

    # Test it
    if choose:
        # Placeholder: no classifier test is implemented here
        print('hi')
    else:
        patients = sorted(os.listdir('./processed_data/'))
        for patient in patients:
            print('Current patient: ', patient)
            test_model(model, './processed_data/' + patient)
Пример #7
0
import datetime
import json
import pprint
import copy
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.decomposition import NMF, LatentDirichletAllocation
import random
import gensim
import jieba
import jieba.posseg as pseg
from re import compile as _Re
from gensim.models.word2vec import Word2Vec
from gensim.summarization import keywords

# Star import: Segmentor (and presumably POSTagger and Tokenizer, which are
# not imported anywhere else in view) come from here - TODO confirm.
from Segmentor import *
# Module-level word segmenter; content_segmentor() calls segmenter.segment().
segmenter = Segmentor()
# Module-level part-of-speech tagger instance.
tagger = POSTagger()
import re

from elasticsearch import Elasticsearch
# Elasticsearch client bound to the host/port given below; created at import time.
es = Elasticsearch([{'host': '192.168.2.10', 'port': 9200}])


def content_segmentor(article):
    """Word-segment an article sentence by sentence.

    The article is split into sentences with Tokenizer.ToSents, each
    sentence is segmented with the module-level ``segmenter``, and the
    words of every non-empty sentence are joined with single spaces.

    :param article: text to segment
    :return: the segmented sentences concatenated into one string
    """
    # Accumulate into a separate list: the original reset ``article`` to ""
    # before reading it, so the input was discarded and nothing was ever
    # segmented (and the result was never returned).
    pieces = []
    for sent in Tokenizer.ToSents(article):
        # Segment the sentence into words
        words = segmenter.segment(sent)
        if words:
            pieces.append(' '.join(words))
    # NOTE(review): sentences are concatenated without a separator, as in
    # the original accumulation - confirm whether a space or newline
    # between sentences is wanted.
    return ''.join(pieces)
Пример #8
0
def vegetation_classification(img):
    """
    Estimate the percentage of green-vegetation pixels in a GSV image.

    The image is segmented with Segmentor.segment, divided by 255.0 and
    split into red/green/blue bands (last axis, channels 0/1/2).  A pixel
    counts as vegetation when either
      * it is not too bright (red < 0.6, blue < 0.6, green < 0.9) and its
        excess-green value (G - R) + (G - B) exceeds a threshold obtained
        from graythresh and clamped to [0.05, 0.1], or
      * it is dark in all bands (each < 0.3) and its excess-green value
        exceeds 0.05.
    According to the original author's notes this is an object-based,
    Otsu-style automatic thresholding method.

        img: the numpy array image, eg. img = numpy.array(Image.open(StringIO(response.content)))
        return the percentage (0-100) of green vegetation pixels in the
        image; 0.0 for an image with no pixels

    By Xiaojiang Li
    """

    import Segmentor

    segmented_image = Segmentor.segment(img)
    image_norm = segmented_image / 255.0

    red = image_norm[:, :, 0]
    green = image_norm[:, :, 1]
    blue = image_norm[:, :, 2]

    # Excess-green index: difference of green against both other bands
    exg = (green - red) + (green - blue)

    # Keep the automatic threshold inside the [0.05, 0.1] band
    threshold = graythresh(exg, 0.1)
    if threshold > 0.1:
        threshold = 0.1
    elif threshold < 0.05:
        threshold = 0.05

    # Boolean masks: `&` / `|` are the element-wise and / or that the
    # original expressed as `*` / `+` on boolean arrays
    lit_candidate = (red < 0.6) & (blue < 0.6) & (green < 0.9)
    shadow_candidate = (red < 0.3) & (green < 0.3) & (blue < 0.3)
    green_img = (lit_candidate & (exg > threshold)) | \
                (shadow_candidate & (exg > 0.05))

    # Normalise by the actual number of pixels rather than assuming a
    # 400x400 image, so other image sizes give a correct percentage
    total_pxl_num = green_img.size
    if total_pxl_num == 0:
        return 0.0
    green_pxl_num = numpy.count_nonzero(green_img)
    green_percent = green_pxl_num / float(total_pxl_num) * 100

    return green_percent
Пример #9
0
# Number of shuffled samples held out for validation, and the total number
# of training samples drawn from.
VALIDATION_SIZE = 1000
TRAIN_SIZE = 5172

X = data['training_data']
Y = data['training_labels'].T.ravel()
# Random permutation of the sample indices (drawn without replacement)
randomIndex = np.random.choice(TRAIN_SIZE, TRAIN_SIZE, replace=False)

# Split Data: the last VALIDATION_SIZE shuffled indices form the
# validation set, the rest the training set
xTrain = X[randomIndex[:-VALIDATION_SIZE]]
yTrain = Y[randomIndex[:-VALIDATION_SIZE]]
xValidate = X[randomIndex[-VALIDATION_SIZE:]]
yValidate = Y[randomIndex[-VALIDATION_SIZE:]]
xTest = data['test_data']

segmentor = Segmentor()

# print(...) with a single argument behaves the same on Python 2 and 3
print("============= Decision Tree ==========")
tree = DTree(Impurity.impurity, segmentor, depth=20)
tree.train(xTrain, yTrain)
labels = tree.predict(xValidate)

# Error rate = 1 - fraction of correct predictions.  Taking the mean of the
# boolean match array avoids np.bincount(...)[True], which raises IndexError
# when no prediction is correct (bincount then has a single bin) and depends
# on a boolean being accepted as integer index 1.
error = 1.0 - np.mean(np.asarray(tree.predict(xTrain)) == np.asarray(yTrain))
print("Training Error: %f" % (error))

error = 1.0 - np.mean(np.asarray(labels) == np.asarray(yValidate))
print("Validation Error: %f" % (error))
Пример #10
0
 def recordSound(self):
     """
     Advance the record -> process -> burn cycle by one step.

     What happens depends on self.state:
       0: start a stereo 44100 Hz recording; state becomes 1.
       1: stop recording, sub-sample it, segment it (one voice track when
          self.tracksplit is 0, individual music tracks when it is 2) and
          write 'track<N>.wav' files into datadir; state becomes 2.
          If a side-one recording exists it is written as track 0 and the
          first segment of the current recording as track 1.
       2: burn every 'track<N>.wav' in datadir to CD with cdrecord, in
          numeric order, delete those files, and return to state 0.

     Raises ValueError in state 1 when self.tracksplit holds a value other
     than 0 or 2 (previously this surfaced as an unbound-variable error).
     """
     if self.state == 0:
         #
         # Start recording
         #
         self.recsound = tkSnack.Sound()
         self.recsound.configure(channels="Stereo")
         self.recsound.configure(frequency=44100)
         self.recsound.record()
         self.status.config(text='Recording...')
         self.record_sound.config(text="STOP",bg="red")
         self.state = 1
     elif self.state == 1:
         #
         # Stop recording, write tracks out to disk for burning
         #
         self.recsound.stop()
         # Convert to mono (take max amplitude over channels) and subsample
         subrate = 20000
         samps = Segmentor.getMonoAmpSamples(self.recsound, subrate)
         # Process subsampled audio
         self.status.config(text='Processing audio...')
         splitMode = self.tracksplit.get()
         if splitMode == 0:
             # VOICE: just find single track
             trackAssign = Segmentor.segmentVoice(samps)
         elif splitMode == 2:
             # MUSIC: segment recording into individual tracks
             trackAssign = Segmentor.segmentTracks(samps)
         else:
             raise ValueError('unsupported track split mode: %r'
                              % (splitMode,))

         def writeSegment(indices, tracknum):
             # Copy the span covered by one segment (sub-sample indices
             # scaled by subrate) into its own sound and save it
             newTrack = tkSnack.Sound()
             newTrack.copy(self.recsound,
                           start=min(indices)*subrate,
                           end=max(indices)*subrate)
             newTrack.write(os.path.join(datadir,'track%d.wav' % tracknum))

         # Write these tracks out to disk
         if self.sideone is not None:
             # We have a previous side-one track, write it out
             self.sideone.write(os.path.join(datadir,'track%d.wav' % 0))
             # Then write out the current track
             writeSegment(trackAssign[0], 1)
         else:
             # No side-one, just write these tracks out to disk
             for (tracknum,ta) in enumerate(trackAssign):
                 writeSegment(ta, tracknum)
         self.status.config(text='Ready to burn!')
         self.recsound = None
         self.record_sound.config(text="BURN",bg='red')
         self.state = 2
     elif self.state == 2:
         #
         # Burn tracks from disk to a blank CD
         #
         # Raw string, anchored at the end, so names such as
         # 'track1.wav.bak' are neither burned nor deleted below
         trackPattern = re.compile(r'track(\d+)\.wav\Z')
         tracks = [fn for fn in os.listdir(datadir)
                   if trackPattern.match(fn)]
         # Numeric order; key= works on Python 2 and 3, unlike a cmp function
         tracks.sort(key=lambda fn: int(trackPattern.match(fn).group(1)))
         trackPaths = [os.path.join(datadir,fn) for fn in tracks]
         # Construct cdrecord command for CD burning
         cmd = 'cdrecord fs=4096k -v -useinfo speed=1 '
         cmd += '-dao -eject -pad -audio '
         cmd += '-dev=%s ' % (self.dev)
         cmd += ''.join(['"%s" ' % path for path in trackPaths])
         self.status.config(text='Burning CD...')
         os.system(cmd)
         self.status.config(text='Done - CD completed')
         # Now that we're done, cleanup by deleting temp track files
         for path in trackPaths:
             os.remove(path)
         # Reset state
         self.state = 0