Example #1
def main(config):
    # Read every .xls file under the DATA_FOR_CORPUS_PATH directory
    file_names = os.listdir(config["DATA_FOR_CORPUS_PATH"])
    chat = []
    for file in file_names:
        if ".xls" in file:
            print(file)
            data = pd.read_excel(config["DATA_FOR_CORPUS_PATH"] + "/" + file)
            chat += list(data['对话内容'])  # '对话内容' is the chat-content column

    # Write out the training corpus
    fout = open(config["corpus_file"], 'w', encoding='utf-8')
    for t in range(len(chat)):
        word_segment = []
        curr_chat = str(chat[t]).lower()
        sentence_segment = [i for i in curr_chat.split('\r\n') if i]
        if len(sentence_segment) > 3:
            for sentence in sentence_segment[:-1]:
                if "访客" not in sentence and "客服人员说:" not in sentence:
                    if "欢迎使用中国银联网络客服" not in sentence:
                        word_segment += [
                            word for word in utils.segment(
                                utils.remove_url(sentence)) if word != ' '
                        ]
        if word_segment:
            fout.write(' '.join(word_segment) + '\n')
    fout.close()
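utils.remove_url is a project helper not shown in these excerpts; a minimal stand-in might look like the following, where the regex is an assumption rather than the project's actual pattern:

import re

def remove_url(text):
    # Hypothetical stand-in for utils.remove_url: strip http(s) and
    # www-style URLs before word segmentation.
    return re.sub(r'(https?://|www\.)\S+', '', text)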
Example #2
    def add_frame(self, image):
        # Segment skin, drop the detected face from the mask, then
        # (optionally) stabilize before extracting the hand.
        mask = utils.segment(image)
        face, foundFace = utils.detect_face(image)
        mask = utils.eliminate_face(face, foundFace, mask)

        if self.__using_stabilization:
            mask = utils.stabilize(foundFace, self.__no_of_frames + 1, image,
                                   face, mask)
        hand, hand_contour = utils.get_my_hand(mask, True)

        hand_pose, direction = 'None', 'None'

        if hand_contour is not None:
            motion_detected = self.__motion.get_hand_motion(hand_contour)
        else:
            return  # no hand contour found; skip this frame

        if not motion_detected:
            hand_pose = recognise_hand_pose(
                hand,
                directly_from_hand=True,
                model_path='Models/silatra_gesture_signs.sav')
        else:
            direction = motion_detected

        self.__observations.append((hand_pose, direction))
        self.__no_of_frames += 1
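A hedged usage sketch; the enclosing class is not shown in the excerpt, so the name GestureRecognizer and its no-argument constructor are assumptions:

import cv2

recognizer = GestureRecognizer()   # hypothetical class name
cap = cv2.VideoCapture(0)
for _ in range(30):                # feed ~30 webcam frames
    ok, frame = cap.read()
    if not ok:
        break
    recognizer.add_frame(frame)
cap.release()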
Example #3
    def bits(self, s):
        """Compute the bits of entropy in a string.

        If the string uses characters missing from the language model,
        __init__(default_bits) will be used for that element instead.

        Args:
          s: String to compute the entropy for
        """
        total_bits = 0
        for context, value in utils.all_but_the_last(utils.segment(s)):
            total_bits += self.get(context).bits(value, self.__default_bits)
        return total_bits
Example #4
def compile(f, n=3):
    """Compile the raw password database.

    Builds a histogram of n tuples provided by f.

    Args:
      f: File containing raw password data
      n: tuple length (defaults to 3)

    Returns:
      LanguageModel
    """
    language_model = LanguageModel()

    for line in f:
        language_model.extend(utils.segment(line.strip(), n))
    return language_model
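A hypothetical usage tying Examples #3 and #4 together, assuming 'passwords.txt' holds one password per line and that the returned LanguageModel exposes the bits() method shown above:

with open('passwords.txt', encoding='utf-8') as f:
    model = compile(f, n=3)
print(model.bits('hunter2'))   # entropy estimate under the trigram model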
Example #5
def recognise_hand_pose(image,
                        directly_from_hand=False,
                        model_path='Models/silatra_digits_and_letters.sav',
                        using_stabilization=False,
                        no_of_frames=1):
    '''
    ### SiLaTra Hand Pose Recognition

    Provides classification for an input hand pose image.

    Inputs: (a) Mandatory parameter - the image on which hand pose classification is to be performed.

            (b) Optional parameters (use them only if you understand them):

                (1) directly_from_hand - boolean - Set this to True if the `image` parameter already contains a cropped hand region.
                (2) model_path - string - To use an alternate model, pass the path of its .sav file.
                (3) using_stabilization - boolean - Set this to True to use object stabilization. Only use this option when classifying hand poses from a continuous feed; otherwise it is useless.
                (4) no_of_frames - integer - ONLY TO BE USED IF using_stabilization IS True; pass the index of the frame from the continuous feed being processed.
    '''

    import pickle
    from sklearn.neighbors import KNeighborsClassifier

    if not directly_from_hand:
        mask = utils.segment(image)
        face, foundFace = utils.detect_face(image)
        mask = utils.eliminate_face(face, foundFace, mask)

        if using_stabilization:
            mask = utils.stabilize(foundFace, no_of_frames, image, face, mask)

        hand = utils.get_my_hand(mask)
        if hand is False:
            return 'No hand pose in image'
        features = utils.extract_features(hand)
    else:
        features = utils.extract_features(image)

    with open(model_path, 'rb') as model_file:
        classifier = pickle.load(model_file)
    hand_pose = classifier.predict([features])[0]

    return hand_pose
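A minimal call sketch; 'hand_sign.jpg' is a hypothetical input image, and the default model file must exist at Models/silatra_digits_and_letters.sav:

import cv2

img = cv2.imread('hand_sign.jpg')
print(recognise_hand_pose(img))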
Example #6
def segment():
    # The view is currently disabled; everything below this raise is
    # unreachable until it is removed.
    raise NotImplementedError()
    series_instance_uid = request.args.get('series_instance_uid')
    model_id = request.args.get('model_id')
    foo = request.args.get('foo')
    task_id = request.args.get('task_id')
    return_type = request.args.get('return_type', "html")

    if task_id is None:
        task = utils.segment(series_instance_uid, model_id, foo=foo)
        print(series_instance_uid, task_id, task.ready())
        return redirect(url_for('segment',
                                series_instance_uid=series_instance_uid,
                                model_id=model_id,
                                foo=foo,
                                task_id=task.id))

    task = utils.celery.AsyncResult(task_id)
    print(series_instance_uid, task_id, task.ready())

    response = {'ready': task.ready(), 'task_id': task_id}

    # For debugging in the browser
    if return_type == "html":
        if task.ready():
            return render_template("show_results.html", results=response)
        else:
            # Loading page that polls/redirects while the task runs
            return render_template("loading.html", task_id=task_id)
    else:
        if task.ready():
            try:
                response["result"] = task.get()
            except Exception:
                # Task failed; report readiness without a result
                pass
        return jsonify(response)
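A hypothetical client for this view, usable once the NotImplementedError at the top is removed; the /segment mount point, host, and argument values are all assumptions (requests follows the redirect that attaches task_id automatically):

import requests

params = {
    'series_instance_uid': '1.2.840.0.0',  # placeholder UID
    'model_id': 'demo-model',
    'return_type': 'json',
}
resp = requests.get('http://localhost:5000/segment', params=params)
print(resp.json())   # {'ready': ..., 'task_id': ...}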
Example #7
def recognise_hand_pose(image,
                        directly_from_hand=False,
                        model_path='Models/digits_and_letters.sav',
                        using_stabilization=False,
                        no_of_frames=1):

    import pickle
    from sklearn.neighbors import KNeighborsClassifier

    if not directly_from_hand:
        mask = utils.segment(image)
        face, foundFace = utils.detect_face(image)
        mask = utils.eliminate_face(face, foundFace, mask)

        if using_stabilization:
            mask = utils.stabilize(foundFace, no_of_frames, image, face, mask)

        hand = utils.get_my_hand(mask)
        if hand is False:
            return 'No hand pose in image'
        features = utils.extract_features(hand)
    else:
        features = utils.extract_features(image)

    with open(model_path, 'rb') as model_file:
        classifier = pickle.load(model_file)
    hand_pose = classifier.predict([features])[0]

    return hand_pose
Example #8
def main(config):
    print("\n正在进行第一次聚类...\n")

    # Load the word2vec model
    model = gensim.models.Word2Vec.load(config["w2v_model_file"])

    # Load the data
    data = pd.read_excel(config["cluster_excel_file"])

    # Average word vectors to get one sentence vector per row
    w2v_dim = model.vector_size
    print("Word vector dimensionality:", w2v_dim)

    # Feature matrix: one row per sample
    feature = np.zeros([len(data), w2v_dim])
    split_word = []
    for i in range(len(data)):
        word_list = utils.segment(data.iat[i, 5])  # text lives in the sixth column
        feature[i, :], select_word = get_mean_vec(word_list, model, w2v_dim)
        split_word.append(' '.join(select_word))

    # Keep only rows whose segmentation is non-empty
    bool_idx = [bool(words) for words in split_word]
    num_idx = [i for i, words in enumerate(split_word) if words]
    data = data[bool_idx]
    split_word = np.array(split_word)[num_idx]
    feature = feature[num_idx, :]

    # Cluster with scikit-learn's KMeans
    y_pred = KMeans(n_clusters=config["n_cluster_first"]).fit_predict(feature)
    data["sublabel"] = y_pred
    data["cutwords"] = split_word
    data = data.sort_values(["sublabel"])

    # Save the results
    data.to_excel(config["cluster_excel_file"], index=False)
    print("\nFirst clustering pass complete!\n")
Example #9
        op1 = "QUIT\r\n"
        client.send(op1.encode('ascii'))
        break
   
    data = client.recv(size, socket.MSG_WAITALL)  # Reference: https://www.binarytides.com/receive-full-data-with-the-recv-socket-function-in-python/

    
    # Instead of storing the image as mentioned in the 1st reference: https://stackoverflow.com/a/23312964/5370202
    # we can directly convert it to an OpenCV Mat
    # Reference: https://stackoverflow.com/a/17170855/5370202
    nparr = np.frombuffer(data, np.uint8)  # np.fromstring is deprecated for binary data
    img_np = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
    img_np = imutils.rotate_bound(img_np, 90)
    img_np = cv2.resize(img_np, (0, 0), fx=0.7, fy=0.7)
    
    mask1 = utils.segment(img_np)

    
    gray = cv2.cvtColor(img_np, cv2.COLOR_BGR2GRAY)

    # detect faces in the grayscale image
    
    haar_cascade_face = cv2.CascadeClassifier('/home/aarav/Desktop/MajorProject/Models/haarcascade_frontalface_default.xml')
    rects = haar_cascade_face.detectMultiScale(gray, scaleFactor=1.2, minNeighbors=5)
    # Keep the largest detection as the face
    maxArea1 = 0
    faceRect = -1
    foundFace = False
    for (x, y, w, h) in rects:
        if w * h > maxArea1:
            maxArea1 = w * h
            faceRect = (x, y, w, h)
Example #10
def HMM(sequence, Dic, ProDic):
    # Viterbi-style decode: forward pass for the max probabilities,
    # backward pass to recover the tag sequence, then split by tags.
    lastmaxpro, maxpre = Forward(sequence, Dic=Dic, ProDic=ProDic)
    tag = Backward(lastmaxpro, maxpre)
    res = segment(sequence, tag)
    return tag, res
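The segment helper called here is not shown; a plausible sketch, assuming BMES-style tags where a word closes at an E (end) or S (single-character) tag, which is an assumption about the tag set rather than the project's actual code:

def segment(sequence, tag):
    # Hypothetical reconstruction: join characters into words using
    # BMES tags (B=begin, M=middle, E=end, S=single).
    words, current = [], ''
    for char, t in zip(sequence, tag):
        current += char
        if t in ('E', 'S'):
            words.append(current)
            current = ''
    if current:
        words.append(current)
    return words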
Example #11
from utils import segment
import cv2
import numpy as np

print('Press q/Q to quit')
cap = cv2.VideoCapture(0)
cap.set(cv2.CAP_PROP_FPS, 30)

lower = np.array([0, 140, 60], np.uint8)
upper = np.array([255, 180, 127], np.uint8)

while True:
    _, frame = cap.read()
    mask = segment(frame)
    '''
    Alternative: plain YCrCb threshold segmentation (uses lower/upper above)
    mask = cv2.cvtColor(frame, cv2.COLOR_BGR2YCR_CB)
    mask = cv2.inRange(mask, lower, upper)
    '''
    cv2.imshow('Original', frame)
    cv2.imshow('Segmentation mask', mask)
    key = cv2.waitKey(1)
    if key in (ord('q'), ord('Q')):
        break

cv2.destroyAllWindows()
cap.release()
Example #12
contour_start = False
lower = np.array([0, 147, 60], np.uint8)
upper = np.array([255, 180, 127], np.uint8)

grid = (20, 20)

print('gaf0 gaf1')
while True:
    start_time = time.time()
    _, frame = cap.read()

    x, y, w, h = 100, 100, 300, 300
    cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), thickness=2)
    roi = frame[y:y + h, x:x + w]

    mask = segment(roi, lower, upper)

    if not contour_start:
        cv2.imshow('Skin segmentation using YCrCb mask', mask)
        cv2.imshow('Original', frame)
    else:
        try:
            _, thresh = cv2.threshold(mask, 127, 255, 0)
            hand = get_my_hand(thresh)

            cv2.imshow('Your hand', hand)
            cv2.imshow('Original', frame)

            features = extract_features(hand, grid)
            predictions = classifier.predict_proba([features]).tolist()[0]
            for prob in predictions:
                print('%.2f' % prob, end=' ')
Example #13
File: hmm.py  Project: vrr-21/Silatra
classifier = KNeighborsClassifier(n_neighbors=3)
classifier.fit(X, Y)

lower = np.array([0, 147, 60], np.uint8)
upper = np.array([255, 180, 127], np.uint8)

cap = cv2.VideoCapture('good afternoon.avi')

obs = []
frame_no, grid = 1, (20, 20)
print('Frame: [gaf0, gaf1]')
while True:
    try:
        _, frame = cap.read()
        mask = segment(frame, lower, upper)
        _, thresh = cv2.threshold(mask, 127, 255, 0)
        hand = get_my_hand(thresh)
        features = extract_features(hand, grid)
        pred = classifier.predict([features])
        print('%5d' % frame_no, end=': ')
        print(pred)
        obs.append(pred.tolist())
        frame_no += 1
    except Exception:
        # Any failure (end of video, no hand detected) ends the loop
        break
cap.release()

print(obs)
states = ('gaf0', 'gaf1')
Example #14
                    payload = fread.readline()
                    if payload in ('\r\n', '\n', '\r'):
                        continue
                    if not payload:
                        break
                    payload = payload.strip()

                    payloads.append(payload)
                    y.append(classes[name])

        y = np.array(y)
        np.save(y_train_dir, y)

        for payload in payloads:
            tempseg = segment(payload)
            if not tempseg:
                print(payload)
            payloads_seged.append(tempseg)
            lens.append(len(tempseg))

        with open(lens_dir, 'wb') as f:
            pickle.dump(lens, f)

        # gensim < 4.0 parameter names (size/iter); gensim 4+ renamed
        # these to vector_size/epochs
        model = Word2Vec(payloads_seged,
                         size=embedding_size,
                         iter=iter_num,
                         sg=1,
                         min_count=min_num,
                         max_vocab_size=max_voc)
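Once trained, token vectors can be queried through the gensim < 4.0 API used above; 'select' is a hypothetical token from the payload corpus:

vector = model.wv['select']
print(model.wv.most_similar('select', topn=5))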
Example #15
File: record.py  Project: vrr-21/Silatra
import cv2, numpy as np
from utils import segment

lower = np.array([0, 147, 60], np.uint8)
upper = np.array([255, 180, 127], np.uint8)

cap = cv2.VideoCapture(0)

fourcc = cv2.VideoWriter_fourcc(*'DIVX')
out = cv2.VideoWriter('test2.avi', fourcc, 10, (300, 300))

start_rec = False
while True:
    _, frame = cap.read()
    mask, _, _ = segment(frame, lower, upper)

    x, y, w, h = 100, 100, 300, 300
    cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), thickness=2)
    roi = frame[y:y + h, x:x + w]

    cv2.imshow('You', frame)
    if not start_rec:
        cv2.imshow('Segmented', mask)
    else:
        out.write(roi)

    k = cv2.waitKey(50)
    if k == ord('q'): break
    elif k == ord('s'):
        start_rec = not start_rec
        cv2.destroyAllWindows()
cap.release()
out.release()