def say_text(ip: str,
             port: int,
             text: str,
             speed=1.0,
             emotion='normal') -> int:
    """Synthesize ``text`` into a WAV file, send it for playback, return its length.

    The audio is produced by ``jtalk.make_wav`` into the ``wav`` directory,
    read back from ``wav/<name>.wav`` and sent with the ``play_wav`` command
    through ``io.send``.

    Args:
        ip: Destination address passed to ``io.send``.
        port: Destination port passed to ``io.send``.
        text: Text to synthesize; its first 10 characters name the WAV file.
        speed: Forwarded unchanged to ``jtalk.make_wav``.
        emotion: Forwarded unchanged to ``jtalk.make_wav``.

    Returns:
        Duration of the generated audio in milliseconds, truncated to ``int``.
    """
    import re

    # The file name comes from user-supplied text.  Path separators and other
    # characters that are illegal in file names on common filesystems would
    # make the path point into a non-existent directory (or be rejected by the
    # OS), so they are replaced with '_' before the name is used.
    stem = re.sub(r'[\\/:*?"<>|\x00-\x1f]', '_', text[:10])
    output_file = '{}.wav'.format(stem)
    jtalk.make_wav(text, speed, emotion, output_file, output_dir='wav')

    # NOTE(review): assumes make_wav writes to <output_dir>/<output_file>;
    # the file is read back from exactly that location.
    wav_path = 'wav/' + output_file
    with open(wav_path, 'rb') as f:
        data = f.read()
    # Send after the file is closed so the handle is not held during the
    # network call.
    io.send(ip, port, 'play_wav', data)

    sound = AudioSegment.from_file(wav_path, 'wav')
    return int(sound.duration_seconds * 1000)
def stop_idle_motion(ip: str, port: int):
    """Send the ``stop_idle_motion`` command to ``ip:port`` via ``io.send``.

    No payload is attached to the command.
    """
    command = 'stop_idle_motion'
    io.send(ip, port, command)
def play_idle_motion(ip: str, port: int, speed=1.0, pause=1000):
    """Send the ``play_idle_motion`` command with its settings to ``ip:port``.

    The settings are serialized as a UTF-8 encoded JSON object with the keys
    ``Speed`` and ``Pause``.

    Args:
        ip: Destination address passed to ``io.send``.
        port: Destination port passed to ``io.send``.
        speed: Value stored under the ``Speed`` key.
        pause: Value stored under the ``Pause`` key
            (presumably milliseconds -- TODO confirm with the receiver).
    """
    settings = {'Speed': speed, 'Pause': pause}
    payload = json.dumps(settings).encode('utf-8')
    io.send(ip, port, 'play_idle_motion', payload)
def play_motion(ip: str, port: int, motion: List[dict]) -> int:
    """Send a motion (a list of pose dicts) and return its total ``Msec``.

    The list is serialized as UTF-8 encoded JSON and sent with the
    ``play_motion`` command through ``io.send``.

    Args:
        ip: Destination address passed to ``io.send``.
        port: Destination port passed to ``io.send``.
        motion: Sequence of dicts; each must contain an ``Msec`` entry.

    Returns:
        The sum of the ``Msec`` values of all entries in ``motion``.
    """
    payload = json.dumps(motion).encode('utf-8')
    io.send(ip, port, 'play_motion', payload)

    # Accumulate the per-step durations after the command has been sent.
    total_msec = 0
    for step in motion:
        total_msec = total_msec + step['Msec']
    return total_msec
def stop_pose(ip: str, port: int):
    """Send the ``stop_pose`` command to ``ip:port`` via ``io.send``.

    No payload is attached to the command.
    """
    command = 'stop_pose'
    io.send(ip, port, command)
def reset_pose(ip: str, port: int, speed=1.0) -> int:
    """Send a ``play_pose`` command built from the home servo and LED maps.

    The pose duration is ``int(1000 / speed)`` and is stored under ``Msec``
    next to ``HOME_ALL_SERVO_MAP`` and ``HOME_LED_MAP``.

    Args:
        ip: Destination address passed to ``io.send``.
        port: Destination port passed to ``io.send``.
        speed: Divisor applied to 1000 to obtain the pose duration.

    Returns:
        The ``Msec`` value that was sent.

    Raises:
        ZeroDivisionError: If ``speed`` is zero.
    """
    duration_msec = int(1000 / speed)
    home_pose = {
        'Msec': duration_msec,
        'ServoMap': HOME_ALL_SERVO_MAP,
        'LedMap': HOME_LED_MAP,
    }
    payload = json.dumps(home_pose).encode('utf-8')
    io.send(ip, port, 'play_pose', payload)
    return duration_msec
def play_pose(ip: str, port: int, pose: dict) -> int:
    """Send a single pose with the ``play_pose`` command and return its ``Msec``.

    Args:
        ip: Destination address passed to ``io.send``.
        port: Destination port passed to ``io.send``.
        pose: Dict serialized as UTF-8 encoded JSON; must contain ``Msec``.

    Returns:
        The value stored under ``pose['Msec']``.
    """
    payload = json.dumps(pose).encode('utf-8')
    io.send(ip, port, 'play_pose', payload)
    # Looked up only after sending, so a missing key surfaces after the send.
    duration_msec = pose['Msec']
    return duration_msec
def stop_wav(ip: str, port: int):
    """Send the ``stop_wav`` command to ``ip:port`` via ``io.send``.

    No payload is attached to the command.
    """
    command = 'stop_wav'
    io.send(ip, port, command)
def play_wav(ip: str, port: int, wav_file: str) -> int:
    """Send the bytes of a WAV file for playback and return its duration.

    The file is read in binary mode and its contents are sent with the
    ``play_wav`` command through ``io.send``.

    Args:
        ip: Destination address passed to ``io.send``.
        port: Destination port passed to ``io.send``.
        wav_file: Path of the WAV file to send.

    Returns:
        Duration of the audio in milliseconds, truncated to ``int``.
    """
    with open(wav_file, 'rb') as wav_handle:
        wav_bytes = wav_handle.read()
        io.send(ip, port, 'play_wav', wav_bytes)

    # Load the same file again to measure how long playback will take.
    segment = AudioSegment.from_file(wav_file, 'wav')
    duration_msec = int(segment.duration_seconds * 1000)
    return duration_msec