# Collect the sound names of every category (looked up relative to "../")
########################################################################
explosion_sounds = get_recursive_sound_names(EXPLOSION_SOUNDS, "../")
motor_sounds = get_recursive_sound_names(MOTOR_SOUNDS, "../")
# wood_sounds is collected here but is not part of ALL_SOUND_LIST below
wood_sounds = get_recursive_sound_names(WOOD_SOUNDS, "../")
human_sounds = get_recursive_sound_names(HUMAN_SOUNDS, "../")
nature_sounds = get_recursive_sound_names(NATURE_SOUNDS, "../")
domestic_sounds = get_recursive_sound_names(DOMESTIC_SOUNDS, "../")
tools = get_recursive_sound_names(TOOLS_SOUNDS, "../")
# NOTE: wild-animal sounds are not collected; the call is left disabled.
#wild_animals=get_recursive_sound_names(Wild_animals)

########################################################################
# Load the balanced data from balancing_dataset.
# NOTE(review): the earlier comment said audiomoth annotated files are
# included for training, yet audiomoth_flag is passed as 0 - confirm
# what the flag value means.
########################################################################
DATA_FRAME = balancing_dataset.balanced_data(
    audiomoth_flag=0, mixed_sounds_flag=0)

########################################################################
# Sound classes in use.
# To add a class, append its name here and its sounds to ALL_SOUND_LIST.
########################################################################
ALL_SOUND_NAMES = [
    'Motor_sound',
    'Explosion_sound',
    'Human_sound',
    'Nature_sound',
    'Domestic_animals',
    'Tools'
]
ALL_SOUND_LIST = (explosion_sounds + motor_sounds + human_sounds +
                  nature_sounds + domestic_sounds + tools)

########################################################################
# Map all the sounds into their respective classes
# Include the similar column if a new class label is to be added
def _select_sound_class(data_frame, class_index):
    """
    Returns the rows of data_frame whose 'Data_dist_new' entry has
    class_index as its first element
    """
    belongs_to_class = data_frame['Data_dist_new'].apply(
        lambda arr: arr[0] == class_index)
    return data_frame.loc[belongs_to_class]


def get_req_sounds(path_to_goertzel_components):
    """
    Returns the dataframes with required sounds

    The '*.pkl' files found in path_to_goertzel_components are listed and
    the first 11 characters of each file name are taken as its 'YTID'.
    The frame returned by balancing_dataset.balanced_data() (called with
    no arguments) is reduced to the columns 'wav_file', 'YTID',
    'labels_name' and 'Data_dist_new', merged with those YTIDs, and then
    split by the first element of 'Data_dist_new':
    0 explosion, 1 motor, 2 nature, 3 human, 4 wood, 5 wild animals,
    6 domestic animals, 7 tools.
    The number of files and the shape of every split are printed.

    path_to_goertzel_components : directory that holds the pickle files
    (with or without a trailing path separator)

    Returns a tuple of dataframes in the order
    (motor, human, wood, explosion, domestic, tools, wild, nature)
    """
    # Imported locally so the function does not rely on a file-level import
    import os

    # os.path.join gives the same pattern whether or not the directory was
    # passed with a trailing separator; plain concatenation would turn
    # 'dir' into the pattern 'dir*.pkl' and match sibling files instead.
    pattern = os.path.join(path_to_goertzel_components, '*.pkl')
    # removing the duplicate files if any
    pickle_files = list(set(glob.glob(pattern)))
    print('Number of  files : %s' % len(pickle_files))

    # saving file names as a dataframe column
    # (the first 11 characters of the file name are used as the YTID)
    arb_data_frame = pd.DataFrame()
    arb_data_frame['YTID'] = [os.path.basename(each_file)[:11]
                              for each_file in pickle_files]

    # calling the balancing_dataset function to balance the data
    data = balancing_dataset.balanced_data()
    data = data[['wav_file', 'YTID', 'labels_name', 'Data_dist_new']]

    # merge the dataframes to get the final dataframe with required columns
    data_frame = pd.merge(data, arb_data_frame, on='YTID')

    # Separate out the different sounds based on the column 'Data_dist_new'.
    # Check seperating_different_sounds.py for more information about this
    # column. Entries are (key, printed label, class index) and are listed
    # in the order in which the shapes are reported.
    sound_classes = [
        ('explosion', 'explosion sound shape:', 0),
        ('motor', 'motor sounds shape :', 1),
        ('nature', 'nature sounds shape :', 2),
        ('human', 'human sounds shape :', 3),
        ('wood', 'wood sounds shape :', 4),
        ('domestic', 'domestic sounds shape :', 6),
        ('tools', 'tools sounds shape :', 7),
        ('wild', 'wild sounds shape :', 5),
    ]
    selected = {}
    for key, label, class_index in sound_classes:
        subset = _select_sound_class(data_frame, class_index)
        print('%s %s' % (label, subset.shape))
        selected[key] = subset

    # return dataframe of each sound separately
    return (selected['motor'], selected['human'], selected['wood'],
            selected['explosion'], selected['domestic'], selected['tools'],
            selected['wild'], selected['nature'])

# Collect the sound names of every category
ambient_sounds, impact_sounds = get_all_sound_names()
explosion_sounds = get_recursive_sound_names(EXPLOSION_SOUNDS)
motor_sounds = get_recursive_sound_names(MOTOR_SOUNDS)
# wood_sounds is collected here but is not part of ALL_SOUND_LIST below
wood_sounds = get_recursive_sound_names(WOOD_SOUNDS)
human_sounds = get_recursive_sound_names(HUMAN_SOUNDS)
nature_sounds = get_recursive_sound_names(NATURE_SOUNDS)
domestic_sounds = get_recursive_sound_names(DOMESTIC_SOUNDS)
tools = get_recursive_sound_names(TOOLS_SOUNDS)
# NOTE: wild-animal sounds are not collected; the call is left disabled.
#wild_animals=get_recursive_sound_names(Wild_animals)


# Load the balanced data from balancing_dataset.
# NOTE(review): the earlier comment said audiomoth annotated files are
# included for training, yet flag_for_audiomoth is passed as 0 - confirm
# what the flag value means.
DATA_FRAME = balancing_dataset.balanced_data(flag_for_audiomoth=0)
# Report the number of rows that were loaded
print("%s %s" % (DATA_FRAME.shape[0], "examples"))

# Sound classes in use.
# To add a class, append its name here and its sounds to ALL_SOUND_LIST.
ALL_SOUND_NAMES = [
    'Motor_sound',
    'Explosion_sound',
    'Human_sound',
    'Nature_sound',
    'Domestic_animals',
    'Tools'
]
ALL_SOUND_LIST = (explosion_sounds + motor_sounds + human_sounds +
                  nature_sounds + domestic_sounds + tools)



# Replace every label by the name of the class it belongs to.
# The steps are chained: each one starts from the result of the previous
# one, and labels that match no class list are kept unchanged.
DATA_FRAME['labels_new'] = DATA_FRAME['labels_name'].apply(
    lambda arr: [x if x not in motor_sounds else 'Motor_sound'
                 for x in arr])
DATA_FRAME['labels_new'] = DATA_FRAME['labels_new'].apply(
    lambda arr: [x if x not in explosion_sounds else 'Explosion_sound'
                 for x in arr])
DATA_FRAME['labels_new'] = DATA_FRAME['labels_new'].apply(
    lambda arr: [x if x not in nature_sounds else 'Nature_sound'
                 for x in arr])
# Help text shown for the codec option (stray double comma removed)
HELP = 'Supported audio codec formats aac, ac3, mp2, flac, libopus, mp3'
PARSER = argparse.ArgumentParser(description=DESCRIPTION)
PARSER.add_argument('-path_to_original_audio_files',
                    '--path_to_original_audio_files',
                    action='store',
                    help='Input the path')
# Required: without it the value is None and the os.path.exists() check
# below fails with a TypeError instead of a readable usage message.
# NOTE(review): the other two options are presumably mandatory as well -
# confirm against the compression loop before marking them required.
PARSER.add_argument('-path_to_compressed_audio_files',
                    '--path_to_compressed_audio_files',
                    action='store',
                    required=True,
                    help='Input the path')
PARSER.add_argument('-codec_type', '--codec_type', action='store', help=HELP)
RESULT = PARSER.parse_args()

# Read the balanced data and get the wav files which are to be compressed
REQUIRED_DF = balancing_dataset.balanced_data()
ORIGINAL_WAV_FILE_LIST = REQUIRED_DF['wav_file'].tolist()
TYPE_OF_COMPRESSION = RESULT.codec_type

# Paths of the compressed output files and of the original wav files,
# both taken from the command line
COMPRESSED_FILES_PATH = RESULT.path_to_compressed_audio_files
ORIGINAL_WAV_FILES_PATH = RESULT.path_to_original_audio_files

# Create the directory for the compressed files if it is not present
if not os.path.exists(COMPRESSED_FILES_PATH):
    os.makedirs(COMPRESSED_FILES_PATH)

# Compress the wav files into opus format.
# Opus is used as an example; it can be changed to mp2, flac or any other required format.
for ORIGINAL_WAV in ORIGINAL_WAV_FILE_LIST:
    if os.path.exists(COMPRESSED_FILES_PATH + ORIGINAL_WAV[:-3] +