Example #1
    def get_user_playlists(ploads_json, playlist_json):
        ploads = JsonManager.load_json_from_file(
            ploads_json)['using_access_token']

        response = requests.get('https://api.spotify.com/v1/me/playlists',
                                params=ploads)
        print(f"get_users_playlists {response.status_code:12}")

        JsonManager.dump_into_json_file(playlist_json, response.json())
Example #2
    def get_token_with_code(ploads_json, tokens_json):
        ploads = JsonManager.load_json_from_file(ploads_json)['using_code']

        response = requests.post('https://accounts.spotify.com/api/token',
                                 data=ploads)
        print(f"get_token_with_code {response.status_code:12}")

        JsonManager.dump_into_json_file(tokens_json, response.json())
        JsonManager.move_token_to_ploads(ploads_json, tokens_json)
Example #3
    def get_user_info(ploads_json, user_info_json):

        ploads = JsonManager.load_json_from_file(
            ploads_json)['using_access_token']

        response = requests.get('https://api.spotify.com/v1/me', params=ploads)
        print(f"get_user_info {response.status_code:18}")

        JsonManager.dump_into_json_file(user_info_json, response.json())
        JsonManager.move_user_id_to_ploads(ploads_json, user_info_json)
Example #4
    def __init__(self, json_file_path, log_file_path):
        """Summary
		
		Args:
		    json_file_path (str): Full filepath to config.json
		    log_file_path (str): Full filepath to output.log file 
		"""
        print(
            f"BehaviorTree building started using {json_file_path} and {log_file_path}"
        )
        self.json_manager = JsonManager(json_file_path)
        self.log_file = open(log_file_path, "r")
Example #5
    def get_user_playlist(ploads_json, playlist_json, playlist_name):
        if not SpotifyRequests.check_if_a_playlist_exists(
                ploads_json, playlist_json, playlist_name):
            print("Error: Playlist doesn't exist")
            return None

        playlists = JsonManager.load_json_from_file(playlist_json)

        for playlist in playlists['items']:
            if playlist['name'] == playlist_name:
                JsonManager.dump_into_json_file('jsons/playlist.json',
                                                playlist)

                return playlist
Example #6
    def do_GET(self):
        if '/callback?c' in self.path:
            # strip the '/callback?code=' prefix (15 characters)
            self.code = self.path[15:]

            code_json = {"code": self.code}

            JsonManager.dump_into_json_file('jsons/code.json', code_json)
            JsonManager.move_code_to_ploads('jsons/ploads.json',
                                            'jsons/code.json')

            HttpServer.shutdown_server()

        elif '/callback?e' in self.path:
            print("Error: Access Denied")
            HttpServer.shutdown_server()
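The handler above depends on the project's own HttpServer and JsonManager helpers and extracts the authorization code by slicing the path at a fixed offset. Below is a minimal, self-contained sketch of the same callback pattern using only the standard library; the port and the use of parse_qs are illustrative assumptions, not the project's actual setup.

from http.server import BaseHTTPRequestHandler, HTTPServer
from urllib.parse import urlparse, parse_qs

class CallbackHandler(BaseHTTPRequestHandler):
    def do_GET(self):
        # parse_qs pulls ?code=... / ?error=... out of the redirect without
        # relying on a fixed-width slice of the path
        query = parse_qs(urlparse(self.path).query)
        self.send_response(200)
        self.end_headers()
        if 'code' in query:
            self.server.auth_code = query['code'][0]
        elif 'error' in query:
            print("Error: Access Denied")

server = HTTPServer(('localhost', 8080), CallbackHandler)  # assumed redirect URI port
server.handle_request()  # serve exactly one request, then return
print(getattr(server, 'auth_code', None))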
Example #7
    def print_user_playlists(ploads_json, playlist_json):
        SpotifyRequests.get_user_playlists(ploads_json, playlist_json)

        playlists = JsonManager.load_json_from_file(playlist_json)

        for i in range(len(playlists['items'])):
            print((i + 1), playlists['items'][i]['name'])
Example #8
    def get_token(ploads_json, tokens_json):
        token = JsonManager.load_json_from_file(
            ploads_json)['using_refresh_token']['refresh_token']

        if token != "":
            SpotifyRequests.get_token_with_refresh_token(
                ploads_json, tokens_json)
        else:
            SpotifyRequests.get_token_without_code(ploads_json, tokens_json)
Example #9
    def get_code(ploads_json):
        ploads = JsonManager.load_json_from_file(ploads_json)['GET_code']

        response = requests.get('https://accounts.spotify.com/authorize',
                                params=ploads)
        print(f"get_code {response.status_code:23}")

        webbrowser.open(response.url)

        HttpServer.start_server()
Example #10
    def create_a_playlist(ploads_json, user_info_json, playlist_json,
                          playlist_name):
        if SpotifyRequests.check_if_a_playlist_exists(ploads_json,
                                                      playlist_json,
                                                      playlist_name):
            print("Playlist already exists")
            return None

        SpotifyRequests.get_user_info(ploads_json, user_info_json)

        ploads = JsonManager.load_json_from_file(ploads_json)
        user_id = ploads['using_access_token']['user_id']
        ploads['create_playlist_body']['name'] = playlist_name

        url = f'https://api.spotify.com/v1/users/{user_id}/playlists'
        data = JsonManager.dump_into_json_string(
            ploads['create_playlist_body'])
        headers = ploads['using_authorization_token']

        response = requests.post(url, data=data, headers=headers)
        print(f"create_a_playlist {response.status_code:14}")
Example #11
    def check_if_a_playlist_exists(ploads_json, playlist_json, playlist_name):
        SpotifyRequests.get_user_playlists(ploads_json, playlist_json)

        playlists = JsonManager.load_json_from_file(playlist_json)

        for playlist in playlists['items']:
            name = playlist['name']

            if playlist_name == name:
                return True

        return False
Example #12
    def get_token_with_refresh_token(ploads_json, tokens_json):
        ploads = JsonManager.load_json_from_file(
            ploads_json)['using_refresh_token']

        response = requests.post('https://accounts.spotify.com/api/token',
                                 data=ploads)
        print(f"get_token_with_refresh_token {response.status_code}")

        response_dict = response.json()

        # Spotify may omit 'refresh_token' when refreshing; keep the old one
        if 'refresh_token' not in response_dict:
            response_dict['refresh_token'] = ploads['refresh_token']

        JsonManager.dump_into_json_file(tokens_json, response_dict)

        JsonManager.move_token_to_ploads(ploads_json, tokens_json)
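The branch above exists because Spotify's refresh response does not always include a new refresh_token. The same carry-forward logic in miniature, with hypothetical dict values:

old_ploads = {'refresh_token': 'abc'}       # previously stored token
response_dict = {'access_token': 'xyz'}     # refresh response without a refresh_token
response_dict.setdefault('refresh_token', old_ploads['refresh_token'])
print(response_dict)  # {'access_token': 'xyz', 'refresh_token': 'abc'}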
Example #13
    def unfollow_user_playlist(ploads_json, playlist_json, playlist_name):
        if not SpotifyRequests.check_if_a_playlist_exists(
                ploads_json, playlist_json, playlist_name):
            print("Error: Playlist doesn't exist")
            return None

        playlist = SpotifyRequests.get_user_playlist(ploads_json,
                                                     playlist_json,
                                                     playlist_name)
        ploads = JsonManager.load_json_from_file(
            ploads_json)['using_authorization_token']

        playlist_id = playlist['id']

        url = f'https://api.spotify.com/v1/playlists/{playlist_id}/followers'
        headers = ploads

        response = requests.delete(url, headers=headers)

        print(f"unfollow_user_playlist {response.status_code:9}")
Example #14
def run_upsample(json_file_path, fmt_file_path):
    json_manager = JsonManager(json_file_path)

    if json_manager.get_upsample_status():
        print(f"Upsampling started using {json_file_path} and {fmt_file_path}")
        upsampled_path = json_manager.get_upsampled_path()
        constants.remove_folder_if_exists(\
         constants.UPSAMPLED_CSV_FOLDER_NAME, upsampled_path)

        hot_encoded_folder = os.fsdecode(os.path.join(\
         json_manager.get_hot_encoded_path(), \
         constants.HOT_ENCODED_CSV_FOLDER_NAME))

        hot_encoded_file = os.fsdecode(os.path.join(\
         hot_encoded_folder, \
         constants.HOT_ENCODED_CSV_FILENAME))

        hot_encoded_data = pd.read_csv(hot_encoded_file)
        features_data = pd.read_csv(hot_encoded_file, \
        usecols = list(hot_encoded_data.columns)[:-1]) # everything except label
        labels_data = pd.read_csv(hot_encoded_file, \
        usecols = [list(hot_encoded_data.columns)[-1]]) # label

        sm = SVMSMOTE(random_state=json_manager.get_random_state())
        X_res, y_res = sm.fit_resample(features_data, labels_data)
        csv_ready = np.append(X_res, y_res, axis=constants.COLUMN_AXIS)

        upsampled_folder = constants.add_folder_to_directory(\
         constants.UPSAMPLED_CSV_FOLDER_NAME, upsampled_path)

        upsampled_file_path = os.fsdecode(os.path.join(\
         upsampled_folder, constants.UPSAMPLED_CSV_FILENAME))

        if os.path.exists(upsampled_file_path):
            os.remove(upsampled_file_path)

        f = open(fmt_file_path, "r")
        fmt = f.readline()
        f.close()

        header = ','.join(str(i) for i in hot_encoded_data.columns)
        np.savetxt(upsampled_file_path, csv_ready, \
         fmt = fmt, \
         delimiter = constants.CSV_DELIMITER, \
         header = header, \
         comments='')
        print(f"Upsampling finished, results in {upsampled_file_path}")
Example #15
def run_hotencode(json_file_path):
    global add_last_action_taken
    print(f"Hot encoding started using {json_file_path}")

    json_manager = JsonManager(json_file_path)
    feature_list = json_manager.get_feature_columns()
    categorical_features = json_manager.get_categorical_features()
    add_last_action_taken = json_manager.get_add_last_action_taken()

    if add_last_action_taken:
        categorical_features.append(constants.LAST_ACTION_TAKEN_COLUMN_NAME)
    binary_features = json_manager.get_binary_features()
    hot_encoded_path = json_manager.get_hot_encoded_path()

    constants.remove_folder_if_exists(\
     constants.HOT_ENCODED_CSV_FOLDER_NAME, hot_encoded_path)

    hot_encoded_folder = constants.add_folder_to_directory(\
     constants.HOT_ENCODED_CSV_FOLDER_NAME, hot_encoded_path)
    hot_encoded_file_path = os.fsdecode(os.path.join(\
     hot_encoded_folder, constants.HOT_ENCODED_CSV_FILENAME))

    normalized_folder = os.fsdecode(os.path.join(\
     json_manager.get_normalized_path(), \
     constants.NORMALIZED_CSV_FOLDER_NAME))

    combined_csv_file = os.fsdecode(os.path.join(\
     normalized_folder, \
     constants.COMBINED_CSV_FILENAME))

    feature_columns = generate_feature_col_dictionary(
        get_header_row(combined_csv_file), feature_list, False)

    features_data = pd.read_csv(combined_csv_file, usecols=feature_columns)

    features_data[binary_features] = features_data[binary_features].fillna(0)
    features_data[binary_features] = features_data[binary_features].astype(
        bool)
    binary_columns_array = features_data[binary_features].to_numpy()

    # hot encoded features
    hot_encoded_array, hot_encoded_header = hot_encode_features(\
     features_data, categorical_features)

    # remove hot encoded features from features_data dataframe
    features_data = features_data.drop(columns=categorical_features +
                                       binary_features)
    features_data_array = features_data.to_numpy()

    # encode labels
    labels_data = pd.read_csv(combined_csv_file, \
     usecols = [constants.LABEL_COLUMN_NAME])
    label_encoder, labels_column_array = encode_label_column(labels_data)

    # add hot_encoded columns, then numerical columns, then encoded labels to one array
    final_csv = np.concatenate(\
     (hot_encoded_array, binary_columns_array, \
      features_data_array, labels_column_array), \
     axis = constants.COLUMN_AXIS)

    # make_formatter_string(hot_encoded_header, numerical_columns, label_column)
    hot_encode_fmt = "%s," * len(
        hot_encoded_header +
        binary_features)  # format hot encoded columns to binary features
    feature_data_fmt = "%1.3f," * len(
        features_data.columns)  # format numerical columns to doubles
    total_fmt = hot_encode_fmt + feature_data_fmt + "%i"  # for label

    final_header = ','.join(
        str(i) for i in (hot_encoded_header + binary_features +
                         list(features_data.columns)))
    final_header += "," + constants.LABEL_COLUMN_NAME  # for label


    np.savetxt(hot_encoded_file_path, final_csv, \
     fmt = total_fmt, \
     header = final_header, \
     delimiter = constants.CSV_DELIMITER, \
     comments='')

    f = open(OUTPUT_LOG_FILE, "w")
    f.write("{}\n".format(total_fmt))
    f.write(str((label_encoder.classes_).tolist()))
    f.close()
    print(f"Hot Encoding finished, results in {hot_encoded_file_path}")
Example #16
def run_normalize(json_file_path):
    global add_last_action_taken
    print(f"Normalizing started using {json_file_path}")

    json_manager = JsonManager(json_file_path)
    csv_folder = json_manager.get_csv_path()
    normalized_folder = json_manager.get_normalized_path()
    feature_list = json_manager.get_feature_columns()
    label_columns = json_manager.get_label_columns()
    lag_features = json_manager.get_lag_features()
    lag_window_length = json_manager.get_sliding_window_length()
    add_last_action_taken = json_manager.get_add_last_action_taken()

    constants.remove_folder_if_exists(\
     constants.NORMALIZED_CSV_FOLDER_NAME, normalized_folder)

    destination_path = constants.add_folder_to_directory(\
     constants.NORMALIZED_CSV_FOLDER_NAME, normalized_folder)

    for file in os.listdir(csv_folder):
        complete_file_path = os.fsdecode(os.path.join(csv_folder, file))
        last_action_taken = None

        if is_file_CSV(file):
            print(f"Reading in csv: {complete_file_path}")
            normalized_filename = make_modified_filename(\
             file, CSV_NAME_EXTENSION)
            normalized_file_path = os.fsdecode(os.path.join(\
             destination_path, normalized_filename))

            current_csv_obj = open(complete_file_path)
            normalized_csv_obj = open(normalized_file_path, mode='w')

            csv_reader = csv.reader(current_csv_obj, \
             delimiter = constants.CSV_DELIMITER)
            csv_writer = csv.writer(normalized_csv_obj, \
             delimiter = constants.CSV_DELIMITER, \
             quotechar = constants.CSV_QUOTECHAR, \
             quoting=csv.QUOTE_MINIMAL)

            all_lag_queues = [[""] * lag_window_length
                              for lag_feature in lag_features]

            header_row = list(feature_list)
            if add_last_action_taken:
                header_row.append(constants.LAST_ACTION_TAKEN_COLUMN_NAME)
            header_row.append(constants.LABEL_COLUMN_NAME)
            csv_writer.writerow(header_row)

            header_row_being_read = True
            for timeseries_row in csv_reader:
                if header_row_being_read:
                    feature_columns = generate_feature_col_dictionary(
                        timeseries_row, feature_list, False)
                    label_indices = list(
                        generate_feature_col_dictionary(
                            timeseries_row, label_columns, True).values())
                    header_row_being_read = False
                    continue

                label_values = [
                    timeseries_row[index] for index in label_indices
                ]
                label_value = next((label_value for label_value in label_values \
                 if label_value), None)

                if label_value:
                    new_normalize_row = []
                    for column_name, column_index in feature_columns.items():
                        if column_name in lag_features:
                            index = lag_features.index(column_name)
                            lagged_feature = update_lag_feature_queue(\
                             all_lag_queues[index], timeseries_row[column_index])
                            new_normalize_row.append(lagged_feature)
                        elif column_name == constants.LAST_ACTION_TAKEN_COLUMN_NAME:
                            new_normalize_row.append(last_action_taken)
                        else:
                            new_normalize_row.append(\
                             timeseries_row[feature_columns[column_name]])
                    new_normalize_row.append(label_value)
                    last_action_taken = label_value
                    csv_writer.writerow(new_normalize_row)
                else:
                    for column_index, column_name in enumerate(lag_features):
                        value = timeseries_row[feature_columns[column_name]]
                        update_lag_feature_queue(all_lag_queues[column_index],
                                                 value)

            current_csv_obj.close()
            normalized_csv_obj.close()

    combined_csv_file_path = os.path.join(destination_path,
                                          constants.COMBINED_CSV_FILENAME)

    if os.path.exists(combined_csv_file_path):
        os.remove(combined_csv_file_path)
    combined_csv = pd.concat([pd.read_csv(os.fsdecode(os.path.join(destination_path, f)))\
     for f in os.listdir(destination_path)])
    combined_csv.to_csv(os.fsdecode(combined_csv_file_path), \
     index = False, encoding = 'utf-8-sig')
    print(f"Normalizing finished, results in {normalized_file_path}")
Example #17
def main():
    json_file_path = process_command_line_args()
    json_manager = JsonManager(json_file_path)
    feature_columns = json_manager.get_feature_columns()
    categorical_features = json_manager.get_categorical_features()
    binary_features = json_manager.get_binary_features()
    hot_encoded_path = json_manager.get_hot_encoded_path()

    normalized_folder = os.fsdecode(os.path.join(\
     json_manager.get_normalized_path(), \
     constants.NORMALIZED_CSV_FOLDER_NAME))
    combined_csv_file = os.fsdecode(os.path.join(\
     normalized_folder, \
     constants.COMBINED_CSV_FILENAME))

    features_data = pd.read_csv(combined_csv_file, usecols=feature_columns)

    for binary_variable in binary_features:
        features_data[binary_variable] = features_data[binary_variable].fillna(
            value=-1)
        features_data[binary_variable] = features_data[binary_variable] * 1
    true_false_columns = features_data[binary_features]
    true_false_columns_array = true_false_columns.to_numpy()

    # true_false_features(features_data, true_false_features)

    # hot encoded features
    hot_encoded_array, hot_encoded_header = hot_encode_features(\
     features_data, categorical_features)

    # remove hot encoded features from features_data dataframe
    features_data = features_data.drop(columns=categorical_features +
                                       binary_features)
    features_data_array = features_data.to_numpy()

    # encode labels
    labels_data = pd.read_csv(combined_csv_file, \
     usecols = [constants.LABEL_COLUMN_NAME])
    label_encoder, labels_column_array = encode_label_column(labels_data)

    # add hot_encoded columns, then numerical columns, then encoded labels to one array
    final_csv = np.concatenate(\
     (hot_encoded_array, true_false_columns_array, \
      features_data_array, labels_column_array), \
     axis = constants.COLUMN_AXIS)

    hot_encoded_folder = constants.add_folder_to_directory(\
     constants.HOT_ENCODED_CSV_FOLDER_NAME, hot_encoded_path)
    hot_encoded_file_path = os.fsdecode(os.path.join(\
     hot_encoded_folder, constants.HOT_ENCODED_CSV_FILENAME))

    if os.path.exists(hot_encoded_file_path):
        os.remove(hot_encoded_file_path)

    # make_formatter_string(hot_encoded_header, numerical_columns, label_column)
    hot_encode_fmt = "%i," * len(
        hot_encoded_header +
        binary_features)  # format hot encoded columns to ints
    feature_data_fmt = "%1.3f," * len(
        features_data.columns)  # format numerical columns to doubles
    total_fmt = hot_encode_fmt + feature_data_fmt + "%i"  # for label

    final_header = ','.join(
        str(i) for i in (hot_encoded_header + binary_features +
                         list(features_data.columns)))
    final_header += "," + constants.LABEL_COLUMN_NAME  # for label


    np.savetxt(hot_encoded_file_path, final_csv, \
     fmt = total_fmt, \
     header = final_header, \
     delimiter = constants.CSV_DELIMITER, \
     comments='')

    f = open(OUTPUT_LOG_FILE, "w")
    f.write("{}\n".format(total_fmt))
    f.write(str((label_encoder.classes_).tolist()))
    f.close()
Example #18
class SpotifyRequests:

    JsonManager.move_ids_to_ploads('jsons/ploads.json', 'jsons/ids.json')

    @staticmethod
    def get_code(ploads_json):
        ploads = JsonManager.load_json_from_file(ploads_json)['GET_code']

        response = requests.get('https://accounts.spotify.com/authorize',
                                params=ploads)
        print(f"get_code {response.status_code:23}")

        webbrowser.open(response.url)

        HttpServer.start_server()

    @staticmethod
    def get_token_with_code(ploads_json, tokens_json):
        ploads = JsonManager.load_json_from_file(ploads_json)['using_code']

        response = requests.post('https://accounts.spotify.com/api/token',
                                 data=ploads)
        print(f"get_token_with_code {response.status_code:12}")

        JsonManager.dump_into_json_file(tokens_json, response.json())
        JsonManager.move_token_to_ploads(ploads_json, tokens_json)

    @staticmethod
    def get_token_without_code(ploads_json, tokens_json):
        SpotifyRequests.get_code(ploads_json)
        SpotifyRequests.get_token_with_code(ploads_json, tokens_json)

    @staticmethod
    def get_token_with_refresh_token(ploads_json, tokens_json):
        ploads = JsonManager.load_json_from_file(
            ploads_json)['using_refresh_token']

        response = requests.post('https://accounts.spotify.com/api/token',
                                 data=ploads)
        print(f"get_token_with_refresh_token {response.status_code}")

        response_dict = response.json()

        # Spotify may omit 'refresh_token' when refreshing; keep the old one
        if 'refresh_token' not in response_dict:
            response_dict['refresh_token'] = ploads['refresh_token']

        JsonManager.dump_into_json_file(tokens_json, response_dict)

        JsonManager.move_token_to_ploads(ploads_json, tokens_json)

    @staticmethod
    def get_token(ploads_json, tokens_json):
        token = JsonManager.load_json_from_file(
            ploads_json)['using_refresh_token']['refresh_token']

        if token != "":
            SpotifyRequests.get_token_with_refresh_token(
                ploads_json, tokens_json)
        else:
            SpotifyRequests.get_token_without_code(ploads_json, tokens_json)

    @staticmethod
    def get_user_info(ploads_json, user_info_json):

        ploads = JsonManager.load_json_from_file(
            ploads_json)['using_access_token']

        response = requests.get('https://api.spotify.com/v1/me', params=ploads)
        print(f"get_user_info {response.status_code:18}")

        JsonManager.dump_into_json_file(user_info_json, response.json())
        JsonManager.move_user_id_to_ploads(ploads_json, user_info_json)

    @staticmethod
    def get_user_playlists(ploads_json, playlist_json):
        ploads = JsonManager.load_json_from_file(
            ploads_json)['using_access_token']

        response = requests.get('https://api.spotify.com/v1/me/playlists',
                                params=ploads)
        print(f"get_users_playlists {response.status_code:12}")

        JsonManager.dump_into_json_file(playlist_json, response.json())

    @staticmethod
    def check_if_a_playlist_exists(ploads_json, playlist_json, playlist_name):
        SpotifyRequests.get_user_playlists(ploads_json, playlist_json)

        playlists = JsonManager.load_json_from_file(playlist_json)

        for playlist in playlists['items']:
            name = playlist['name']

            if playlist_name == name:
                return True

        return False

    @staticmethod
    def create_a_playlist(ploads_json, user_info_json, playlist_json,
                          playlist_name):
        if SpotifyRequests.check_if_a_playlist_exists(ploads_json,
                                                      playlist_json,
                                                      playlist_name):
            print("Playlist already exists")
            return None

        SpotifyRequests.get_user_info(ploads_json, user_info_json)

        ploads = JsonManager.load_json_from_file(ploads_json)
        user_id = ploads['using_access_token']['user_id']
        ploads['create_playlist_body']['name'] = playlist_name

        url = f'https://api.spotify.com/v1/users/{user_id}/playlists'
        data = JsonManager.dump_into_json_string(
            ploads['create_playlist_body'])
        headers = ploads['using_authorization_token']

        response = requests.post(url, data=data, headers=headers)
        print(f"create_a_playlist {response.status_code:14}")

    @staticmethod
    def print_user_playlists(ploads_json, playlist_json):
        SpotifyRequests.get_user_playlists(ploads_json, playlist_json)

        playlists = JsonManager.load_json_from_file(playlist_json)

        for i in range(len(playlists['items'])):
            print((i + 1), playlists['items'][i]['name'])

    @staticmethod
    def get_user_playlist(ploads_json, playlist_json, playlist_name):
        if not SpotifyRequests.check_if_a_playlist_exists(
                ploads_json, playlist_json, playlist_name):
            print("Error: Playlist doesn't exist")
            return None

        playlists = JsonManager.load_json_from_file(playlist_json)

        for playlist in playlists['items']:
            if playlist['name'] == playlist_name:
                JsonManager.dump_into_json_file('jsons/playlist.json',
                                                playlist)

                return playlist

    @staticmethod
    def unfollow_user_playlist(ploads_json, playlist_json, playlist_name):
        if not SpotifyRequests.check_if_a_playlist_exists(
                ploads_json, playlist_json, playlist_name):
            print("Error: Playlist doesn't exist")
            return None

        playlist = SpotifyRequests.get_user_playlist(ploads_json,
                                                     playlist_json,
                                                     playlist_name)
        ploads = JsonManager.load_json_from_file(
            ploads_json)['using_authorization_token']

        playlist_id = playlist['id']

        url = f'https://api.spotify.com/v1/playlists/{playlist_id}/followers'
        headers = ploads

        response = requests.delete(url, headers=headers)

        print(f"unfollow_user_playlist {response.status_code:9}")
Example #19
def main():
	json_file_path, log_file_path = process_command_line_args()
	json_manager = JsonManager(json_file_path)

	log_file = open(log_file_path, "r")
	fmt = log_file.readline()
	label_encoding = eval(log_file.readline())
	log_file.close()

	supervised_learning_data = None
	if json_manager.get_upsample_status():
		upsampled_folder = os.fsdecode(os.path.join(\
			json_manager.get_upsampled_path(), constants.UPSAMPLED_CSV_FOLDER_NAME))

		supervised_learning_data = os.fsdecode(os.path.join(\
			upsampled_folder, constants.UPSAMPLED_CSV_FILENAME))
	else:
		hot_encoded_folder = os.fsdecode(os.path.join(\
			json_manager.get_hot_encoded_path(), constants.HOT_ENCODED_CSV_FOLDER_NAME))
		supervised_learning_data = os.fsdecode(os.path.join(\
			hot_encoded_folder, constants.HOT_ENCODED_CSV_FILENAME))

	supervised_learning_dataframe = pd.read_csv(supervised_learning_data)
	features_data = pd.read_csv(supervised_learning_data, \
		usecols = list(supervised_learning_dataframe.columns)[:-1])
	labels_data = pd.read_csv(supervised_learning_data, \
		usecols = [list(supervised_learning_dataframe.columns)[-1]])

	kFold = json_manager.get_kfold()
	max_depth = json_manager.get_decision_tree_depth()
	output_folder = constants.add_folder_to_directory(\
		constants.OUTPUT_FOLDER_NAME, json_manager.get_output_path())
	folder_name = "{}_kFold_{}_maxDepth".format(kFold, max_depth)
	output_full_path = constants.add_folder_to_directory(folder_name, output_folder)

	clfs = []
	trains_accu = []
	test_accu = []
	# for j in range(4):
	kf = KFold(shuffle = True, n_splits = kFold)
	for train_index, test_index in kf.split(features_data):
		X_train, X_test = features_data.iloc[train_index], features_data.iloc[test_index]
		y_train, y_test = labels_data.iloc[train_index], labels_data.iloc[test_index]

		clf = tree.DecisionTreeClassifier(random_state = json_manager.get_random_state(), \
			max_depth = max_depth)
		clf = clf.fit(X_train, y_train)

		trains_accu.append(clf.score(X_train, y_train))
		test_accu.append(clf.score(X_test, y_test))
		clfs.append(clf)

	report_file = "{}_kFold_{}_maxDepth.txt".format(kFold, max_depth)
	dot_pdf_header = "{}_kFold_{}_maxDepth".format(kFold, max_depth)

	report_file_path = os.path.join(output_full_path, report_file)
	# if os.path.exists(decisionTreeFile_path): 
	# 	os.remove(decisionTreeFile_path)

	report_file_obj = open(report_file_path, "w")
	report_file_obj.write("Decision Tree with max_depth: {}, and kFold: {}\n".format(\
		max_depth, kFold))
	report_file_obj.write("	Average train error with {} fold: {}\n".format(\
		kFold, sum(trains_accu)/len(trains_accu)))
	report_file_obj.write("	Average test error with {} fold: {}\n".format(\
		kFold, sum(test_accu)/len(test_accu)))
	report_file_obj.write("	Decision Tree (DOT format) saved to: {}\n".format(dot_pdf_header))
	report_file_obj.write("	Decision Tree (PDF format) saved to: {}.pdf\n".format(dot_pdf_header))
	report_file_obj.write("Check {} for appropriate pruning.\n\n\n".format(PRUNING_GRAPH_FILENAME))

	clf = tree.DecisionTreeClassifier(random_state = json_manager.get_random_state(), \
		max_depth = max_depth)
	clf = clf.fit(features_data, labels_data)
	dot_pdf_full_path = os.fsdecode(os.path.join(output_full_path, dot_pdf_header))
	plot_decision_tree(clf, dot_pdf_full_path, features_data.columns)

	prune_path = clf.cost_complexity_pruning_path(features_data, labels_data)
	ccp_alphas, impurities = prune_path.ccp_alphas, prune_path.impurities


	pruning_folder = constants.add_folder_to_directory(\
		constants.PRUNE_FOLDER_NAME, output_full_path)

	clfs = []
	train_scores = []
	for i, ccp_alpha in enumerate(ccp_alphas):
		clf = tree.DecisionTreeClassifier(random_state = json_manager.get_random_state(), \
			max_depth = max_depth, ccp_alpha=ccp_alpha)
		clf.fit(features_data, labels_data)
		score = clf.score(features_data, labels_data)

		clfs.append(clf)
		train_scores.append(score)

		newPrunePath = constants.add_folder_to_directory("Pruning_{}".format(i), pruning_folder)
		decision_tree_path = os.fsdecode(os.path.join(\
			newPrunePath, "{}_kFold_{}_maxDepth_{}_prune".format(kFold, max_depth, i)))
		plot_decision_tree(clf, decision_tree_path, features_data.columns)

		decision_tree_obj = clf.tree_
		behavior_tree_obj = btBuilder.bt_espresso_mod(\
			decision_tree_obj, features_data.columns, label_encoding)

		behavior_tree_full_path = os.fsdecode(os.path.join(\
			newPrunePath, constants.BEHAVIOR_TREE_XML_FILENAME))

		# btBuilder.save_tree(behavior_tree_obj, behavior_tree_full_path)
		btBuilder.save_tree(behavior_tree_obj, newPrunePath)

		report_file_obj.write("prune: {} \n".format(i))
		report_file_obj.write("	ccp_alpha: {}, train score: {}\n".format(ccp_alpha, train_scores[i]))
		report_file_obj.write("	Decision Tree saved to {}\n".format(decision_tree_path))
		report_file_obj.write("	Behavior Tree saved to {}\n\n".format(behavior_tree_full_path))
		report_file_obj.write("")

	fig, ax = plt.subplots()
	ax.set_xlabel("alpha")
	ax.set_ylabel("accuracy")
	ax.set_title("Accuracy vs alpha for training sets")
	ax.plot(ccp_alphas, train_scores, marker='o', label="train", drawstyle="steps-post")
	ax.legend()
	graph_path = os.fsdecode(os.path.join(output_full_path, PRUNING_GRAPH_FILENAME))
	plt.savefig(graph_path)

	report_file_obj.close()
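The pruning sweep above comes straight from scikit-learn's cost-complexity pruning API: the fitted tree reports the ccp_alpha values at which it would be pruned, and one tree is refit per alpha. A compact sketch on a bundled dataset:

from sklearn import tree
from sklearn.datasets import load_iris

X, y = load_iris(return_X_y=True)
full_tree = tree.DecisionTreeClassifier(random_state=0).fit(X, y)
path = full_tree.cost_complexity_pruning_path(X, y)
for ccp_alpha in path.ccp_alphas:
    clf = tree.DecisionTreeClassifier(random_state=0, ccp_alpha=ccp_alpha).fit(X, y)
    print(f"alpha={ccp_alpha:.4f} train score={clf.score(X, y):.3f}")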
Example #20
class Runner:
    """Main runner class for this file
	
	Attributes:
	    json_manager (json_manager.JsonManager): JSON Manager for config.json
	    log_file (_io.TextIOWrapper): Log file used for formatting
	"""
    def __init__(self, json_file_path, log_file_path):
        """Summary
		
		Args:
		    json_file_path (str): Full filepath to config.json
		    log_file_path (str): Full filepath to output.log file 
		"""
        print(
            f"BehaviorTree building started using {json_file_path} and {log_file_path}"
        )
        self.json_manager = JsonManager(json_file_path)
        self.log_file = open(log_file_path, "r")

    def get_file_fmt_and_label_encoding(self):
        """Summary
		
		Returns:
		    tuple(str, list<str>): Tuple containg string output file format and list of label encodings
		"""
        fmt = self.log_file.readline()
        label_encoding = eval(self.log_file.readline())
        self.log_file.close()
        return fmt, label_encoding

    def get_supervised_data_csv_filepath(self):
        """Returns filepath of data, uses one hot encoded if upsample = false in config.json
		
		Returns:
		    string: filepath to data csv
		"""
        data_folder = os.fsdecode(os.path.join(\
          self.json_manager.get_hot_encoded_path(), constants.HOT_ENCODED_CSV_FOLDER_NAME))
        filename = constants.HOT_ENCODED_CSV_FILENAME

        if self.json_manager.get_upsample_status():
            data_folder = os.fsdecode(os.path.join(\
             self.json_manager.get_upsampled_path(), constants.UPSAMPLED_CSV_FOLDER_NAME))
            filename = constants.UPSAMPLED_CSV_FILENAME

        return os.fsdecode(os.path.join(\
          data_folder, filename))

    def get_output_folder(self, kFold, max_depth):
        path = constants.combine_folder_and_working_dir(
            constants.PIPELINE_OUTPUT_FOLDER_NAME,
            self.json_manager.get_output_path())
        return constants.combine_folder_and_working_dir(
            "{}_kFold_{}_maxDepth".format(kFold, max_depth), path)

    def create_output_folder(self, kFold, max_depth):
        output_folder = constants.add_folder_to_directory(\
         constants.PIPELINE_OUTPUT_FOLDER_NAME, self.json_manager.get_output_path())
        folder_name = "{}_kFold_{}_maxDepth".format(kFold, max_depth)
        return constants.add_folder_to_directory(folder_name, output_folder)

    def format_float_list_to_precision(self, list_in, precision):
        prec_str = "{0:0." + str(precision) + "f}"
        return [prec_str.format(i) for i in list_in]

    def k_fold_train_decision_tree_w_max_depth(self, num_k_folds, max_depth,
                                               output_full_path):

        kf = KFold(shuffle=True, n_splits=num_k_folds)
        # build full tree on all data
        full_tree = tree.DecisionTreeClassifier(random_state = self.json_manager.get_random_state(), \
          max_depth = max_depth).fit(self.features_data, self.labels_data)

        # get set of alphas from cost_complexity_pruning
        prune_path = full_tree.cost_complexity_pruning_path(
            self.features_data, self.labels_data)
        ccp_alphas, impurities = prune_path.ccp_alphas, prune_path.impurities

        self.train_scores = [0] * len(ccp_alphas)
        self.test_scores = [0] * len(ccp_alphas)

        # split data into train/test
        for train_index, test_index in kf.split(self.features_data):
            X_train, X_test = self.features_data.iloc[
                train_index], self.features_data.iloc[test_index]
            y_train, y_test = self.labels_data.iloc[
                train_index], self.labels_data.iloc[test_index]

            # create tree on each alpha
            for i, alpha in enumerate(ccp_alphas):
                clf = tree.DecisionTreeClassifier(\
                 random_state = self.json_manager.get_random_state(), \
                 max_depth = max_depth, \
                 ccp_alpha=alpha)
                clf = clf.fit(X_train, y_train)
                self.train_scores[i] += clf.score(X_train,
                                                  y_train) / num_k_folds
                self.test_scores[i] += clf.score(X_test, y_test) / num_k_folds

    def generate_full_binary_set(self):
        bin_set = self.json_manager.get_binary_features()
        # categorical
        cat_set = self.json_manager.get_categorical_features()

        # LAT

    def run(self):
        """Reads in data, trains, and reports results
		"""
        kFold = self.json_manager.get_kfold()
        max_depth = self.json_manager.get_decision_tree_depth()

        constants.remove_folder_if_exists(
            self.get_output_folder(kFold, max_depth))

        fmt, label_encoding = self.get_file_fmt_and_label_encoding()

        self.supervised_learning_dataframe = pd.read_csv(
            self.get_supervised_data_csv_filepath())
        self.features_data = self.supervised_learning_dataframe.loc[
            :, self.supervised_learning_dataframe.columns !=
            constants.LABEL_COLUMN_NAME]
        self.labels_data = self.supervised_learning_dataframe.loc[
            :, self.supervised_learning_dataframe.columns ==
            constants.LABEL_COLUMN_NAME]

        output_full_path = self.create_output_folder(kFold, max_depth)
        self.k_fold_train_decision_tree_w_max_depth(kFold, max_depth,
                                                    output_full_path)

        report_file = "{}_kFold_{}_maxDepth.txt".format(kFold, max_depth)
        dot_pdf_header = "{}_kFold_{}_maxDepth".format(kFold, max_depth)

        report_file_path = os.path.join(output_full_path, report_file)
        report_file_obj = open(report_file_path, "w")
        report_file_obj.write("Decision Tree with max_depth: {}, and kFold: {}\n".format(\
         max_depth, kFold))
        # report_file_obj.write("	Average train error with {} fold: {}\n".format(\
        # 	kFold, sum(self.train_scores)/len(self.train_accu)))
        # report_file_obj.write("	Average test error with {} fold: {}\n".format(\
        # 	kFold, sum(self.test_accu)/len(self.test_accu)))
        report_file_obj.write(
            "	Decision Tree (DOT format) saved to: {}\n".format(
                dot_pdf_header))
        report_file_obj.write(
            "	Decision Tree (PDF format) saved to: {}.pdf\n".format(
                dot_pdf_header))
        report_file_obj.write("Check {} for appropriate pruning.\n\n\n".format(
            PRUNING_GRAPH_FILENAME))

        clf = tree.DecisionTreeClassifier(random_state = self.json_manager.get_random_state(), \
         max_depth = max_depth)
        clf = clf.fit(self.features_data, self.labels_data)
        dot_pdf_full_path = os.fsdecode(
            os.path.join(output_full_path, dot_pdf_header))
        plot_decision_tree(clf, dot_pdf_full_path, self.features_data.columns)

        prune_path = clf.cost_complexity_pruning_path(self.features_data,
                                                      self.labels_data)
        ccp_alphas, impurities = prune_path.ccp_alphas, prune_path.impurities


        pruning_folder = constants.add_folder_to_directory(\
         constants.PRUNE_FOLDER_NAME, output_full_path)

        clfs = []
        train_scores = []
        for i, ccp_alpha in enumerate(ccp_alphas):
            clf = tree.DecisionTreeClassifier(random_state = self.json_manager.get_random_state(), \
             max_depth = max_depth, ccp_alpha=ccp_alpha)
            clf.fit(self.features_data, self.labels_data)
            score = clf.score(self.features_data, self.labels_data)

            clfs.append(clf)
            train_scores.append(score)

            newPrunePath = constants.add_folder_to_directory(
                "Pruning_{0}_{1:.6g}".format(i, ccp_alpha), pruning_folder)
            decision_tree_path = os.fsdecode(os.path.join(\
             newPrunePath, "{0}_kFold_{1}_maxDepth_{2}_{3:.6g}_prune".format(kFold, max_depth,i, ccp_alpha)))
            plot_decision_tree(clf, decision_tree_path,
                               self.features_data.columns)

            decision_tree_obj = clf.tree_

            # theoretical split to dump decision trees out to files
            # TODO: should we include categorical features as binary?
            full_binary_set = self.generate_full_binary_set()
            behavior_tree_obj = btBuilder.bt_espresso_mod(\
             decision_tree_obj,
             self.features_data.columns,
             label_encoding,
             self.json_manager.get_binary_features())

            behavior_tree_full_path = os.fsdecode(os.path.join(\
             newPrunePath, constants.BEHAVIOR_TREE_XML_FILENAME))

            # btBuilder.save_tree(behavior_tree_obj, behavior_tree_full_path)
            btBuilder.save_tree(behavior_tree_obj, newPrunePath)

            report_file_obj.write("prune: {} \n".format(i))
            report_file_obj.write("	ccp_alpha: {}, train score: {}\n".format(
                ccp_alpha, train_scores[i]))
            report_file_obj.write(
                "	Decision Tree saved to {}\n".format(decision_tree_path))
            report_file_obj.write("	Behavior Tree saved to {}\n\n".format(
                behavior_tree_full_path))
            report_file_obj.write("")

        fig, ax = plt.subplots()
        ax.set_xlabel("alpha")
        ax.set_ylabel("accuracy")
        ax.set_title(
            "Accuracy vs alpha for Final Tree Prunes (note: uses all data for final training)"
        )
        ax.plot(ccp_alphas,
                self.train_scores,
                marker='o',
                label="train",
                drawstyle="steps-post")
        ax.plot(ccp_alphas,
                self.test_scores,
                marker='x',
                label="test",
                drawstyle="steps-post")
        ax.legend()
        graph_path = os.fsdecode(
            os.path.join(output_full_path, PRUNING_GRAPH_FILENAME))
        plt.savefig(graph_path)

        results_txt_file = open(
            os.fsdecode(os.path.join(output_full_path, RESULTS_TEXT_FILENAME)),
            "w")
        alist = ccp_alphas.flatten().tolist()
        acc_diffs = [
            a_i - b_i for a_i, b_i in zip(self.train_scores, self.test_scores)
        ]
        float_precision = 6
        results_txt_file.write(
            f"alphas:\t\t{self.format_float_list_to_precision(alist, float_precision)}\n"
        )
        results_txt_file.write(
            f"train acc:\t{self.format_float_list_to_precision(self.train_scores, float_precision)}\n"
        )
        results_txt_file.write(
            f"test acc:\t{self.format_float_list_to_precision(self.test_scores, float_precision)}\n"
        )
        results_txt_file.write(
            f"acc diff:\t{self.format_float_list_to_precision(acc_diffs, float_precision)}\n"
        )
        results_txt_file.close()

        report_file_obj.close()
        print(f"BehaviorTree buidling finished, results in {output_full_path}")
Example #21
def main():
    json_file_path = process_command_line_args()
    json_manager = JsonManager(json_file_path)

    csv_folder = json_manager.get_csv_path()
    normalized_folder = json_manager.get_normalized_path()
    feature_columns = json_manager.get_feature_columns()
    label_columns = json_manager.get_label_columns()
    lag_features = json_manager.get_lag_features()
    lag_window_length = json_manager.get_sliding_window_length()

    destination_path = constants.add_folder_to_directory(\
     constants.NORMALIZED_CSV_FOLDER_NAME, normalized_folder)

    for file in os.listdir(csv_folder):
        complete_file_path = os.fsdecode(os.path.join(csv_folder, file))

        if is_file_CSV(file):
            normalized_filename = make_modified_filename(\
             file, CSV_NAME_EXTENSION)
            normalized_file_path = os.fsdecode(os.path.join(\
             destination_path, normalized_filename))

            current_csv_obj = open(complete_file_path)
            normalized_csv_obj = open(normalized_file_path, mode='w')

            csv_reader = csv.reader(current_csv_obj, \
             delimiter = constants.CSV_DELIMITER)
            csv_writer = csv.writer(normalized_csv_obj, \
             delimiter = constants.CSV_DELIMITER, \
             quotechar = constants.CSV_QUOTECHAR, \
             quoting=csv.QUOTE_MINIMAL)

            all_lag_queues = [[""] * lag_window_length
                              for lag_feature in lag_features]

            header_row = list(feature_columns)
            header_row.append(constants.LABEL_COLUMN_NAME)
            csv_writer.writerow(header_row)

            label_indices = list(label_columns.values())
            header_row_being_read = True
            for timeseries_row in csv_reader:
                if header_row_being_read:
                    header_row_being_read = False
                    continue
                label_values = [
                    timeseries_row[index] for index in label_indices
                ]
                label_value = next((label_value for label_value in label_values \
                 if label_value), None)

                if label_value:
                    new_normalize_row = []
                    for column_name, column_index in feature_columns.items():
                        if column_name in lag_features:
                            index = lag_features.index(column_name)
                            lagged_feature = update_lag_feature_queue(\
                             all_lag_queues[index], timeseries_row[column_index])
                            new_normalize_row.append(lagged_feature)
                        else:
                            new_normalize_row.append(\
                             timeseries_row[feature_columns[column_name]])
                    new_normalize_row.append(label_value)
                    csv_writer.writerow(new_normalize_row)
                else:
                    for column_index, column_name in enumerate(lag_features):
                        value = timeseries_row[feature_columns[column_name]]
                        update_lag_feature_queue(all_lag_queues[column_index],
                                                 value)

            current_csv_obj.close()
            normalized_csv_obj.close()

    combined_csv_file_path = os.path.join(destination_path,
                                          constants.COMBINED_CSV_FILENAME)

    if os.path.exists(combined_csv_file_path):
        os.remove(combined_csv_file_path)
    combined_csv = pd.concat([pd.read_csv(os.fsdecode(os.path.join(destination_path, f)))\
     for f in os.listdir(destination_path)])
    combined_csv.to_csv(os.fsdecode(combined_csv_file_path), \
     index = False, encoding = 'utf-8-sig')