示例#1
0
    def generate_itol_ltoi(labels):
        """Build the two label lookup tables for a collection of labels.

        Args:
            labels: List of labels

        Returns:
            tuple of dict: ``(itol, ltoi)`` where ``itol`` is the result of
                ``Utils.convert_list_to_dict(labels)`` and ``ltoi`` is
                ``Utils.reserve_dict`` applied to that same mapping.

        """
        index_to_label = Utils.convert_list_to_dict(labels)
        # The second table is derived from the first, so the two always
        # describe the same set of labels.
        label_to_index = Utils.reserve_dict(index_to_label)
        return index_to_label, label_to_index
示例#2
0
    def encode_from_index_files(root,
                                label_col,
                                files=None,
                                file_type='json',
                                pattern=".*json",
                                multi_label=False,
                                multi_label_delimiter=None):
        """Create the itol and ltoi mappings from dataset index files.

        Args:
            root: Root folder where the dataset index files are located.

            label_col: Name of the column that contains the labels.

            files: List of dataset files. If this is not provided,
                any file in the root folder that matches ``pattern``
                will be read.

            file_type: Type of index file. Default is json.

            pattern: Regex to match when the files list is not provided.
                If a file list is provided, it takes precedence and the
                pattern is ignored.

            multi_label: Indicates whether the labels are multi-label.

            multi_label_delimiter: If the labels are multi-label but are
                not represented as an array type, this parameter holds
                the delimiter that separates them.

        Returns:
            A pair of dicts as produced by
                ``Encode_Labels.generate_itol_ltoi``: one going from int
                to label and another going from label to int.

        """
        index_paths = Utils.create_list_of_file_paths(root, files, pattern)

        # Resolve the reader and the label extractor before touching any
        # file, so a bad configuration surfaces before any I/O happens.
        read_index = Encode_Labels.determine_read_file_method(file_type)
        extract_labels = Encode_Labels.determine_label_extract_method(
            multi_label, multi_label_delimiter)

        # Load every index file and stack them into a single frame.
        frames = [read_index(path) for path in index_paths]
        combined = pd.concat(frames)

        label_column = combined[label_col]
        labels = extract_labels(label_column, multi_label_delimiter)
        return Encode_Labels.generate_itol_ltoi(labels)
示例#3
0
def default_labels():
    """Return the ``'label'`` entry read from the sample.json test data."""
    sample = Utils.read_json("./deep_abyasa/tests/data/sample.json")
    return sample['label']
示例#4
0
def multi_labels_delimiter():
    """Return the ``'label'`` entry read from the sample3.json test data."""
    sample = Utils.read_json("./deep_abyasa/tests/data/sample3.json")
    return sample['label']
示例#5
0
def test_create_list_of_file_paths():
    """Explicit file names are returned joined onto the root folder."""
    file_names = ['world', 'peace']
    items = Utils.create_list_of_file_paths("/Users/temp", files=file_names)

    expected_paths = ('/Users/temp/world', '/Users/temp/peace')
    for path in expected_paths:
        assert path in items
示例#6
0
def test_reserve_dict():
    """Each value of the input dict maps back to its original key."""
    forward = {0: "carbon", 1: "hydrogen", 2: "oxygen"}
    items = Utils.reserve_dict(forward)

    expected_pairs = (('carbon', 0), ('hydrogen', 1), ('oxygen', 2))
    for label, index in expected_pairs:
        assert items[label] == index
示例#7
0
def test_convert_list_to_dict():
    """Labels are expected at indices 0..2 in alphabetical order."""
    items = Utils.convert_list_to_dict(['oxygen', 'carbon', 'hydrogen'])

    expected_order = ('carbon', 'hydrogen', 'oxygen')
    for index, label in enumerate(expected_order):
        assert items[index] == label
示例#8
0
def test_create_list_of_file_paths_from_pattern():
    """With no file list, files under the root matching the pattern are found."""
    # Relies on a README.md being present in the current working directory.
    root = "./"
    items = Utils.create_list_of_file_paths(root, pattern=".*md")
    expected = './README.md'
    assert expected in items