def spam_maker(args):
    """Produce the spam payload selected by the parsed arguments.

    The source is chosen in priority order:

    1. ``args.json`` truthy -- the result of ``shuffle_from_file(args.json)``.
    2. ``args.text`` truthy -- the result of
       ``utils.read_from_text(args.text)``.
    3. otherwise -- a freshly generated list of 2000 strings, each made of
       400 words from ``get_random_word()`` joined by single spaces.

    Args:
        args: Object exposing ``json`` and ``text`` attributes.

    Returns:
        Whatever the selected source yields; a list of strings in the
        generated case.
    """
    if args.json:
        return shuffle_from_file(args.json)

    if args.text:
        return utils.read_from_text(args.text)

    # No input file given: fall back to randomly generated messages.
    messages = []
    for _ in range(2000):
        words = [get_random_word() for _ in range(400)]
        messages.append(" ".join(words))
    return messages
示例#2
0
def data_seletor(dataset_name):
  """Load the points and labels of a named dataset.

  Args:
    dataset_name: One of 'hand_write_digits', '5d5c', 'letter-recognition',
      'lung-cancer', 'image_seg', '20d6c' or '50d6c'.

  Returns:
    A ``(points, label)`` pair. For 'hand_write_digits' these are the
    ``data`` and ``target`` attributes of ``datasets.load_digits``; for
    every other name they are the two values produced by
    ``utl.read_from_text`` for the matching file.

  Raises:
    ValueError: If ``dataset_name`` is not a supported name. (Previously an
      always-true ``assert("not found")`` let the call fall through to an
      UnboundLocalError on the return statement.)
  """
  # Public dataset name -> name handed to utl.read_from_text.
  text_sources = {
    '5d5c': '5d5c_std',  # 5 classes, 25 dimensions
    'letter-recognition': 'letter-recognition',
    'lung-cancer': 'lung-cancer',
    'image_seg': 'imgseg',
    '20d6c': '20d6c_std',
    '50d6c': '50d6c_std',
  }

  if dataset_name == 'hand_write_digits':
    # Full digits set: 10 classes, ~180 samples per class, 1797 samples,
    # 64 features holding integers 0-16. Only 5 classes are requested here,
    # so the loaded subset is smaller than those figures.
    seleted = datasets.load_digits(n_class=5)
    return seleted.data, seleted.target

  if dataset_name in text_sources:
    points, label = utl.read_from_text(text_sources[dataset_name])
    return points, label

  supported = ['hand_write_digits'] + sorted(text_sources)
  raise ValueError(
    "dataset {!r} not found; expected one of {}".format(dataset_name, supported)
  )
示例#3
0
    res_cls.append(cur_cls)

  for cls in tmp_clusters:
    res_cls.append(cls)

  print("#cls {} -> {}".format(len(clusters), len(res_cls)))
  print(calc_num_point(res_cls))

  return res_cls


if __name__ == '__main__':
  # Run the doctests embedded in this module before the demo below.
  doctest.testmod()

  # Load the demo dataset; the loader result is unpacked into points and labels.
  points, label = utl.read_from_text('2d5c_noncycle')

  # Preprocess the points. Going by the helper names this is presumably
  # mean-centering followed by scaling -- TODO confirm against utl.
  points = utl.centralize_data(points)
  points = utl.normalize_data(points)

  # Alternative inputs kept for experimentation:
  # points, label = utl.read_from_text('2d5c_cov')
  # points, label = utl.read_from_text('hand_write_digit_2d')
  # seleted = datasets.load_digits()
  # points = seleted.data
  # label = seleted.target
  #
  # Build the ms2c object over the preprocessed points.
  ms_tree = ms2c(points)
  # paint_tree(ms_tree, ms_tree)

  # Run the merge step and keep its result, then read grounded_nodes off the
  # tree. NOTE(review): grounded_nodes is read after merge(); whether merge()
  # is what populates it is not visible here -- confirm before reordering.
  final_nodes = ms_tree.merge()
  grounded_nodes = ms_tree.grounded_nodes