def main(datafile, threshold):
    """Build (or load a cached) cluster hierarchy for ``datafile`` and print it.

    The hierarchy is cached as XML in ``out/<basename of datafile.name>.hrc``.
    When that file does not exist, the CSV is read, a ``Hierarchical``
    estimator is fitted inside a ``Pipeline`` and the resulting hierarchy is
    written to the cache file; otherwise the hierarchy is parsed back from
    the cache file.  In both cases the hierarchy XML is printed.

    Args:
        datafile: An open text-mode file object exposing ``name``,
            ``readline`` and ``seek``.  Its first line is a comma-separated
            row; every column whose field on that row is exactly ``"0"`` is
            skipped when the CSV is loaded.
        threshold: Value handed to ``hierarchy.cut``.  When ``None`` the
            cut / cluster listing / ``dump_graph`` step is skipped.
    """
    filename = os.path.join("out", os.path.basename(datafile.name) + ".hrc")

    if not os.path.isfile(filename):
        # Strip the line terminator first: otherwise the last field reads
        # "0\n", never compares equal to "0", and the final column would be
        # selected regardless of its toggle.
        header = datafile.readline().rstrip("\r\n")
        collist = [i for i, toggle in enumerate(header.split(",")) if toggle != "0"]
        # Rewind so pandas sees the file from the start, header row included.
        datafile.seek(0)
        # ``DataFrame.as_matrix()`` was removed in pandas 1.0; ``.values``
        # yields the same ndarray on both old and new pandas versions.
        data = pd.read_csv(datafile, usecols=collist).values

        pipeline = Pipeline([("clf", Hierarchical())])
        # No parameter overrides are supplied here (empty mapping).
        pipeline.set_params(**{})
        pipeline.fit(data)

        clf = pipeline.get_params()["clf"]
        hierarchy = clf.hierarchy_

        with open(filename, "wb") as fh:
            fh.write(ET.tostring(hierarchy.to_xml()))
    else:
        with open(filename, "rb") as fh:
            hierarchy = Cluster.from_xml(ET.parse(fh).getroot())

    print(ET.tostring(hierarchy.to_xml()).decode("utf-8"))

    # Identity comparison: ``!= None`` would invoke a custom ``__ne__`` (and
    # would be element-wise for array-like thresholds).
    if threshold is not None:
        clusters = hierarchy.cut(threshold)
        print("\n".join(c.to_str(i) for i, c in enumerate(clusters)))
        dump_graph(clusters)
 def test_from_xml_small2(self):
     # A <tree> element holding two <leaf> children should parse into a
     # Cluster equal to one built from two single-array child Clusters.
     first_child = Cluster([np.array([3,0,4])])
     second_child = Cluster([np.array([0,0,0])])
     expected = Cluster([first_child, second_child])

     root = ET.fromstring('<tree height="5.0"><leaf data="[3, 0, 4]"/><leaf data="[0, 0, 0]"/></tree>')
     parsed = Cluster.from_xml(root)

     self.assertEqual(parsed, expected)
 def test_from_xml(self):
     # Parsing the fixture XML (self.xml) must give a Cluster equal to the
     # fixture tree (self.tree).
     self.assertEqual(Cluster.from_xml(self.xml), self.tree)
 def test_from_xml_small(self):
     # A <tree> element with a single <leaf> should parse into a Cluster
     # equal to one built directly from that leaf's array.
     root = ET.fromstring('<tree height="0.0"><leaf data="[7, 4, 8]"/></tree>')
     parsed = Cluster.from_xml(root)

     self.assertEqual(parsed, Cluster([np.array([7,4,8])]))