from ml.datasets.hospital import HospitalHoloClean
from ml.plot.old.runtime_all_potential import PlotterLatex

# Hospital dataset instance.
# NOTE(review): `data` is not referenced again in the visible part of this
# script — confirm whether it is still needed.
data = HospitalHoloClean()


# 102 strictly increasing timestamps, one per measurement step.
# Presumably cumulative wall-clock runtime in seconds, aligned index-by-index
# with the F-score series defined below — TODO confirm units and pairing.
real_time = [3.1287178993225098, 6.199800968170166, 8.152935981750488, 10.118408918380737, 13.08483600616455, 14.816941022872925, 16.66439700126648, 18.335848808288574, 19.961424827575684, 21.69600796699524, 23.43687391281128, 25.171907901763916, 27.98496699333191, 30.31994080543518, 32.29134678840637, 34.1041738986969, 35.82223892211914, 38.037752866744995, 40.37613892555237, 42.55391979217529, 44.68676781654358, 46.98596501350403, 49.47311091423035, 53.612833976745605, 57.06164193153381, 59.49203181266785, 61.78589391708374, 63.923738956451416, 66.18580794334412, 68.69243383407593, 71.35987281799316, 73.50287199020386, 75.8047947883606, 78.90920090675354, 81.6969039440155, 83.99877500534058, 85.99708080291748, 87.61870884895325, 89.19986200332642, 90.65743780136108, 92.40753388404846, 93.92150402069092, 95.58677387237549, 97.21557378768921, 100.01405000686646, 102.60115480422974, 105.10586285591125, 107.16777801513672, 108.79560780525208, 111.01464700698853, 113.46868395805359, 115.0854709148407, 116.78143000602722, 118.30635190010071, 119.81359481811523, 121.40937685966492, 123.20198488235474, 125.0586748123169, 128.16096186637878, 130.59526586532593, 133.0130078792572, 134.73866391181946, 136.56256580352783, 139.30404901504517, 141.72917580604553, 143.4360749721527, 145.23482298851013, 147.4541358947754, 150.02832698822021, 151.94772696495056, 153.62116599082947, 155.4402289390564, 157.9165699481964, 160.11786198616028, 162.05129480361938, 164.20514297485352, 166.95434880256653, 168.7499599456787, 170.51549291610718, 172.7246379852295, 175.23837184906006, 177.242577791214, 179.20157384872437, 182.1430938243866, 184.3015899658203, 186.20164585113525, 188.26354479789734, 190.8990888595581, 192.90097880363464, 194.66154098510742, 196.44854998588562, 198.01424193382263, 199.79720902442932, 201.60879778862, 203.48117089271545, 206.5908968448639, 209.31764388084412, 212.16771292686462, 214.0033187866211, 216.24289178848267, 220.1515347957611, 222.09833788871765]


# Five F-score series, each holding 102 values in [0, 1]. Every series starts
# with 17 zeros and ends at 1.0. Presumably one series per repetition of the
# experiment, aligned index-by-index with `real_time` — TODO confirm.
# Rows are wrapped at 6 values per line (17 lines per row).
fscore_metadata_no_svd_absolute_potential = [
    [
        0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
        0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
        0.0, 0.0, 0.0, 0.0, 0.0, 0.090909090909090912,
        0.13945578231292519, 0.15015015015015015, 0.2374821173104435, 0.16159860990443092, 0.19914893617021276, 0.25700164744645798,
        0.30312750601443467, 0.19972887483054677, 0.22142857142857142, 0.24261138067931184, 0.2670726402783819, 0.2670726402783819,
        0.26933101650738489, 0.26933101650738489, 0.25891783567134269, 0.25891783567134269, 0.26422764227642276, 0.26422764227642276,
        0.27066450567260941, 0.27066450567260941, 0.27076677316293929, 0.28548895899053633, 0.45193508114856429, 0.51851851851851849,
        0.77014925373134324, 0.77014925373134324, 0.78106508875739644, 0.78106508875739644, 0.85862785862785862, 0.85862785862785862,
        0.86570247933884303, 0.86687306501547989, 0.89434364994663818, 0.89574468085106396, 0.9015873015873016, 0.9276859504132231,
        0.96303696303696307, 0.96303696303696307, 0.96303696303696307, 0.97227722772277225, 0.97227722772277225, 0.98709036742800393,
        1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
        1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
        1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
        1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
        1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
        1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
        1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
    ],
    [
        0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
        0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
        0.0, 0.0, 0.0, 0.0, 0.0, 0.090909090909090912,
        0.16470588235294117, 0.1753246753246753, 0.26810477657935283, 0.23980222496909762, 0.29868578255675032, 0.37442922374429227,
        0.43516483516483517, 0.24377318494965552, 0.26582940868655158, 0.27946295375435104, 0.30641213901125797, 0.30641213901125797,
        0.31137140068326014, 0.31137140068326014, 0.31543299467827768, 0.31543299467827768, 0.32283464566929132, 0.32365961633054596,
        0.32612966601178789, 0.33104799216454456, 0.33674963396778917, 0.37447698744769875, 0.37447698744769875, 0.37447698744769875,
        0.38568588469184889, 0.40476190476190471, 0.40476190476190471, 0.41996911991765312, 0.43367346938775508, 0.43932411674347166,
        0.45514445007602639, 0.45514445007602639, 0.45514445007602639, 0.45514445007602639, 0.45670886075949363, 0.45670886075949363,
        0.47904191616766473, 0.47904191616766473, 0.47904191616766473, 0.48131539611360247, 0.48584202682563332, 0.5007378258730939,
        0.5007378258730939, 0.51859398879266427, 0.51859398879266427, 1.0, 1.0, 1.0,
        1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
        1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
        1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
        1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
        1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
        1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
    ],
    [
        0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
        0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
        0.0, 0.0, 0.0, 0.0, 0.0, 0.090909090909090912,
        0.15540540540540543, 0.16190476190476188, 0.25339366515837108, 0.23884514435695542, 0.30555555555555552, 0.38507821901323708,
        0.44855491329479774, 0.24726477024070026, 0.26997840172786175, 0.29483767961681745, 0.22630198576245783, 0.22630198576245783,
        0.23217618514371036, 0.23217618514371036, 0.23538119911176908, 0.23538119911176908, 0.23997000374953129, 0.23997000374953129,
        0.24194756554307115, 0.24739195230998512, 0.25185185185185188, 0.25185185185185188, 0.26976069615663523, 0.27226647356987688,
        0.28880866425992779, 0.28880866425992779, 0.30039525691699603, 0.30039525691699603, 0.30039525691699603, 0.31683873264506945,
        0.31683873264506945, 0.31683873264506945, 0.2983316977428852, 0.31345826235093699, 0.31575365770670299, 0.32682425488180877,
        0.33424283765347884, 0.33424283765347884, 0.45722171113155474, 0.79967819790828631, 0.79967819790828631, 0.79967819790828631,
        0.79967819790828631, 0.79967819790828631, 0.79967819790828631, 0.79967819790828631, 0.79967819790828631, 1.0,
        1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
        1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
        1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
        1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
        1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
        1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
    ],
    [
        0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
        0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
        0.0, 0.0, 0.0, 0.0, 0.0, 0.090909090909090912,
        0.16470588235294117, 0.16510903426791276, 0.25481481481481483, 0.22196531791907512, 0.20980392156862746, 0.27573182247403211,
        0.32014719411223552, 0.19941916747337851, 0.22179732313575526, 0.24445493157149598, 0.27044609665427505, 0.27044609665427505,
        0.27365491651205937, 0.27365491651205937, 0.27885921231326394, 0.27885921231326394, 0.28518859245630174, 0.28518859245630174,
        0.32113225963884823, 0.32113225963884823, 0.34296724470134876, 0.34296724470134876, 0.38912133891213391, 0.40991223541559113,
        0.42132239876986161, 0.42616249361267239, 0.42937276899541055, 0.43654822335025378, 0.43654822335025378, 0.43654822335025378,
        0.45995893223819301, 0.46019517205957883, 0.47219307450157394, 0.47299423177766126, 0.49275362318840582, 0.49275362318840582,
        0.49275362318840582, 0.49275362318840582, 0.49275362318840582, 0.49275362318840582, 0.49275362318840582, 0.51521298174442187,
        0.51596553471870255, 0.78853601859024014, 0.9922027290448342, 0.9922027290448342, 0.9922027290448342, 0.9922027290448342,
        0.9922027290448342, 0.9922027290448342, 0.9922027290448342, 0.9922027290448342, 0.9922027290448342, 0.9922027290448342,
        0.9922027290448342, 0.9922027290448342, 0.9922027290448342, 0.9922027290448342, 0.9922027290448342, 1.0,
        1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
        1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
        1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
        1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
    ],
    [
        0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
        0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
        0.0, 0.0, 0.0, 0.0, 0.0, 0.090909090909090912,
        0.12006319115323853, 0.14141414141414144, 0.22589531680440775, 0.23355704697986576, 0.28422425032594523, 0.36724565756823818,
        0.41826923076923073, 0.2310712282669658, 0.25649530127142067, 0.28213507625272327, 0.21629855293221631, 0.21629855293221631,
        0.21833396728794222, 0.21833396728794222, 0.22371866816311259, 0.22371866816311259, 0.22820318423047767, 0.23356009070294786,
        0.23356009070294786, 0.25531914893617019, 0.2585291887793783, 0.27428571428571424, 0.28748068006182381, 0.29275808936825887,
        0.29275808936825887, 0.2934154793993069, 0.29135270900609972, 0.29135270900609972, 0.29135270900609972, 0.29135270900609972,
        0.30510896748838873, 0.3171337353671515, 0.3171337353671515, 0.43864734299516911, 0.43864734299516911, 0.72931726907630523,
        0.73120000000000007, 0.73120000000000007, 0.74444444444444446, 0.80445969125214412, 0.81260647359454852, 0.84335309060118546,
        0.84335309060118546, 0.84335309060118546, 0.85016835016835013, 0.85016835016835013, 0.85016835016835013, 0.85016835016835013,
        0.85016835016835013, 0.85016835016835013, 0.99123661148977604, 0.99123661148977604, 0.99123661148977604, 0.99123661148977604,
        0.99123661148977604, 0.99123661148977604, 0.99123661148977604, 0.99123661148977604, 1.0, 1.0,
        1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
        1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
        1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
        1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
    ],
]



# Reference values for the dboost, nadeef and openrefine comparison entries.

# Time horizon in minutes; multiplied by 60 below to form the last entry of
# `nadeef_time`.
end_time_in_minutes = 48

# dboost: a single F-score and its runtime (seconds, per the variable name).
dboost_fscore = 0.5763
runtime_dboost_sec = 24

# nadeef: 11 F-score samples. Presumably paired index-by-index with the 11
# entries of `nadeef_time` — TODO confirm.
nadeef_fscore = [
    0.0,
    0.040295500335795834,
    0.052003410059676042,
    0.061026352288488211,
    0.054528930627082708,
    0.052949640287769786,
    0.053433521185556256,
    0.055248618784530384,
    0.055248618784530384,
    0.05564746578432847,
    0.05564746578432847,
]

# The last sample is pinned to the end of the time horizon
# (end_time_in_minutes * 60), which repeats the final F-score out to that point.
nadeef_time = [
    0,
    3.061776876449585,
    5.8756139278411865,
    8.678483009338379,
    11.494767904281616,
    14.31603479385376,
    17.18073296546936,
    20.15193200111389,
    23.039172887802124,
    26.00995182991028,
    end_time_in_minutes * 60,
]

# openrefine: a single F-score and time value.
# NOTE(review): the unit of `openrefine_time` is not stated here — confirm.
openrefine_fscore = 1.0
openrefine_time = 3
    # NOTE(review): this indented run has no visible enclosing `def` line in this
    # chunk. Presumably it is the body of setup_deep_learning_env(dataSet), which
    # is called at the bottom of the file — TODO confirm.
    # NOTE(review): `os` is used below, but no `import os` is visible here.
    # `dirty_pd` is presumably a pandas DataFrame (it is indexed by column label
    # and its columns are written with `.to_csv`) — verify against the dataset class.
    dirty = dataSet.dirty_pd

    # One pass per column index (dataSet.shape[1] iterations).
    for i in range(dataSet.shape[1]):
        # The four working directories for column i, built under a hard-coded
        # absolute root plus the dataset name.
        directories = ["/home/felix/SequentialPatternErrorDetection/data/" + dataSet.name + "/column_" + str(i) + "/orig_input",
                       "/home/felix/SequentialPatternErrorDetection/data/" + dataSet.name + "/column_" + str(i) + "/cv",
                       "/home/felix/SequentialPatternErrorDetection/data/" + dataSet.name + "/column_" + str(i) + "/features",
                       "/home/felix/SequentialPatternErrorDetection/data/" + dataSet.name + "/column_" + str(i) + "/input"
                       ]

        # Create each directory that does not exist yet.
        # NOTE(review): the loop variable `dir` shadows the builtin of the same name.
        for dir in directories:
            if not os.path.exists(dir):
                os.makedirs(dir)


        # Write column i to orig_input/column_<i>.txt. index=False and header=None
        # are presumably meant to suppress the index and header row — verify
        # against the pandas version in use.
        dirty[dirty.columns[i]].to_csv("/home/felix/SequentialPatternErrorDetection/data/" + dataSet.name + "/column_" + str(i) + "/orig_input/column_" + str(i) + ".txt", index=False, header=None)


# Disabled variant for the Flights dataset. It is kept as a bare string
# literal, so none of the code inside it runs.
'''
from ml.flights.FlightHoloClean import FlightHoloClean
dataSet = FlightHoloClean()

dirty = dataSet.dirty_pd

for i in range(dataSet.shape[1]):
    dirty[dirty.columns[i]].to_csv("/home/felix/SequentialPatternErrorDetection/data/Flights/column_" + str(i) + ".txt", index=False, header=None)
'''

from ml.datasets.hospital import HospitalHoloClean

# Script entry: build the Hospital dataset and pass it to
# setup_deep_learning_env. This runs at import time (no __main__ guard).
# NOTE(review): the `def setup_deep_learning_env` line is not visible in this
# chunk — confirm it is defined above.
dataSet = HospitalHoloClean()
setup_deep_learning_env(dataSet)