def main(args):
    """Add a "label" column to every corpus data file and copy the result.

    Unless ``args.absolutePaths`` is set, ``args.labelDir``, ``args.dataDir``
    and ``args.destDir`` are first joined onto the module-level ``root``.
    The function returns without doing anything when ``checkInputs(args)``
    is falsy.

    @param args  Parsed command-line namespace providing ``absolutePaths``,
                 ``labelDir``, ``dataDir`` and ``destDir``.
    """
    if not args.absolutePaths:
        # Same resolution order as before: labelDir, dataDir, destDir.
        for attrName in ("labelDir", "dataDir", "destDir"):
            setattr(args, attrName, os.path.join(root, getattr(args, attrName)))

    if not checkInputs(args):
        return

    dataCorpus = Corpus(args.dataDir)
    labelStore = CorpusLabel(args.labelDir, corpus=dataCorpus)
    labelStore.getEverything()

    # One pandas Series of labels per data file, keyed by its relative path.
    labelColumns = {
        path: pandas.Series(labelStore.labels[path]["label"])
        for path in labelStore.labels.keys()
    }

    dataCorpus.addColumn("label", labelColumns)
    dataCorpus.copy(newRoot=args.destDir)

    print("Done adding labels!")
def main(args):
    """Run ``sortData`` on every ``.csv`` file in the data directory.

    Unless ``args.absolutePaths`` is set, ``args.dataDir`` and
    ``args.destDir`` are first joined onto the module-level ``root``.
    The function returns without doing anything when ``checkInputs(args)``
    is falsy. The destination directory is created if it does not exist,
    and each output file keeps the name of its input file.

    @param args  Parsed command-line namespace providing ``absolutePaths``,
                 ``dataDir`` and ``destDir``.
    """
    if not args.absolutePaths:
        for attrName in ("dataDir", "destDir"):
            setattr(args, attrName, os.path.join(root, getattr(args, attrName)))

    if not checkInputs(args):
        return

    if not os.path.exists(args.destDir):
        os.makedirs(args.destDir)

    for fileName in os.listdir(args.dataDir):
        if not fileName.endswith(".csv"):
            continue
        sortData(os.path.join(args.dataDir, fileName),
                 os.path.join(args.destDir, fileName))

    # The message ends in a space and a separator space follows it, exactly
    # as the two-item print statement used to emit.
    print("%s %s" % ("Sorted files written to ", args.destDir))
default="labels/combined_windows.json", help="Where the combined windows file will be stored") parser.add_argument("--absolutePaths", default=False, action="store_true", help="If specified, paths are absolute paths") parser.add_argument("--threshold", default=0.5, type=float, help="The percentage agreement you would like between all\ labelers for a record to be considered anomalous (should \ be a number between 0 and 1)") parser.add_argument("--verbosity", default=1, type=int, help="Set the level of verbosity; to print out labeling \ metrics during the process, acceptable values are 0,1,2") parser.add_argument("--skipConfirmation", default=False, action="store_true", help="If specified will skip the user confirmation step") args = parser.parse_args() if args.skipConfirmation or checkInputs(args): main(args)
def _build_arg_parser():
    """Return the argparse parser describing the benchmark's command line."""
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "--detect",
        help="Generate detector results but do not analyze results "
        "files.",
        default=False,
        action="store_true")

    parser.add_argument(
        "--optimize",
        help="Optimize the thresholds for each detector and user "
        "profile combination",
        default=False,
        action="store_true")

    parser.add_argument(
        "--score",
        help="Analyze results in the results directory",
        default=False,
        action="store_true")

    parser.add_argument(
        "--normalize",
        help="Normalize the final scores",
        default=False,
        action="store_true")

    parser.add_argument(
        "--skipConfirmation",
        help="If specified will skip the user confirmation step",
        default=False,
        action="store_true")

    parser.add_argument(
        "--data",
        help="Y if Yahoo and N if NAB (default)",
        default="N")

    parser.add_argument(
        "-d",
        "--detectors",
        nargs="*",
        type=str,
        default=[
            "null", "numenta", "random", "skyline", "bayesChangePt",
            "windowedGaussian", "expose", "relativeEntropy"
        ],
        help="Comma separated list of detector(s) to use, e.g. "
        "null,numenta")

    parser.add_argument(
        "-n",
        "--numCPUs",
        default=None,
        help="The number of CPUs to use to run the "
        "benchmark. If not specified all CPUs will be used.")

    return parser


def _set_dataset_paths(args, root):
    """Fill in the data, label, result and config paths on ``args``.

    ``args.data == 'Y'`` selects the Yahoo locations; any other value
    selects the NAB locations. ``args.profilesFile`` is the same for both.
    """
    if args.data == 'Y':
        args.dataDir = os.path.join(root, 'data/data_yahoo')
        args.windowsFile = os.path.join(root, 'data/labels/yahoo_windows.json')
        args.resultsDir = os.path.join(root, 'experiments/result_yahoo')
        args.thresholdsFile = os.path.join(
            root, 'experiments/config/thresholds_yahoo.json')
    else:
        args.dataDir = os.path.join(root, 'data/data_nab')
        args.windowsFile = os.path.join(root,
                                        'data/labels/combined_windows.json')
        args.resultsDir = os.path.join(root, 'experiments/result_nab')
        args.thresholdsFile = os.path.join(
            root, 'experiments/config/thresholds.json')

    args.profilesFile = os.path.join(root, 'experiments/config/profiles.json')


def initialize_args_and_run():
    """Parse the command line, resolve dataset paths and start the benchmark.

    Steps, in order:
      1. Parse ``sys.argv``.
      2. If none of ``--detect``, ``--optimize``, ``--score`` or
         ``--normalize`` was given, enable all four.
      3. If exactly one detector value was given, split it on commas.
      4. Derive the project root (three directory levels above this file)
         and set the dataset-dependent paths on ``args``.
      5. If ``--skipConfirmation`` was given or ``checkInputs(args)`` is
         truthy, write a header line to ``timing.csv`` in the current
         working directory and call ``main(args)``.
    """
    args = _build_arg_parser().parse_args()

    if not (args.detect or args.optimize or args.score or args.normalize):
        args.detect = True
        args.optimize = True
        args.score = True
        args.normalize = True

    if len(args.detectors) == 1:
        # Handle comma-separated list argument.
        args.detectors = args.detectors[0].split(",")

    # The following imports are necessary for getDetectorClassConstructors to
    # automatically figure out the detector classes.
    # Only import detectors if used so as to avoid unnecessary dependency.
    # if "bayesChangePt" in args.detectors:
    #     from nab.detectors.bayes_changept.bayes_changept_detector import (
    #         BayesChangePtDetector)
    # if "numenta" in args.detectors:
    #     from nab.detectors.numenta.numenta_detector import NumentaDetector
    # if "numentaTM" in args.detectors:
    #     from nab.detectors.numenta.numentaTM_detector import NumentaTMDetector
    # if "null" in args.detectors:
    #     from nab.detectors.null.null_detector import NullDetector
    # if "random" in args.detectors:
    #     import nab.detectors.random.random_detector
    #     global RandomDetector
    # if "skyline" in args.detectors:
    #     from nab.detectors.skyline.skyline_detector import SkylineDetector
    # if "windowedGaussian" in args.detectors:
    #     from nab.detectors.gaussian.windowedGaussian_detector import (
    #         WindowedGaussianDetector)
    # if "relativeEntropy" in args.detectors:
    #     from nab.detectors.relative_entropy.relative_entropy_detector import (
    #         RelativeEntropyDetector)

    # To run expose detector, you must have sklearn version 0.16.1 installed.
    # Higher versions of sklearn may not be compatible with numpy version 1.9.2
    # required to run nupic.
    # if "expose" in args.detectors:
    #     from nab.detectors.expose.expose_detector import ExposeDetector
    # if "contextOSE" in args.detectors:
    #     from nab.detectors.context_ose.context_ose_detector import (
    #         ContextOSEDetector)

    ### Dataset selection
    # Walk up with os.path.dirname instead of splitting on '/': splitting
    # yields an empty root wherever realpath() uses another separator, which
    # silently turned every path below into a relative one.
    script_path = os.path.realpath(__file__)
    root = os.path.dirname(os.path.dirname(os.path.dirname(script_path)))
    _set_dataset_paths(args, root)

    if args.skipConfirmation or checkInputs(args):
        # "-d" with no values parses to an empty list; fall back to an empty
        # label rather than raising IndexError after the confirmation step.
        first_detector = args.detectors[0] if args.detectors else ""
        with open("timing.csv", "w") as timing_file:
            timing_file.write(first_detector + ', ' + args.dataDir + '\n')
        main(args)
corpusLabel.initialize() print "Success!" if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--labelDir", help="This directory holds all the individual labels") parser.add_argument("--dataDir", default="data", help="This holds all the label windows for the corpus") parser.add_argument("--destDir", help="Where you want to store the combined labels", default="labels") parser.add_argument("--absolutePaths", help="Whether file paths entered are not relative to \ NAB root", default=False, action="store_true") args = parser.parse_args() if checkInputs(args): main(args)