Exemplo n.º 1
0
def main(args):
  """Add a "label" column to every corpus data file and copy the corpus.

  Unless ``args.absolutePaths`` is set, ``args.labelDir``, ``args.dataDir``
  and ``args.destDir`` are first joined onto the module-level ``root``.
  If ``checkInputs(args)`` is falsy the function returns without doing
  anything else. Otherwise it loads the corpus from ``args.dataDir`` and
  the labels from ``args.labelDir``, wraps the "label" entry of each label
  record in a ``pandas.Series`` keyed by relative path, adds those series
  to the corpus as the "label" column, and copies the corpus to
  ``args.destDir``.

  @param args   Parsed command-line namespace providing absolutePaths,
                labelDir, dataDir and destDir.
  """
  if not args.absolutePaths:
    for attr in ("labelDir", "dataDir", "destDir"):
      setattr(args, attr, os.path.join(root, getattr(args, attr)))

  if not checkInputs(args):
    return

  corpus = Corpus(args.dataDir)

  corpusLabel = CorpusLabel(args.labelDir, corpus=corpus)
  corpusLabel.getEverything()

  # One Series per data file, keyed by the file's relative path.
  columnData = {
    relativePath: pandas.Series(fileLabels["label"])
    for relativePath, fileLabels in corpusLabel.labels.items()
  }

  corpus.addColumn("label", columnData)
  corpus.copy(newRoot=args.destDir)

  # Single-argument call form: prints the same text on Python 2 and 3.
  print("Done adding labels!")
Exemplo n.º 2
0
def main(args):
    """Add a "label" column to every corpus data file and copy the corpus.

    Unless ``args.absolutePaths`` is set, ``args.labelDir``,
    ``args.dataDir`` and ``args.destDir`` are first joined onto the
    module-level ``root``. If ``checkInputs(args)`` is falsy the function
    returns without doing anything else. Otherwise it loads the corpus from
    ``args.dataDir`` and the labels from ``args.labelDir``, wraps the
    "label" entry of each label record in a ``pandas.Series`` keyed by
    relative path, adds those series to the corpus as the "label" column,
    and copies the corpus to ``args.destDir``.

    @param args   Parsed command-line namespace providing absolutePaths,
                  labelDir, dataDir and destDir.
    """
    if not args.absolutePaths:
        for attr in ("labelDir", "dataDir", "destDir"):
            setattr(args, attr, os.path.join(root, getattr(args, attr)))

    if not checkInputs(args):
        return

    corpus = Corpus(args.dataDir)

    corpusLabel = CorpusLabel(args.labelDir, corpus=corpus)
    corpusLabel.getEverything()

    # One Series per data file, keyed by the file's relative path.
    columnData = {
        relativePath: pandas.Series(fileLabels["label"])
        for relativePath, fileLabels in corpusLabel.labels.items()
    }

    corpus.addColumn("label", columnData)
    corpus.copy(newRoot=args.destDir)

    # Single-argument call form: prints the same text on Python 2 and 3.
    print("Done adding labels!")
Exemplo n.º 3
0
def main(args):
    """Run sortData on every .csv file in args.dataDir, writing to args.destDir.

    Unless ``args.absolutePaths`` is set, ``args.dataDir`` and
    ``args.destDir`` are first joined onto the module-level ``root``.
    If ``checkInputs(args)`` is falsy the function returns without touching
    the file system. Otherwise ``args.destDir`` is created when missing and
    each file in ``args.dataDir`` whose name ends in ".csv" is passed to
    ``sortData`` together with an output path of the same file name inside
    ``args.destDir``.

    @param args   Parsed command-line namespace providing absolutePaths,
                  dataDir and destDir.
    """
    if not args.absolutePaths:
        args.dataDir = os.path.join(root, args.dataDir)
        args.destDir = os.path.join(root, args.destDir)

    if not checkInputs(args):
        return

    if not os.path.exists(args.destDir):
        os.makedirs(args.destDir)

    # The directory is listed once, before any output file is written.
    for datafile in os.listdir(args.dataDir):
        if not datafile.endswith(".csv"):
            continue
        sortData(os.path.join(args.dataDir, datafile),
                 os.path.join(args.destDir, datafile))

    # One pre-formatted string so the call prints identically on Python 2
    # and 3; the double space matches what the old print statement emitted.
    print("Sorted files written to  %s" % (args.destDir,))
Exemplo n.º 4
0
def main(args):
  """Run sortData on every .csv file in args.dataDir, writing to args.destDir.

  Unless ``args.absolutePaths`` is set, ``args.dataDir`` and
  ``args.destDir`` are first joined onto the module-level ``root``.
  If ``checkInputs(args)`` is falsy the function returns without touching
  the file system. Otherwise ``args.destDir`` is created when missing and
  each file in ``args.dataDir`` whose name ends in ".csv" is passed to
  ``sortData`` together with an output path of the same file name inside
  ``args.destDir``.

  @param args   Parsed command-line namespace providing absolutePaths,
                dataDir and destDir.
  """
  if not args.absolutePaths:
    args.dataDir = os.path.join(root, args.dataDir)
    args.destDir = os.path.join(root, args.destDir)

  if not checkInputs(args):
    return

  if not os.path.exists(args.destDir):
    os.makedirs(args.destDir)

  # The directory is listed once, before any output file is written.
  for datafile in os.listdir(args.dataDir):
    if not datafile.endswith(".csv"):
      continue
    sortData(os.path.join(args.dataDir, datafile),
             os.path.join(args.destDir, datafile))

  # One pre-formatted string so the call prints identically on Python 2
  # and 3; the double space matches what the old print statement emitted.
  print("Sorted files written to  %s" % (args.destDir,))
Exemplo n.º 5
0
                      default="labels/combined_windows.json",
                      help="Where the combined windows file will be stored")

  # Boolean switch: False unless --absolutePaths is given on the command line.
  parser.add_argument("--absolutePaths",
                      default=False,
                      action="store_true",
                      help="If specified, paths are absolute paths")

  # Float option, 0.5 when omitted. The help text uses backslash line
  # continuations inside the string literal, so the indentation of the
  # continued lines is part of the string itself.
  parser.add_argument("--threshold",
                      default=0.5,
                      type=float,
                      help="The percentage agreement you would like between all\
                      labelers for a record to be considered anomalous (should \
                      be a number between 0 and 1)")
                      
  # Integer option, 1 when omitted; the help text lists 0, 1 and 2 as the
  # acceptable values, but no `choices` restriction is enforced here.
  parser.add_argument("--verbosity",
                      default=1,
                      type=int,
                      help="Set the level of verbosity; to print out labeling \
                      metrics during the process, acceptable values are 0,1,2")

  # Boolean switch: False unless --skipConfirmation is given.
  parser.add_argument("--skipConfirmation",
                    default=False,
                    action="store_true",
                    help="If specified will skip the user confirmation step")

  args = parser.parse_args()

  # Short-circuit: with --skipConfirmation set, checkInputs is never called;
  # otherwise main runs only when checkInputs(args) returns a truthy value.
  if args.skipConfirmation or checkInputs(args):
    main(args)
Exemplo n.º 6
0
def initialize_args_and_run():
    """Parse the command line, fill in dataset paths and launch ``main``.

    Steps, in order:

    1. Register and parse the command-line options.
    2. If none of --detect/--optimize/--score/--normalize was given, turn
       all four on.
    3. If exactly one detector value was supplied, split it on commas so a
       "null,numenta"-style argument becomes a list.
    4. Derive the project root from this file's real path and attach
       dataDir, windowsFile, resultsDir, thresholdsFile and profilesFile to
       the parsed namespace, choosing the Yahoo or NAB locations from
       --data.
    5. Unless confirmation is skipped or ``checkInputs(args)`` is truthy,
       stop; otherwise write the first detector name and the data
       directory to "timing.csv" and call ``main(args)``.
    """
    parser = argparse.ArgumentParser()

    # Boolean switches, registered in the order they should show in --help.
    switches = (
        ("--detect",
         "Generate detector results but do not analyze results files."),
        ("--optimize",
         "Optimize the thresholds for each detector and user profile "
         "combination"),
        ("--score", "Analyze results in the results directory"),
        ("--normalize", "Normalize the final scores"),
        ("--skipConfirmation",
         "If specified will skip the user confirmation step"),
    )
    for flag, description in switches:
        parser.add_argument(flag,
                            action="store_true",
                            default=False,
                            help=description)

    parser.add_argument("--data",
                        default="N",
                        help="Y if Yahoo and N if NAB (default)")

    parser.add_argument(
        "-d",
        "--detectors",
        type=str,
        nargs="*",
        help="Comma separated list of detector(s) to use, e.g. null,numenta",
        default=[
            "null", "numenta", "random", "skyline", "bayesChangePt",
            "windowedGaussian", "expose", "relativeEntropy"
        ])

    parser.add_argument(
        "-n",
        "--numCPUs",
        default=None,
        help="The number of CPUs to use to run the benchmark. If not "
        "specified all CPUs will be used.")

    args = parser.parse_args()

    # No stage selected explicitly means "run every stage".
    stages = ("detect", "optimize", "score", "normalize")
    if not any(getattr(args, stage) for stage in stages):
        for stage in stages:
            setattr(args, stage, True)

    if len(args.detectors) == 1:
        # Handle a comma-separated list passed as a single argument.
        args.detectors = args.detectors[0].split(",")

    # No detector modules are imported here.
    # NOTE(review): getDetectorClassConstructors presumably needs the
    # detector classes to be imported somewhere else -- confirm.
    # To run the expose detector you must have sklearn version 0.16.1
    # installed; higher versions of sklearn may not be compatible with the
    # numpy version 1.9.2 required to run nupic.

    ### Dataset selection
    # Project root: this file's real path with its last three components
    # (two directories plus the file name) removed.
    path_parts = os.path.realpath(__file__).split('/')
    root = '/'.join(path_parts[:-3])

    if args.data == 'Y':
        data_rel, windows_rel, results_rel, thresholds_rel = (
            'data/data_yahoo',
            'data/labels/yahoo_windows.json',
            'experiments/result_yahoo',
            'experiments/config/thresholds_yahoo.json',
        )
    else:
        data_rel, windows_rel, results_rel, thresholds_rel = (
            'data/data_nab',
            'data/labels/combined_windows.json',
            'experiments/result_nab',
            'experiments/config/thresholds.json',
        )

    args.dataDir = os.path.join(root, data_rel)
    args.windowsFile = os.path.join(root, windows_rel)
    args.resultsDir = os.path.join(root, results_rel)
    args.thresholdsFile = os.path.join(root, thresholds_rel)
    args.profilesFile = os.path.join(root, 'experiments/config/profiles.json')

    # checkInputs is only consulted when confirmation was not skipped.
    if not (args.skipConfirmation or checkInputs(args)):
        return

    # Start a fresh timing file in the current working directory.
    with open("timing.csv", "w") as timing_file:
        timing_file.write(args.detectors[0] + ', ' + args.dataDir + '\n')
    main(args)
Exemplo n.º 7
0
  corpusLabel.initialize()

  print "Success!"


if __name__ == "__main__":
  # Script entry point: build the option parser, parse the command line and
  # hand the result to main() once checkInputs() approves it.
  parser = argparse.ArgumentParser()

  # No default is given, so args.labelDir is None when the option is omitted.
  parser.add_argument(
    "--labelDir",
    help="This directory holds all the individual labels")

  parser.add_argument(
    "--dataDir",
    help="This holds all the label windows for the corpus",
    default="data")

  parser.add_argument(
    "--destDir",
    default="labels",
    help="Where you want to store the combined labels")

  # Boolean switch: False unless --absolutePaths is given. The help literal
  # uses a backslash continuation, so the continued line's leading
  # whitespace is part of the string.
  parser.add_argument(
    "--absolutePaths",
    action="store_true",
    default=False,
    help="Whether file paths entered are not relative to \
                      NAB root")

  args = parser.parse_args()

  # main() runs only when checkInputs(args) returns a truthy value.
  if checkInputs(args):
    main(args)