Exemplo n.º 1
0
 def _makeTable(self, element):
   """Read the data source described by a configuration element.

   The element's attributes select what to read and how:
     "file"   -- path of the file to read
     "type"   -- one of "UniTable", "CSV", "XML" or "XMLEvents"
     "dir"    -- (CSV only, optional) directory whose files are all read
                 instead of "file"
     "header", "sep", "types" -- (CSV only, optional) forwarded to
                 from_csv_file as header, insep and types

   Returns:
     "UniTable": a one-element list holding the wrapped table.
     "CSV": a list of wrapped tables, one per file read.
     "XML": whatever xmlFile(fileName) returns.
     "XMLEvents": whatever xmlTable(fileName, magicheader) returns, where
       magicheader is True when the element has a "MagicHeader" child.

   Raises:
     StandardError: if the "type" attribute is not one of the above.
   """
   def _attr(name, default=None):
     # Best-effort attribute lookup: fall back to the default if the
     # element's get() raises. NOTE(review): on ElementTree elements get()
     # returns None for a missing attribute rather than raising -- confirm
     # whether the fallback is ever needed.
     try:
       return element.get(name)
     except Exception:
       return default

   if self.__timer:
     self.__timer.output('Read file')
   fileName = element.get("file")
   fileType = element.get("type")
   directory = _attr("dir", '')
   header = _attr("header")
   sep = _attr("sep")
   types = _attr("types")

   if fileType == "UniTable":
     if self.__timer:
       self.__timer.output('Read file')
     w = uniTableWrapper(uni.UniTable().from_any_file(fileName))
     if self.__timer:
       self.__timer.output('returning')
     return [w]
   elif fileType == "CSV":
     # Only pass the optional keyword arguments that were actually given.
     _args = {}
     for option, value in (('header', header), ('insep', sep), ('types', types)):
       if value:
         _args[option] = value
     if directory is not None and os.path.exists(directory):
       paths = [os.path.join(directory, f) for f in os.listdir(directory)]
       # Build a filtered list instead of removing entries from the list
       # being iterated, which skipped the entry after each removed one
       # and so could leave sub-directories in the result.
       filelist = [p for p in paths if not os.path.isdir(p)]
       return [uniTableWrapper(uni.UniTable().from_csv_file(files, **_args))
               for files in filelist]
     return [uniTableWrapper(uni.UniTable().from_csv_file(fileName, **_args))]
   elif fileType == "XML":
     return xmlFile(fileName)
   elif fileType == "XMLEvents":
     magicheader = False
     for child in element:
       if child.tag == "MagicHeader":
         magicheader = True
         break
     return xmlTable(fileName, magicheader)
   else:
     raise StandardError("File type not recognized")
Exemplo n.º 2
0
 def _collectTreeStats(self, results, stats):
     """Store each result row in the count tables of its segment.

     The last four items of every result are score, alert, segment and
     extras; everything before them (the predicted field and the fields
     used for the prediction) is what gets stored. stats is indexed by the
     tupelized segment and holds a list of UniTables per segment: rows go
     into the newest table, and a new table (preallocated for 100000 rows)
     is started once the newest one holds 20000 rows.
     """
     for row in results:
         segmentKey = Producer.tupelize(row[-2])
         tables = stats[segmentKey]
         if not tables:
             # First row seen for this segment: start its table list.
             first = uni.UniTable([self.__field] + self.__attributes,
                                  _prealloc=100000)
             first.append(row[:-4])
             stats[segmentKey] = [first]
             continue
         if len(tables[-1]) >= 20000:
             # Newest table is full; open another one for this segment.
             tables.append(
                 uni.UniTable([self.__field] + self.__attributes,
                              _prealloc=100000))
         tables[-1].append(row[:-4])
Exemplo n.º 3
0
def makeConfigs(inFile, outFile, inPMML, outPMML):
    """Write the XML configuration for a dDist threshold test to outFile.

    The tree is rooted at a "model" element carrying the input and output
    PMML names. It contains one "test" on the "Automaker" field (weighted
    by "Count") with a discrete baseline read from inFile as a UniTable,
    and a "segmentation" element that makeSegment (defined elsewhere) is
    called on for the "Color" field.
    """
    # Load the data file; it is handed to makeSegment below.
    table = uni.UniTable().fromfile(inFile)

    # Root of the configuration file.
    model = ET.Element("model")
    for key, value in (("input", str(inPMML)), ("output", str(outPMML))):
        model.set(key, value)

    testNode = ET.SubElement(model, "test")
    testSettings = (
        ("field", "Automaker"),
        ("weightField", "Count"),
        ("testStatistic", "dDist"),
        ("testType", "threshold"),
        ("windowSize", "200"),
        # note that for dDist test, threshold is really a 'ceiling'
        ("threshold", "0.15"),
    )
    for key, value in testSettings:
        testNode.set(key, value)

    # The test uses a discrete distribution model as its baseline.
    baselineNode = ET.SubElement(testNode, "baseline")
    baselineSettings = (
        ("dist", "discrete"),
        ("file", str(inFile)),
        ("type", "UniTable"),
    )
    for key, value in baselineSettings:
        baselineNode.set(key, value)

    # Segmentation declarations for the single segmented field.
    segmentationNode = ET.SubElement(testNode, "segmentation")
    makeSegment(table, segmentationNode, "Color")

    # Write the finished configuration.
    ET.ElementTree(model).write(outFile)
Exemplo n.º 4
0
def makeConfigs(inFile, outFile, inPMML, outPMML):
    """Write the XML configuration for a zValue threshold test to outFile.

    The tree is rooted at a "model" element carrying the input and output
    PMML names. It contains one "test" on the "volume" field with a
    gaussian baseline read from inFile as a UniTable, and a "segmentation"
    element that makeSegment (defined elsewhere) is called on for the
    "fielddeviceid" and "locationtimestamp" fields, in that order.
    """
    def _child(parent, tag, attributes):
        # Create a sub-element and set its attributes in the given order.
        node = ET.SubElement(parent, tag)
        for attrName, attrValue in attributes:
            node.set(attrName, attrValue)
        return node

    # Open the data file; makeSegment receives it below.
    data = uni.UniTable().fromfile(inFile)

    # Start the configuration file.
    config = ET.Element("model")
    config.set("input", str(inPMML))
    config.set("output", str(outPMML))

    testElem = _child(config, "test", [
        ("field", "volume"),
        ("testStatistic", "zValue"),
        ("testType", "threshold"),
        ("threshold", "1.5"),
    ])
    _child(testElem, "baseline", [
        ("dist", "gaussian"),
        ("file", str(inFile)),
        ("type", "UniTable"),
    ])

    # Segmentation declarations for the two fields.
    segments = _child(testElem, "segmentation", [])
    for fieldName in ("fielddeviceid", "locationtimestamp"):
        makeSegment(data, segments, fieldName)

    # Output the configuration.
    document = ET.ElementTree(config)
    document.write(outFile)
Exemplo n.º 5
0
        name = line[start + 1:]
        name = name[:name.find('"')]
        names.append(name)
inf.close()

#Read in the PMML file and pass `get` (defined outside this excerpt) to the
#parsed root as its input-value function
myReader = pmmlReader()
myReader.parse("sample_events.pmml")
myPMML = myReader.root
myPMML.updateInputFunctions(get)

#Open the output file for writing
out = open("sample_events.out", "w")

#Read in the events
myTable = uni.UniTable()
myTable.from_any_file("sample_events.nab")
rows = len(myTable)

#Will hold fifty (or whatever step is set to) values at a time
inputValues = {}

#Get the first model once, for convenience and speed's sake
model = myPMML.getChildrenOfType(pmmlModels)[0]

#Tell the model which fields we'll want back out for reporting
model.initialize(["Auth_Dt"])

#Cache 50 rows at a time
cnt = 0
step = 50
Exemplo n.º 6
0
 def makeTests(self):
     """Build a test-distributions element per segment and add them to the model.

     The keys of self._stats (one per segment) are visited in sorted order.
     For every segment that has statistics, Producer.makeDistribution builds
     the baseline distribution; when self._alternate is set, an alternate
     distribution is also built from self._altstats and both must succeed.
     Segments for which a distribution could not be built are skipped.

     In 'Update' mode the model's existing pmmlTestDistributions children
     are removed before the new tests are added; in any other mode the new
     tests are added next to the existing ones.

     When a debug file is configured, a table with the columns field, value,
     mean and stddev is written to it. Values for that table are only
     collected in the branch without alternate distributions.
     """
     if self.__timer:
         self.__timer.output("Making test distributions from statistics")
     #TEMPORARY: columns of the debug table
     outFields = []
     outValues = []
     outMeans = []
     outStdDevs = []
     #extensions shared by every test
     extensions = []
     if self.__skip:
         extensions.append(
             pmmlExtension(children=[
                 extensionSkip(attributes={"number": str(self.__skip)})
             ]))
     #create a test for each segment
     tests = []
     # sorted() accepts both a list and a dict key view, unlike calling
     # .sort() on the result of keys().
     keys = sorted(self._stats.keys())
     baseDist = self._baseline.get("dist")
     if self._alternate:
         #include alternate distributions
         altDist = self._alternate.get("dist")
         for entry in keys:
             if not (self._stats[entry] and self._altstats[entry]):
                 continue
             child = Producer.makeDistribution(baseDist,
                                               self._stats[entry])
             if not child:
                 continue
             baseline = pmmlBaseline(children=[child])
             temp = Producer.makeDistribution(altDist,
                                              self._altstats[entry])
             if not temp:
                 continue
             alt = pmmlAlternate(children=[temp])
             segments = pmmlSegments(children=Producer.makeSegments(entry))
             # Copy so each test gets its own children list.
             children = list(extensions)
             children.extend([baseline, alt, segments])
             tests.append(
                 pmmlTestDistributions(children=children,
                                       attributes=self._attrs))
     else:
         #do not include alternate distributions
         for entry in keys:
             if not self._stats[entry]:
                 continue
             child = Producer.makeDistribution(baseDist,
                                               self._stats[entry],
                                               self.testValidation)
             if not child:
                 continue
             baseline = pmmlBaseline(children=[child])
             segments = pmmlSegments(children=Producer.makeSegments(entry))
             children = list(extensions)
             children.extend([baseline, segments])
             tests.append(
                 pmmlTestDistributions(children=children,
                                       attributes=self._attrs))
             #TEMPORARY
             if self.__debugFile:
                 # NOTE(review): field/value are recorded only for a
                 # non-empty segment key while mean/stddev are always
                 # recorded, so the debug columns can differ in length
                 # when entry is empty -- confirm this is intended.
                 if entry:
                     outFields.append(entry[0][0])
                     outValues.append(entry[0][1])
                 stats = self._stats[entry]
                 # presumably stats[2] is the sum, stats[3] the sum of
                 # squares and stats[4] the count -- TODO confirm
                 mean = float(stats[2]) / stats[4]
                 meanOfSquares = float(stats[3]) / stats[4]
                 outMeans.append(mean)
                 # Clamp at 0 so rounding cannot make the variance negative.
                 outStdDevs.append(
                     math.sqrt(max(meanOfSquares - mean**2, 0)))
     #put the tests in the current model
     originals = self._model.getChildrenOfType(pmmlTestDistributions)
     if self.__mode == 'Update':
         # Update mode replaces the existing tests instead of adding to them.
         self._model.removeChildren(originals)
     self._model.addChildren(tests)
     #TEMPORARY
     if self.__debugFile:
         out = uni.UniTable(["field", "value", "mean", "stddev"])
         out["field"] = outFields
         out["value"] = outValues
         out["mean"] = outMeans
         out["stddev"] = outStdDevs
         out.to_nab_file(str(self.__debugFile))