예제 #1
0
    def __init__(self, source=None, language=pythonSource, resultsDir=None, corpus=mitlmCorpus):
        """Prepare a validation run over a list of source file names.

        Args:
            source: list of file names to validate. Passing a ``str`` is
                not implemented.
            language: forwarded to ``sourceModel`` as ``language``.
            resultsDir: directory holding ``results.csv`` and the
                validation corpus. When falsy, the ``ucResultsDir``
                environment variable is used, and failing that a fresh
                temporary directory is created.
            corpus: callable used to build the corpus object; it is called
                with ``readCorpus``, ``writeCorpus`` and ``order``.

        Raises:
            NotImplementedError: if ``source`` is a ``str``.
            TypeError: if ``source`` is neither a ``str`` nor a ``list``.
            AssertionError: if the results directory is not readable,
                writable and searchable.
        """
        self.resultsDir = (resultsDir
                           or os.getenv("ucResultsDir", None)
                           or mkdtemp(prefix='ucValidation-'))
        if isinstance(source, str):
            raise NotImplementedError
        elif isinstance(source, list):
            self.validFileNames = source
        else:
            raise TypeError("Constructor arguments!")

        # The permission flags have to be OR-ed together: AND-ing them gives
        # 0 (os.F_OK), which would only test that the path exists.
        assert os.access(self.resultsDir, os.X_OK | os.R_OK | os.W_OK)
        self.csvPath = os.path.join(self.resultsDir, 'results.csv')

        # Count how many result rows already exist per key (first column) so
        # that an earlier, interrupted run can be taken into account.
        self.progress = dict()
        try:
            # The with-statement closes the handle even if reading fails.
            with open(self.csvPath, 'r') as previousResults:
                for row in csv.reader(previousResults):
                    if not row:
                        # Blank lines come back as empty rows; skip them.
                        continue
                    self.progress[row[0]] = self.progress.get(row[0], 0) + 1
        except IOError:
            # No readable results file yet: start with empty progress.
            pass

        # From here on results are appended to the same file.
        self.csvFile = open(self.csvPath, 'a')
        self.csv = csv.writer(self.csvFile)
        self.corpusPath = os.path.join(self.resultsDir, 'validationCorpus')
        self.cm = corpus(readCorpus=self.corpusPath, writeCorpus=self.corpusPath, order=10)
        self.lm = language
        self.sm = sourceModel(cm=self.cm, language=self.lm)
        self.validFiles = list()
        self.addValidationFile(self.validFileNames)
        self.genCorpus()
예제 #2
0
 def basicSetup(self, ngram_order=10):
     """Create the unnaturalCode, mitlmCorpus and sourceModel objects.

     ``ngram_order`` is forwarded to ``mitlmCorpus`` as ``order``. The
     corpus reads from and writes to the same path, ``self.readCorpus``.
     """
     self.uc = unnaturalCode(logFilePath=self.logFilePath)
     # Every collaborator is handed in explicitly (dependency injection).
     corpus_options = dict(
         readCorpus=self.readCorpus,
         writeCorpus=self.readCorpus,
         uc=self.uc,
         order=ngram_order,
     )
     self.cm = mitlmCorpus(**corpus_options)
     self.sm = sourceModel(cm=self.cm, language=self.lm)
예제 #3
0
 def basicSetup(self, ngram_order=10):
     """Wire up ``self.uc``, ``self.cm`` and ``self.sm`` for this instance.

     The corpus is built with ``order=ngram_order`` and uses
     ``self.readCorpus`` for both its read and its write path.
     """
     self.uc = unnaturalCode(logFilePath=self.logFilePath)
     # Collaborators are passed in by hand (dependency injection).
     corpus_kwargs = {
         'readCorpus': self.readCorpus,
         'writeCorpus': self.readCorpus,
         'uc': self.uc,
         'order': ngram_order,
     }
     self.cm = mitlmCorpus(**corpus_kwargs)
     self.sm = sourceModel(cm=self.cm, language=self.lm)
 def setUpClass(self):
     """Create a temporary working directory and the objects under test.

     Sets ``self.td`` (the temporary directory), ``self.uc``, ``self.cm``
     and ``self.sm``. The corpus and log file paths live inside ``self.td``.

     Raises:
         AssertionError: if the temporary directory is not a readable,
             writable and searchable directory.
     """
     self.td = mkdtemp(prefix='ucTest-')
     # The permission flags have to be OR-ed together: AND-ing them gives 0
     # (os.F_OK), which would only test that the path exists.
     assert os.access(self.td, os.X_OK | os.R_OK | os.W_OK)
     assert os.path.isdir(self.td)
     readCorpus = os.path.join(self.td, 'ucCorpus')
     logFilePath = os.path.join(self.td, 'ucLogFile')
     self.uc = unnaturalCode(logFilePath=logFilePath)
     # NOTE(review): the corpus is given ucGlobal, not the self.uc created
     # just above -- confirm that this is intentional.
     self.cm = mitlmCorpus(readCorpus=readCorpus, writeCorpus=readCorpus, uc=ucGlobal)
     self.sm = sourceModel(cm=self.cm, language=pythonSource)
예제 #5
0
 def __init__(self, ngram_order=10):
     """Set up the data directory, corpus and source model.

     The data directory is taken from the ``UC_DATA`` environment variable
     and defaults to ``~/.unnaturalCode``; it is created when missing.

     Args:
         ngram_order: forwarded to ``mitlmCorpus`` as ``order``.

     Raises:
         AssertionError: if the data directory is not a readable, writable
             and searchable directory.
     """
     self.homeDir = os.path.expanduser("~")
     self.ucDir = os.getenv("UC_DATA", os.path.join(self.homeDir, ".unnaturalCode"))
     if not os.path.exists(self.ucDir):
       os.makedirs(self.ucDir)
     # The permission flags have to be OR-ed together: AND-ing them gives 0
     # (os.F_OK), which would only test that the path exists.
     assert os.access(self.ucDir, os.X_OK | os.R_OK | os.W_OK)
     assert os.path.isdir(self.ucDir)

     self.readCorpus = os.path.join(self.ucDir, 'pyCorpus')
     self.logFilePath = os.path.join(self.ucDir, 'pyLogFile')

     self.uc = unnaturalCode(logFilePath=self.logFilePath)
     # Every collaborator is handed in explicitly (dependency injection).
     # The corpus reads from and writes to the same path.
     self.cm = mitlmCorpus(readCorpus=self.readCorpus,
                           writeCorpus=self.readCorpus,
                           uc=self.uc,
                           order=ngram_order)
     self.lm = pythonSource
     self.sm = sourceModel(cm=self.cm, language=self.lm)
예제 #6
0
 def __init__(self,
              test=None,
              train=None,
              language=pythonSource,
              resultsDir=None,
              corpus=mitlmCorpus,
              keep=False,
              retry_valid=False):
     """Prepare a validation run with separate training and test files.

     Args:
         test: list of file names used for testing. A ``str`` is not
             implemented.
         train: list of file names used for training. A ``str`` is not
             implemented.
         language: forwarded to ``sourceModel`` as ``language``.
         resultsDir: directory holding ``results.csv`` and the validation
             corpus. When falsy, the ``ucResultsDir`` environment variable
             is used, and failing that a fresh temporary directory is
             created.
         corpus: callable used to build the corpus object; it is called
             with ``readCorpus``, ``writeCorpus`` and ``order``.
         keep: when true, existing corpus files in the results directory
             are left in place instead of being removed.
         retry_valid: stored on the instance as ``self.retry_valid``.

     Raises:
         NotImplementedError: if ``test`` or ``train`` is a ``str``.
         TypeError: if ``test`` or ``train`` is neither ``str`` nor ``list``.
         AssertionError: if the results directory is not readable,
             writable and searchable.
     """
     self.resultsDir = (resultsDir
                        or os.getenv("ucResultsDir", None)
                        or mkdtemp(prefix='ucValidation-'))
     self.retry_valid = retry_valid
     if isinstance(test, str):
         raise NotImplementedError
     elif isinstance(test, list):
         self.testFileNames = test
     else:
         raise TypeError("Constructor arguments!")
     if isinstance(train, str):
         raise NotImplementedError
     elif isinstance(train, list):
         self.trainFileNames = train
     else:
         raise TypeError("Constructor arguments!")

     # The permission flags have to be OR-ed together: AND-ing them gives 0
     # (os.F_OK), which would only test that the path exists.
     assert os.access(self.resultsDir, os.X_OK | os.R_OK | os.W_OK)
     self.csvPath = os.path.join(self.resultsDir, 'results.csv')

     # Count the existing result rows per (first column, second column)
     # pair so that an earlier run can be taken into account.
     self.progress = dict()
     try:
         # The with-statement closes the handle even if reading fails.
         with open(self.csvPath, 'r') as previousResults:
             for row in csv.reader(previousResults):
                 if len(row) < 2:
                     # Blank or truncated rows cannot form a key; skip them
                     # instead of failing with IndexError.
                     continue
                 key = (row[0], row[1])
                 self.progress[key] = self.progress.get(key, 0) + 1
     except IOError:
         # No readable results file yet: start with empty progress.
         pass

     # From here on results are appended to the same file.
     self.csvFile = open(self.csvPath, 'a')
     self.csv = csv.writer(self.csvFile)
     self.corpusPath = os.path.join(self.resultsDir, 'validationCorpus')
     if not keep:
         # Remove corpus files left behind by a previous run.
         for stalePath in (self.corpusPath,
                           self.corpusPath + ".uniqueTokens"):
             if os.path.exists(stalePath):
                 os.remove(stalePath)
     self.cm = corpus(readCorpus=self.corpusPath,
                      writeCorpus=self.corpusPath,
                      order=10)
     self.lm = language
     self.sm = sourceModel(cm=self.cm, language=self.lm)
     self.trainFiles = list()
     self.testFiles = list()
     # Register the training files and generate the corpus first, then
     # drop the training list before registering the test files.
     self.addValidationFile(self.trainFileNames,
                            testing=False,
                            training=True)
     self.genCorpus()
     del self.trainFiles
     self.addValidationFile(self.testFileNames,
                            testing=True,
                            training=False)
 def testCorpify(self):
     """Check the string corpify() produces for the sample lexemes."""
     sm = sourceModel(cm=mitlmCorpus())
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(sm.corpify(pythonSource(someLexemes)), 'print ( 1 + 2 ** 2 ) <ENDMARKER>')