def __init__(self, source=None, language=pythonSource, resultsDir=None, corpus=mitlmCorpus):
    """Prepare the results directory, CSV output and corpus/source model.

    Args:
        source: List of validation file names. A ``str`` is recognised but
            not supported yet.
        language: Stored as ``self.lm`` and handed to ``sourceModel``.
        resultsDir: Directory that receives ``results.csv`` and the
            validation corpus. Falls back to the ``ucResultsDir``
            environment variable, then to a fresh ``mkdtemp`` directory.
        corpus: Callable invoked with ``readCorpus``, ``writeCorpus`` and
            ``order`` keyword arguments to build ``self.cm``.

    Raises:
        NotImplementedError: If ``source`` is a ``str``.
        TypeError: If ``source`` is neither a ``str`` nor a ``list``.
        AssertionError: If the results directory is not readable, writable
            and searchable by the current process.
    """
    self.resultsDir = (
        resultsDir
        or os.getenv("ucResultsDir", None)
        or mkdtemp(prefix='ucValidation-')
    )

    if isinstance(source, str):
        raise NotImplementedError
    elif isinstance(source, list):
        self.validFileNames = source
    else:
        raise TypeError("Constructor arguments!")

    # The mode flags must be OR-ed together. AND-ing them yields 0, which is
    # os.F_OK and therefore only tests that the path exists.
    assert os.access(self.resultsDir, os.R_OK | os.W_OK | os.X_OK), \
        "results directory is not accessible: %r" % (self.resultsDir,)

    # Results are appended so repeated runs share one CSV file; the handle
    # stays open on the instance for the writer to use.
    self.csvPath = os.path.join(self.resultsDir, 'results.csv')
    self.csvFile = open(self.csvPath, 'a')
    self.csv = csv.writer(self.csvFile)

    # The same path is used for both reading and writing the corpus.
    self.corpusPath = os.path.join(self.resultsDir, 'validationCorpus')
    self.cm = corpus(readCorpus=self.corpusPath, writeCorpus=self.corpusPath, order=10)
    self.lm = language
    self.sm = sourceModel(cm=self.cm, language=self.lm)

    self.validFiles = list()
    self.addValidationFile(self.validFileNames)
    self.genCorpus()
def setUpClass(self):
    """Create a temporary directory and build the shared test fixtures.

    Sets ``self.td`` (the temporary directory), ``self.uc``, ``self.cm``
    and ``self.sm``.

    Raises:
        AssertionError: If the temporary directory is not a directory that
            is readable, writable and searchable.
    """
    self.td = mkdtemp(prefix='ucTest-')
    # The mode flags must be OR-ed together. AND-ing them yields 0, which is
    # os.F_OK and therefore only tests that the path exists.
    assert os.access(self.td, os.R_OK | os.W_OK | os.X_OK)
    assert os.path.isdir(self.td)

    readCorpus = os.path.join(self.td, 'ucCorpus')
    logFilePath = os.path.join(self.td, 'ucLogFile')
    self.uc = unnaturalCode(logFilePath=logFilePath)
    # NOTE(review): the corpus receives ucGlobal rather than the self.uc
    # created just above -- confirm this is intended.
    self.cm = mitlmCorpus(readCorpus=readCorpus, writeCorpus=readCorpus, uc=ucGlobal)
    self.sm = sourceModel(cm=self.cm, language=pythonSource)
def __init__(self):
    """Locate (creating if needed) the data directory and build the models.

    The data directory is taken from the ``UC_DATA`` environment variable
    and defaults to ``~/.unnaturalCode``. Sets ``self.homeDir``,
    ``self.ucDir``, ``self.readCorpus``, ``self.logFilePath``, ``self.uc``,
    ``self.cm``, ``self.lm`` and ``self.sm``.

    Raises:
        AssertionError: If the data directory is not a directory that is
            readable, writable and searchable.
    """
    self.homeDir = os.path.expanduser("~")
    self.ucDir = os.getenv("UC_DATA", os.path.join(self.homeDir, ".unnaturalCode"))
    if not os.path.exists(self.ucDir):
        os.makedirs(self.ucDir)

    # The mode flags must be OR-ed together. AND-ing them yields 0, which is
    # os.F_OK and therefore only tests that the path exists.
    assert os.access(self.ucDir, os.R_OK | os.W_OK | os.X_OK)
    assert os.path.isdir(self.ucDir)

    self.readCorpus = os.path.join(self.ucDir, 'pyCorpus')
    self.logFilePath = os.path.join(self.ucDir, 'pyLogFile')
    self.uc = unnaturalCode(logFilePath=self.logFilePath)
    # The same path is used for both reading and writing the corpus.
    self.cm = mitlmCorpus(readCorpus=self.readCorpus, writeCorpus=self.readCorpus, uc=self.uc)
    self.lm = pythonSource
    self.sm = sourceModel(cm=self.cm, language=self.lm)
def testCorpify(self):
    """Check that ``corpify`` turns the sample lexemes into the expected string."""
    sm = sourceModel(cm=mitlmCorpus())
    corpified = sm.corpify(pythonSource(someLexemes))
    # assertEqual replaces the deprecated assertEquals alias, which newer
    # unittest versions no longer provide.
    self.assertEqual(corpified, 'print ( 1 + 2 ** 2 ) <ENDMARKER>')