Пример #1
0
 def testExtractionIsValidXml(self):
     """Assert that ``Extractor.extractRecipe`` returns well-formed XML.

     The first plain-text recipe of a CueML DOM built from
     ``getRecipeB49()`` is run through an ``Extractor``; the resulting
     string must be accepted by ``parseString``. Any exception raised while
     parsing or pretty-printing fails the test with that exception's text.
     """
     unitExtractor = UnitExtractor()
     recipes = XmlParser(createCueMLDom([getRecipeB49()])).getPlainTextRecipes()
     # Advance the iterator through the next() builtin instead of calling
     # the __next__ dunder by hand.
     plainTextRcp = next(recipes)
     ingredientExtractor = IngredientExtractor(parse(pathToListIngredients))
     extractor = Extractor(ingredientExtractor, unitExtractor)
     xmlString = extractor.extractRecipe(plainTextRcp)
     try:
         parseString(xmlString).toprettyxml()
     except Exception as err:
         # The handler target has its own name: Python unbinds the ``as``
         # name when the handler exits, so reusing the extractor's name
         # here would silently delete that variable.
         self.fail("Not valid xml extracted: {}".format(str(err)))
class IngredientExtractorTest(unittest.TestCase):
    """Tests for ``IngredientExtractor.getIngredientCandidates``.

    Caution: These tests depend on the listIngredients.xml within setUp()
    """

    def setUp(self):
        """Build a fresh extractor from the ingredient list for every test."""
        super().setUp()
        self.ingE = IngredientExtractor(parse(pathToListIngredients))

    def testWein(self):
        """"Wein" gives several candidates, one with xmlID "Rotwein"."""
        candis = self.ingE.getIngredientCandidates("Wein")
        self.assertGreater(len(candis), 1)
        # assertIn reports the searched list on failure, matching testFleisch.
        self.assertIn("Rotwein", [candi.xmlID for candi in candis])

    def testMidder(self):
        """"Midder" gives exactly one candidate named "Midder"."""
        candis = self.ingE.getIngredientCandidates("Midder")
        self.assertEqual(1, len(candis))
        only = candis[0]
        self.assertEqual("Midder", only.basicForm)
        self.assertEqual("Midder", only.xmlID)

    def testNoIngredient(self):
        """An unknown word ("asfd") gives None rather than a list."""
        candis = self.ingE.getIngredientCandidates("asfd")
        self.assertIsNone(candis)

    def testBouillon(self):
        """"Bouillon" gives an empty, but not None, candidate collection."""
        candis = self.ingE.getIngredientCandidates("Bouillon")
        self.assertIsNotNone(candis)
        self.assertEqual(0, len(candis))

    def testFleisch(self):
        """"Fleisch" gives several candidates, one with basicForm "Rindfleisch"."""
        candis = self.ingE.getIngredientCandidates("Fleisch")
        self.assertGreater(len(candis), 1)
        self.assertIn("Rindfleisch", [candi.basicForm for candi in candis])

    def testSchweinefleisch(self):
        """"Schweinefleisch" gives an empty list of candidates."""
        candis = self.ingE.getIngredientCandidates("Schweinefleisch")
        self.assertEqual([], candis)
Пример #3
0
class EvalRecipesTest(unittest.TestCase):
    """Tests for the precision and recall counters over two recipes.

    Both tests compare ingredients on the "ref" attribute only, using the
    pair returned by ``getIEIngsAndGoldenStandardIngs()``.
    """

    # NOTE: these are class attributes, so they are built once when the
    # class body executes (at import time), not once per test.
    ingE = IngredientExtractor(parse(pathToListIngredients))
    unitE = UnitExtractor()
    extractor = Extractor(ingE, unitE)

    def testPrecisionOf2Recipes(self):
        """``precisionOf2Recipes`` counts 1 relevant hit among 3 retrieved."""
        iEIngs, goldenStandardIngs = getIEIngsAndGoldenStandardIngs()
        attris = {"ref"}
        # Each test name matches the function it exercises: this one
        # calls precisionOf2Recipes, the recall test calls recallOf2Recipes.
        retrievedAndRelevant, retrieved = precisionOf2Recipes(
            goldenStandardIngs, iEIngs, attris)
        self.assertEqual(1, retrievedAndRelevant)
        self.assertEqual(3, retrieved)

    def testRecallOf2Recipes(self):
        """``recallOf2Recipes`` counts 1 retrieved hit among 1 relevant."""
        iEIngs, goldenStandardIngs = getIEIngsAndGoldenStandardIngs()
        attris = {"ref"}
        retrievedAndRelevant, relevant = recallOf2Recipes(
            goldenStandardIngs, iEIngs, attris)
        self.assertEqual(1, retrievedAndRelevant)
        self.assertEqual(1, relevant)
 def setUp(self):
     """Build the ingredient and unit extractors used by the tests."""
     ingredientDom = parse(pathToListIngredients)
     self.ingE = IngredientExtractor(ingredientDom)
     self.unitE = UnitExtractor()
from parserForDavidisCookbook.XmlParser import XmlParser
from xml.dom.minidom import parse
from informationExtraction.IngredientExtractor import IngredientExtractor
from informationExtraction.UnitExtractor import UnitExtractor
import time
from informationExtraction.Extractor import Extractor
from evaluation.evalRecipes import evalFromFiles
from model.PlainTextRecipe import PlainTextRecipe
from informationExtraction.dictBasedExtractor import dictBasedEnrichment
from informationExtraction.ruleBasedExtractor import applyRulesToWordProperties

# Attributes which should be relevant in evaluation. A wider, commented-out
# alternative additionally listed "optional" and "altGrp".
evalAttris = {"quantity", "atLeast", "atMost", "unit"}

# Shared extractors, built once when this module is imported.
ingE = IngredientExtractor(parse(
    "/home/torsten/Desktop/MyMasterThesis/docs/DavidisesKochbuch/listIngredients.xml"))
unitE = UnitExtractor()

# Golden standard: contains B-1 to B-68 labeled and the rest unlabeled.
goldenStandardPath = "/home/torsten/Desktop/MyMasterThesis/docs/DavidisesKochbuch/GoldenStandard.xml"
# Output file used when the caller does not supply one.
defaultErgFilePath = "erg.xml"


def evalRecipes(rcpIds=["B-{}".format(i) for i in range(1, 51)],
                debug=True,
                attris=evalAttris,
                ergFilePath=defaultErgFilePath):
    """Run the extractor over selected golden-standard recipes.

    Parses the file at ``goldenStandardPath``, takes the plain-text recipes
    with the given ids and passes them to
    ``Extractor.extractRecipes2TEICueML`` together with ``ergFilePath``.

    Args:
        rcpIds: Recipe ids to process; defaults to "B-1" through "B-50".
        debug: Not read in the visible body.
        attris: Not read in the visible body; defaults to ``evalAttris``.
        ergFilePath: Path handed to ``extractRecipes2TEICueML``; defaults to
            ``defaultErgFilePath``.

    NOTE(review): ``startTime``, ``debug`` and ``attris`` are unused here,
    which suggests the function continues beyond this excerpt -- confirm
    against the full file.
    NOTE(review): the ``rcpIds`` default is a list created once at
    definition time; the visible body does not mutate it.
    """
    # Recorded but never read in the visible part of the function.
    startTime = time.time()
    cookbook = XmlParser(parse(goldenStandardPath))
    # Uses the module-level ingredient and unit extractors.
    extractor = Extractor(ingE, unitE)
    extractor.extractRecipes2TEICueML(cookbook.getPlainTextRecipes(rcpIds),
                                      ergFilePath)
 def setUp(self):
     """Run the base ``TestCase`` set-up, then build the ingredient extractor."""
     unittest.TestCase.setUp(self)
     self.ingE = IngredientExtractor(parse(pathToListIngredients))