Пример #1
0
    def testGeoTextAndGrapy(self):
        '''
        test the GeoText and geograpy3 library
        '''
        debug = True
        limit = 100
        sqlQuery = """select count(*) as count,
locality from Event_wikicfp
where locality is not null
group by locality
order by 1 desc
LIMIT %d
""" % limit
        dbFile = Lookup.getDBFile()
        if os.path.isfile(dbFile):
            sqlDB = SQLDB(dbFile)
        else:
            lookup = Lookup.ensureAllIsAvailable("testGeoText")
            sqlDB = lookup.getSQLDB()
        if sqlDB is not None:
            print("testGeoText from database %s " % sqlDB.dbname)
            totalResult = sqlDB.query("""select count(*) as count
  from event_wikicfp
  where locality is not null""")
            total = totalResult[0]['count']
            listOfDicts = sqlDB.query(sqlQuery)
            index = 0
            rsum = 0
            found = 0
            problems = []
            for record in listOfDicts:
                locality = record['locality']
                count = record['count']
                index += 1
                rsum += count
                print("%5d: %5d/%5d %5.1f%%=%s" %
                      (index, count, rsum, rsum / total * 100, locality))
                geo = GeoText(locality)
                if debug:
                    print("  %s" % geo.countries)
                    print("  %s" % geo.cities)
                city = geograpy.locateCity(locality)
                if city is not None:
                    found += 1
                else:
                    problems.append(locality)
                if debug:
                    print("  %s%s" % (city, '✅' if city is not None else '❌'))
            if self.debug:
                print("found %d/%d = %5.1f%%" %
                      (found, limit, found / limit * 100))
                print("problems: %s" % problems)
            self.assertTrue(found / limit > 0.8)
        pass
Пример #2
0
 def testLookup(self):
     '''
     check the number of sources of the lookup and that storing
     to "Event_all" reports no errors
     '''
     # nothing is checked when running as user "travis"
     if getpass.getuser() == "travis":
         return
     lookup = Lookup("test")
     sourceCount = len(lookup.ems)
     self.assertEqual(8, sourceCount)
     storeErrors = lookup.store('Event_all')
     self.assertEqual(0, len(storeErrors))
     lookup.createView()
Пример #3
0
 def testExamples(self):
     '''
     check that the examples can be read and that there are 16 of them
     '''
     exampleMap = Lookup.getExamples()
     print (exampleMap)
     exampleKeys = exampleMap.keys()
     self.assertEqual(16, len(exampleKeys))
 def getWordUsageDB(self):
     '''
     open the word usage database

     Returns:
         an SQLDB for the "wordusage" database file, or
         None if that file does not exist
     '''
     dbPath = Lookup.getDBFile("wordusage")
     if not os.path.isfile(dbPath):
         return None
     return SQLDB(dbPath)
Пример #5
0
    def testPlantUml(self):
        '''
        get plant UML functionality 
        '''
        schemaManager = None
        if getpass.getuser() != "travis":
            o = Ontology()
            schemaManager = o.getRQSchema(
                fromCache=False)  # to force SMW query

        lookup = Lookup("plantuml", getAll=False, butNot='or')
        dbfile = lookup.getDBFile('Event_all')
        sqlDB = SQLDB(dbfile)
        tableList = sqlDB.getTableList()
        eventTableList = []
        eventSchemas = lookup.getEventSchemas()
        for table in tableList:
            tableName = table['name']
            if tableName.startswith("Event_"):
                table['schema'] = eventSchemas[tableName]
                eventTableList.append(table)
                countQuery = "SELECT count(*) as count from %s" % tableName
                countResult = sqlDB.query(countQuery)
                table['instances'] = countResult[0]['count']
        self.assertEqual(8, len(eventTableList))
        uml = UML()
        now = datetime.now()
        nowYMD = now.strftime("%Y-%m-%d")
        title = """ConfIDent  Entities
%s
[[https://projects.tib.eu/en/confident/ © 2019-2020 ConfIDent project]]
see also [[http://ptp.bitplan.com/settings Proceedings Title Parser]]
""" % nowYMD
        plantUml = uml.mergeSchema(schemaManager,
                                   eventTableList,
                                   title=title,
                                   packageName='DataDonations',
                                   generalizeTo="Event")
        print(plantUml)
        self.assertTrue("Event <|-- Event_confref" in plantUml)
        self.assertTrue("class Event " in plantUml)
 def testWordParser(self):
     '''
     parse the proceedings titles of several sources into word usages,
     print quantiles of the number of word usages per event, call the
     histogram plot in 'save' mode for each source and finally store
     all word usages in the "wordusage" database

     see https://stackoverflow.com/questions/2374640/how-do-i-calculate-percentiles-with-python-numpy
     '''
     lookup = Lookup("test Word parser")
     sqlDB = lookup.getSQLDB()
     if sqlDB is None:
         return
     allUsages = []
     for source in ['wikidata', 'crossref', 'dblp', 'CEUR-WS']:
         titleRecords = TestWordParser.getProceedingsTitles(sqlDB, source)
         parser = CorpusWordParser()
         # number of word usages per event id
         usagesPerEvent = {}
         for usage in parser.parse(titleRecords):
             allUsages.append(usage.__dict__)
             usagesPerEvent[usage.eventId] = \
                 usagesPerEvent.get(usage.eventId, 0) + 1
         frame = DataFrame(usagesPerEvent.values())
         print(frame.quantile(1))
         print(frame.quantile(.90))
         histogram = Plot(usagesPerEvent.values(),
                          "%s wordcount histogram" % source,
                          xlabel="wordcount",
                          ylabel="frequency")
         histogram.hist(mode='save')
     # (re)create the wordusage table and fill it with everything collected
     wSQLDB = SQLDB(Lookup.getDBFile("wordusage"))
     entityInfo = wSQLDB.createTable(allUsages, "wordusage", withDrop=True)
     wSQLDB.store(allUsages, entityInfo)
def index(titles="",
          tc=None,
          errs=None,
          result=None,
          message=None,
          metadebug=True):
    """
    render the 'index.html' template

    All arguments are handed to the template unchanged under their own
    names; the value of Lookup.getExamples() is added as 'examples'.
    """
    templateArgs = dict(
        titles=titles,
        tc=tc,
        errs=errs,
        result=result,
        message=message,
        metadebug=metadebug,
        examples=Lookup.getExamples(),
    )
    return render_template('index.html', **templateArgs)
Пример #8
0
 def test_SQL(self):
     '''
     run every SQL query of the query manager and print
     the query and its result as wiki markup
     '''
     queryManager = QueryManager(lang='sql', debug=False)
     self.assertEqual(17, len(queryManager.queriesByName))
     sqlDB = Lookup.ensureAllIsAvailable().getSQLDB()
     for queryName, namedQuery in queryManager.queriesByName.items():
         records = sqlDB.query(namedQuery.query)
         resultMarkup = namedQuery.asWikiMarkup(records)
         print("== %s ==" % (queryName))
         print("=== query ===")
         print (namedQuery.asWikiSourceMarkup())
         print("=== result ===")
         print(resultMarkup)
 def testError(self):
     ''' check the error handling according to https://github.com/WolfgangFahl/ProceedingsTitleParser/issues/4 '''
     titleParser = Lookup("testError").tp
     self.assertTrue("Innsbruck" in titleParser.dictionary.tokens)
     seuhTitle = 'Tagungsband des 17. Workshops "Software Engineering im Unterricht der Hochschulen" 2020 (SEUH 2020),Innsbruck, Österreich, 26. - 27.02.2020.'
     titleParser.fromLines([seuhTitle], 'line')
     counter, errors, parsed = titleParser.parseAll()
     # the single title must be counted as failed and reported as an error ...
     self.assertEqual(1, counter["fail"])
     self.assertEqual(1, len(errors))
     firstError = errors[0]
     self.assertTrue("Expected" in str(firstError))
     # ... but it still has to be part of the result with city metadata
     self.assertEqual(1, len(parsed))
     title = parsed[0]
     print (title.metadata())
     self.assertTrue('city' in title.metadata())
     print (title.notfound)
 def testExtractMode(self):
     ''' check the extract mode: URLs are given to the parser as input lines '''
     titleParser = Lookup("testExtractMode").tp
     ceurUrls = [
         'http://ceur-ws.org/Vol-2635/',
         'http://ceur-ws.org/Vol-2599/',
         'http://ceur-ws.org/Vol-2553/',
         'http://ceur-ws.org/Vol-2512/',
         'http://ceur-ws.org/Vol-2489/',
         'http://ceur-ws.org/',
         'http://ceur-ws.org/Vol-9999/',
     ]
     titleParser.fromLines(ceurUrls, 'line')
     counter, errors, parsed = titleParser.parseAll()
     for item in (counter, errors, parsed):
         print (item)
     # expect 4 ok 1 fail and 2 invalid/ignored
     self.assertEqual(4, counter['success'])
     self.assertEqual(1, counter['fail'])
Пример #11
0
 def testCreateEventAll(self):
     '''
     make sure the event all database passes the check of the lookup;
     if the first check reports errors the database is created anew
     and checked a second time
     '''
     withWikiData = True
     lookup = Lookup("CreateEventAll")
     self.assertEqual(8, len(lookup.ems))
     errors = lookup.check(lookup.getSQLDB(), debug=True)
     needsRebuild = len(errors) > 0
     if needsRebuild:
         print(errors)
         rebuiltDB = lookup.createEventAll(maxAgeMin=0,
                                           withWikiData=withWikiData)
         errors = lookup.check(rebuiltDB, debug=True)
     # whichever check ran last must not have reported any error
     self.assertEqual(0, len(errors))
Пример #12
0
 def testSpacy(self):
     '''
     test the spaCy NLP library

     Runs the 'en_core_web_sm' pipeline on at most ``limit`` proceedings
     titles (taken source by source) and prints the entities found
     in each title.
     '''
     nlp = spacy.load('en_core_web_sm')
     limit = 100
     lookup = Lookup.ensureAllIsAvailable()
     sqlDB = lookup.getSQLDB()
     if sqlDB is None:
         # no database available - nothing to analyze
         return
     processed = 0
     for source in ['wikidata', 'crossref', 'dblp', 'CEUR-WS']:
         # do not even query the next source once the limit is reached
         if processed >= limit:
             break
         listOfDicts = TestWordParser.getProceedingsTitles(
             sqlDB, source)
         for record in listOfDicts:
             # stop at exactly `limit` titles; checking `> limit` only
             # after the increment would let limit + 1 titles through
             if processed >= limit:
                 break
             title = record['title']
             doc = nlp(title)
             print("found %d entities in %s:%s" %
                   (len(doc.ents), record['eventId'], title))
             for ent in doc.ents:
                 print("  %s(%s)" % (ent, ent.label_))
             processed += 1
 def testNERMode(self):
     ''' check the named entity recognition mode with two proceedings titles '''
     titleParser = Lookup("testNerMode").tp
     atvaTitle = 'ATVA 2020 18th International Symposium on Automated Technology for Verification and Analysis'
     birTitle = 'Proceedings of the 8th International Workshop on Bibliometric-enhanced Information Retrieval (BIR)co-located with the 41st European Conference on Information Retrieval (ECIR 2019)Cologne, Germany, April 14th, 2019.'
     titleParser.fromLines([atvaTitle, birTitle], 'line')
     counter, errors, parsed = titleParser.parseAll()
     for item in (counter, errors, parsed):
         print (item)
     # each of the two titles must have led to a result
     self.assertEqual(2, len(parsed))
     for title in parsed:
         print (title)
         print (title.info)
         print (title.metadata())
         print (title.notfound)
         # at least one event must have been found for the title
         self.assertTrue(len(title.events) > 0)
         print (title.events)
         for event in title.events:
             print (event.url)
from flask.helpers import send_from_directory
import argparse
import sys
from ptp.lookup import Lookup

#from json2xml import json2xml
#https://github.com/vinitkumar/json2xml/issues/59
#from flask_accept import accept

# absolute path of the directory that contains this script
scriptdir = os.path.dirname(os.path.abspath(__file__))
# the Flask application: static files are taken from ../web and templates
# from ../templates, both relative to the directory of this script;
# static_url_path='' is passed so that static files need no URL prefix
# (see the Flask documentation of static_url_path)
app = Flask(__name__,
            static_url_path='',
            static_folder=scriptdir + '/../web',
            template_folder=scriptdir + '/../templates')
# create a most current eventall database
# NOTE(review): this Lookup is constructed at module level, i.e. as soon as
# this module is imported - what singleDB=True triggers is not visible here
lookup = Lookup("webserver", singleDB=True)


@app.route('/')
def home():
    """ handle the root URL '/' by rendering the index page with its default parameters """
    page = index()
    return page


def index(titles="",
          tc=None,
          errs=None,
          result=None,
          message=None,
          metadebug=True):
    """ render index page with the given parameters"""
    return render_template('index.html',