def test_docs_serialization_format(self):
    import json
    from eWRT.util.module_path import get_resource

    DOCS = [{'id': 7,
             'body': 'Ehre sei Gott.',
             'title': '',
             'format': 'text/html',
             'header': {'test': 'testvalue'}},
            {'id': 8,
             'body': '',
             'title': 'Guten Tag!',
             'format': 'text/html',
             'header': {}}]
    REFERENCE_MULTI = json.load(open(get_resource(
        __file__, 'data/jeremia_reference_output_documents.json')))
    REFERENCE_SINGLE = json.load(open(get_resource(
        __file__, 'data/jeremia_reference_output_single_document.json')))

    # document list
    j = Jeremia()
    result = j.submit_documents(DOCS)
    result.sort()
    REFERENCE_MULTI.sort()
    assert REFERENCE_MULTI == result

    # single document
    result = j.submit_document(DOCS[0])
    assert REFERENCE_SINGLE == result
# from builtins import zip
from builtins import str
from builtins import range

import pytest
import unittest

from multiprocessing import Pool
from shutil import rmtree
from os.path import exists, join

from eWRT.util.module_path import get_resource
from eWRT.util.cache import (MemoryCache, MemoryCached, DiskCached, DiskCache,
                             Cache, IterableCache, RedisCached)

get_cache_dir = lambda no: get_resource(__file__, ('.unittest-temp%d' % (no), ))


class TestCached(unittest.TestCase):
    ''' tests the MemoryCached Decorator '''

    @staticmethod
    def add(a=2, b=3):
        return a + b

    @staticmethod
    def sub(a=2, b=3):
        return a - b

    def testNonKeywordArguments(self):
        ''' tests the class with non Keyword Arguments '''
        for x in range(1, 20):
            assert self.add(x, 5) == (x + 5)
            assert self.add(x, 5) == (x + 5)

        # test objects with a cachesize specified
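# Usage sketch for the decorator under test (assumption: MemoryCached wraps a
# plain function directly, so repeated calls with identical arguments are
# answered from the in-memory cache instead of being recomputed).
@MemoryCached
def slow_square(x):
    return x * x


assert slow_square(4) == 16  # first call: computed
assert slow_square(4) == 16  # second call: served from the cache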
def test_submit_classify_v2(self):
    ''' tests the version 2 classifier '''
    weblyzard_xml = open(get_resource(
        __file__, 'data/classifier_v2_testfile.xml')).read()
    classifier = Classifier()
    search_agents = [
        {"name": "Santésuisse",
         "id": 412,
         "product_list": [
             {"name": "SANTESUISSE FINANZ ENGAGEMENT RP", "id": 327432},
             {"name": "SANTESUISSE FINANZ ENTWICKLUNG RP", "id": 327435},
             {"name": "SANTESUISSE FINANZ PERSONEN RP", "id": 327442},
             {"name": "SANTESUISSE FINANZ PRODUKTE RP", "id": 327444},
             {"name": "SANTESUISSE FINANZ REGULATION RP", "id": 327446},
             {"name": "SANTESUISSE FINANZ RESEARCH RP", "id": 327452},
             {"name": "SANTESUISSE VERS. ALLGEMEIN RP", "id": 327562},
             {"name": "SANTESUISSE VERS. ENGAGEMENT RP", "id": 327564},
             {"name": "SANTESUISSE VERS. ENTWICKLUNG RP", "id": 327566},
             {"name": "SANTESUISSE VERS. PERSONEN RP", "id": 327568},
             {"name": "SANTESUISSE VERS. PRODUKTE RP", "id": 327570},
             {"name": "SANTESUISSE VERS. REGULATION RP", "id": 327572},
             {"name": "SANTESUISSE VERS. RESEARCH RP", "id": 327574},
             {"name": "SANTESUISSE FINANZ ALLGEMEIN RP", "id": 327428},
         ]},
        {"name": "Krankenkassen",
         "id": 460,
         "product_list": [
             {"name": "KRANKENKASSEN FINANZ ENGAGEMENT RP", "id": 342053},
             {"name": "KRANKENKASSEN FINANZ ENTWICKLUNG RP", "id": 342055},
             {"name": "KRANKENKASSEN FINANZ PERSONEN RP", "id": 342056},
             {"name": "KRANKENKASSEN FINANZ PRODUKTE RP", "id": 342057},
             {"name": "KRANKENKASSEN FINANZ REGULATION RP", "id": 342058},
             {"name": "KRANKENKASSEN FINANZ RESEARCH RP", "id": 342059},
             {"name": "KRANKENKASSEN VERS. ALLGEMEIN RP", "id": 342060},
             {"name": "KRANKENKASSEN VERS. ENGAGEMENT RP", "id": 342061},
             {"name": "KRANKENKASSEN VERS. ENTWICKLUNG RP", "id": 342062},
             {"name": "KRANKENKASSEN VERS. PERSONEN RP", "id": 342063},
             {"name": "KRANKENKASSEN VERS. PRODUKTE RP", "id": 342064},
             {"name": "KRANKENKASSEN VERS. REGULATION RP", "id": 342065},
             {"name": "KRANKENKASSEN VERS. RESEARCH RP", "id": 342066},
             {"name": "KRANKENKASSEN FINANZ ALLGEMEIN RP", "id": 342052},
         ]},
    ]
    num_results = 3

    # call the web service
    result = classifier.classify_v2('COMET',
                                    weblyzard_xml=weblyzard_xml,
                                    search_agents=search_agents,
                                    num_results=num_results)

    # every search_agent should be covered in the result
    assert set(result.keys()) == set(
        self.get_search_agent_ids(search_agents))

    # for every search_agent, exactly 'num_results' classes are returned
    for _search_agent, classes in result.items():
        assert len(classes) == num_results
    print(result)
from bz2 import BZ2File
from csv import reader
from glob import glob
from os.path import dirname, basename, join as os_join
from collections import namedtuple
from datetime import datetime

from twisted.python.text import strFile

from eWRT.util.module_path import get_resource


def extract_index_name(fname):
    return basename(fname).split(".")[0]


DATA_DIR = get_resource(__file__, ('data', ))

Quote = namedtuple('quote', 'date last open high low change_percentage')


class StockIndex(object):

    SUPPORTED_INDICES = {extract_index_name(fname): fname
                         for fname in glob(DATA_DIR + "/*.idx.bz2")}
    SUPPORTED_FUTURES = {extract_index_name(fname): fname
                         for fname in glob(DATA_DIR + "/*.ftidx.bz2")}
    DATE_FORMAT = "%b %d, %Y"

    @classmethod
    def get_index(cls, index_name):
        """ retrieves data from the given index in csv format

        @param index_name: the name of the index
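        """
        # The original method body is not shown above; what follows is a
        # minimal sketch, assuming get_index() simply streams the
        # decompressed csv lines of the bundled data file -- the real
        # implementation may differ.
        with BZ2File(cls.SUPPORTED_INDICES[index_name]) as f:
            return [line.decode('utf-8') for line in f]


# Hypothetical usage -- the available index names depend on the *.idx.bz2
# files shipped in data/:
#   >>> sorted(StockIndex.SUPPORTED_INDICES)
#   >>> StockIndex.get_index('dow_jones')  # 'dow_jones' is a made-up key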
from glob import glob
from os.path import basename

from eWRT.util.module_path import get_resource


def read_wordlist(fname):
    ''' reads a language wordlist from a file '''
    with open(fname, 'r', encoding='utf-8') as f:
        return set(map(str.lower, map(str.strip, f.readlines())))


# returns the language name based on the language file's name
def get_lang_name(fname):
    return basename(fname).split(".")[0]


LANG_DATA_DIR = get_resource(__file__, 'data')

##
# \var STOPWORD_DICT: a dictionary of the 100 most common words in the
#      given language
STOPWORD_DICT = {get_lang_name(fname): read_wordlist(fname)
                 for fname in glob(LANG_DATA_DIR + "/*.csv")}

DELETE_CHARS = ",.!?\"'"
# str.translate() expects a table keyed by Unicode ordinals, so the table
# maps ord(ch) -> None rather than using the characters themselves as keys
DELETE_TABLE = {ord(ch): None for ch in DELETE_CHARS}

# import string [mig]
# table = str.maketrans('ac', 'cx')  # [mig] string --> str


def detect_language(text):
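    # The original body is not shown above; this is a minimal sketch under
    # the assumption that the language whose stopword list overlaps most
    # with the text wins -- not necessarily eWRT's exact scoring rule.
    tokens = {token.translate(DELETE_TABLE).lower()
              for token in text.split()}
    scores = {lang: len(tokens & stopwords)
              for lang, stopwords in STOPWORD_DICT.items()}
    # without any bundled wordlists no decision is possible
    return max(scores, key=scores.get) if scores else None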
#!/usr/bin/env python

from eWRT.input.csv import get_csv_data
from eWRT.util.module_path import get_resource

TEST_FILE = get_resource(__file__, ('test.csv', ))


def test_csv_data():
    CORRECT = ([1, 2], [2, 4], [4, 6])
    with open(TEST_FILE) as f:
        for correct, computed in zip(
                CORRECT,
                get_csv_data(f, ('int(row["a"])',
                                 'int(row["a"])+int(row["b"])'),
                             'row["show"]=="True"')):
            print(correct, computed)
            assert correct == computed
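# For reference, a minimal sketch of a generator compatible with the call
# above, based solely on how the test uses it; the real eWRT.input.csv
# implementation may differ, and all names below are illustrative.
import csv


def get_csv_data_sketch(csv_file, fetch_expressions, filter_expression='True'):
    # expose each csv row as row["column"] to the eval()'d expressions,
    # keep only rows that satisfy the filter expression, and compute one
    # value per fetch expression
    for row in csv.DictReader(csv_file):
        if eval(filter_expression, {}, {'row': row}):
            yield [eval(expression, {}, {'row': row})
                   for expression in fetch_expressions]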