def erd(actual, predicted, delimiter='\t', idcol=0, catcol=2, NumRP=0):
    """Score the categories of one file against those of another.

    File format: ...<id>...<category>...

    Both files are loaded with util.read_multimap, keyed on column
    `idcol` with values taken from column `catcol` (presumably a mapping
    from id to a set of categories -- confirm against util.read_multimap).

    Every id found in `actual` is added to the scorer together with the
    predicted categories for that id (an empty frozenset when the id is
    missing from `predicted`).  Ids that occur only in `predicted` are
    added with an empty frozenset as the actual categories.

    `NumRP` is passed through to MuLabCat unchanged; the scorer is
    created with reassign=False.

    Returns the populated MuLabCat instance.
    """
    def load(path):
        # Same reader settings for both inputs.
        return util.read_multimap(path, delimiter, col1=idcol, col2=catcol,
                                  logger=MuLabCat.logger)

    title = util.title_from_2paths(actual, predicted)
    mlc = MuLabCat(title, reassign=False, NumRP=NumRP)
    adict = load(actual)
    pdict = load(predicted)
    nothing = frozenset()

    # Pass 1: everything that has a ground-truth entry.
    for key, truth in adict.iteritems():
        guess = pdict.get(key, nothing)
        mlc.add(truth, guess)

    # Pass 2: predictions for ids that have no ground-truth entry at all.
    for key, guess in pdict.iteritems():
        if key in adict:
            continue
        mlc.add(nothing, guess)

    return mlc
def score(actual, predicted, delimiter='\t', abeg=1, pbeg=1, NumRP=0):
    """Score one file against another.

    File format: <query string>[<delimiter><category>]+

    The two files are read in lockstep, so the queries in both files must
    be identical and appear in the same order.  `abeg` and `pbeg` are the
    columns where the actual and predicted categories start.  Empty
    category fields are ignored.

    `NumRP` is passed through to MuLabCat unchanged.

    Returns the populated MuLabCat instance.

    Raises ValueError when the files have a different number of rows,
    when either file contains an empty row (e.g. a blank line), or when
    the query strings of a row pair differ.
    """
    mlc = MuLabCat(util.title_from_2paths(actual, predicted), NumRP=NumRP)
    util.reading(actual, MuLabCat.logger)
    util.reading(predicted, MuLabCat.logger)
    empty = frozenset([''])
    with open(actual) as af, open(predicted) as pf:
        # izip_longest pads the shorter file with None, which is how a
        # row-count mismatch is detected below.
        rows = itertools.izip_longest(csv.reader(af, delimiter=delimiter),
                                      csv.reader(pf, delimiter=delimiter))
        for sa, sp in rows:
            if sa is None or sp is None:
                raise ValueError("uneven files", af, pf, sa, sp)
            # csv.reader yields [] for a blank line; report it explicitly
            # instead of failing with a bare IndexError on sa[0]/sp[0].
            if not sa or not sp:
                raise ValueError("empty row", af, pf, sa, sp)
            if sa[0] != sp[0]:
                raise ValueError("query string mismatch", sa, sp)
            mlc.add(frozenset(sa[abeg:]) - empty,
                    frozenset(sp[pbeg:]) - empty)
    return mlc
def score(actual, predicted, delimiter='\t', abeg=1, pbeg=1, NumRP=0):
    """Score one file against another.

    File format: <query string>[<delimiter><category>]+

    Rows of the two files are paired up positionally, so the queries in
    both files must be identical and in the same order.  `abeg` and
    `pbeg` are the columns where the actual and predicted categories
    start.  Empty category fields are dropped before scoring.

    `NumRP` is passed through to MuLabCat unchanged.

    Returns the populated MuLabCat instance.

    Raises ValueError if one file has more rows than the other, if a row
    in either file is empty (e.g. a blank line), or if the query strings
    of a row pair do not match.
    """
    mlc = MuLabCat(util.title_from_2paths(actual, predicted), NumRP=NumRP)
    util.reading(actual, MuLabCat.logger)
    util.reading(predicted, MuLabCat.logger)
    empty = frozenset([''])
    with open(actual) as af, open(predicted) as pf:
        actual_rows = csv.reader(af, delimiter=delimiter)
        predicted_rows = csv.reader(pf, delimiter=delimiter)
        # The shorter file is padded with None so a length mismatch is
        # caught rather than silently truncated.
        for sa, sp in itertools.izip_longest(actual_rows, predicted_rows):
            if sa is None or sp is None:
                raise ValueError("uneven files", af, pf, sa, sp)
            # A blank line parses to []; indexing it would raise an
            # uninformative IndexError, so fail with row context instead.
            if not sa or not sp:
                raise ValueError("empty row", af, pf, sa, sp)
            if sa[0] != sp[0]:
                raise ValueError("query string mismatch", sa, sp)
            mlc.add(
                frozenset(sa[abeg:]) - empty,
                frozenset(sp[pbeg:]) - empty)
    return mlc
def erd(actual, predicted, delimiter='\t', idcol=0, catcol=2, NumRP=0):
    """Compare the per-id categories of two files.

    File format: ...<id>...<category>...

    Each file is read through util.read_multimap using `idcol` as the
    key column and `catcol` as the value column (the result is used as a
    mapping from id to a collection of categories -- confirm the exact
    type against util.read_multimap).

    The scorer receives one (actual, predicted) pair per id:
      * ids present in `actual` get their predicted categories, or an
        empty frozenset when `predicted` has no entry for them;
      * ids present only in `predicted` get an empty frozenset as the
        actual categories.

    The MuLabCat scorer is built with reassign=False and the given
    `NumRP`, and is returned once all pairs have been added.
    """
    mlc = MuLabCat(
        util.title_from_2paths(actual, predicted),
        reassign=False,
        NumRP=NumRP,
    )
    reader_options = dict(col1=idcol, col2=catcol, logger=MuLabCat.logger)
    adict = util.read_multimap(actual, delimiter, **reader_options)
    pdict = util.read_multimap(predicted, delimiter, **reader_options)

    # Ids with a known answer: pair them with whatever was predicted.
    for ident, expected in adict.iteritems():
        if ident in pdict:
            mlc.add(expected, pdict.get(ident))
        else:
            mlc.add(expected, frozenset())

    # Ids that were predicted but have no known answer.
    for ident, guessed in pdict.iteritems():
        if ident not in adict:
            mlc.add(frozenset(), guessed)

    return mlc