Пример #1
0
	def __init__(self, idir, name2file):
		"""Initialise the parent with an empty name and idir, then collect venues.

		Every entry directly under idir becomes a Venue unless its path ends
		in one of the ignored endings or its name is on the postponed list.
		name2file is stored as self.n2f and handed on to each Venue.
		"""
		super(Sleigh, self).__init__('', idir)
		self.venues = []
		self.n2f = name2file
		# names listed here are announced and left out; the list is empty right now
		postponed = []
		# top-level JSON files, keyed by file name up to its first dot
		toplevel = {}
		for jsonpath in glob.glob(idir+'/*.json'):
			stem = jsonpath.split('/')[-1].split('.')[0]
			if stem in postponed:
				print(C.red('Skipping') + ' ' + C.purple(jsonpath) + ' ' + C.red('for now'))
				continue
			toplevel[stem] = jsonpath
		for entry in glob.glob(idir+'/*'):
			base = entry.split('/')[-1]
			# paths with these endings never become venues
			ignore = entry.endswith(('.md', '.json', '/frem', '/edif'))
			if base in postponed:
				print(C.red('Skipping') + ' ' + C.purple(entry) + ' ' + C.red('for now'))
				ignore = True
			if ignore:
				continue
			if base not in toplevel:
				print(C.red('Legacy non-top definition of'), entry)
			# the venue is registered whether or not a top-level JSON file matches it
			self.venues.append(Venue(entry, idir, name2file, self))
Пример #2
0
 def __init__(self, idir, name2file):
     """Initialise the parent with an empty name and idir, then collect venues.

     Entries directly under idir are turned into Venue objects. Paths with
     an ignored ending or a name on the on-hold list are left out, and an
     entry without a matching top-level JSON file is reported as legacy.
     name2file is stored as self.n2f and handed on to each Venue.
     """
     super(Sleigh, self).__init__('', idir)
     self.venues = []
     self.n2f = name2file
     # top-level JSON files, keyed by lastSlash() of the path up to its first dot
     known = {}
     # names listed here are announced and left out; the list is empty right now
     on_hold = []
     for found in glob.glob(idir + '/*.json'):
         key = lastSlash(found).split('.')[0]
         if key in on_hold:
             notice = C.red('Skipping') + ' ' + C.purple(found) + ' ' + C.red('for now')
             print(notice)
             continue
         known[key] = found
     for found in glob.glob(idir + '/*'):
         excluded = any(
             found.endswith(tail)
             for tail in ('.md', '.json', '/frem', '/edif'))
         if found.split('/')[-1] in on_hold:
             notice = C.red('Skipping') + ' ' + C.purple(found) + ' ' + C.red('for now')
             print(notice)
             excluded = True
         if excluded:
             continue
         name = lastSlash(found)
         if name in known:
             self.venues.append(Venue(found, idir, name2file, self))
             continue
         # no top-level JSON file goes with this entry
         print(C.red('Legacy non-top definition of'), found)
         if name not in ('edif', 'frem'):
             self.venues.append(Venue(found, idir, name2file, self))
Пример #3
0
        print('[ {} ] {}'.format(statuses[r], fn))
    return r


def two(n):
    """Render n as text, putting a single '0' in front when n is below 10."""
    template = '0{}' if n < 10 else '{}'
    return template.format(n)


if __name__ == "__main__":
    verbose = sys.argv[-1] == '-v'
    peoplez = glob.glob(ienputdir + '/people/*.json')
    print('{}: {} venues, {} papers by {} people\n{}'.format(\
     C.purple('BibSLEIGH'),
     C.red(len(sleigh.venues)),
     C.red(sleigh.numOfPapers()),
     C.red(len(peoplez)),
     C.purple('='*42)))
    cx = {0: 0, 1: 0, 2: 0}
    # stem ALL the papers!
    for v in sleigh.venues:
        for c in v.getConfs():
            for p in c.papers:
                cx[checkreport(p.filename, p, None)] += 1
        for b in v.getBrands():
            cx[checkreport(b.filename, None, b)] += 1
    # write all stems
    listOfStems = sorted(filter(ifApproved, ALLSTEMS),
                         key=lambda w: two(len(w)) + w)
Пример #4
0
def _bare_identifier(link):
    """Return (key, identifier) if link is a known prefix followed by an identifier.

    The identifier is everything after the matched prefix. None is returned
    when no prefix matches.
    """
    prefixes = (
        ('http://dx.doi.org/', 'doi'),
        ('http://doi.acm.org/', 'doi'),
        ('http://doi.ieeecomputersociety.org/', 'doi'),
        ('http://dl.acm.org/citation.cfm?id=', 'acmid'),
        ('http://portal.acm.org/citation.cfm?id=', 'acmid'),
    )
    for prefix, key in prefixes:
        if link.startswith(prefix):
            return key, link[len(prefix):]
    return None


def checkon(fn, o):
    """Normalise the link fields of ``o.json`` and rewrite the file if they changed.

    Args:
        fn: path of the file; ``'.json'`` is appended when the path does not
            exist or is a directory.
        o: object exposing ``json`` (a mapping that is edited in place),
            ``getJSON()`` and ``get()``.

    Returns:
        1 when the file content differs from ``o.getJSON()`` as it was before
        normalisation (the file is left alone), 2 when normalisation changed
        the serialisation and the file was rewritten, 0 otherwise.
    """
    if not os.path.exists(fn) or os.path.isdir(fn):
        fn = fn + '.json'
    # the context manager closes the handle even when reading fails
    with open(fn, 'r', encoding='utf-8') as f:
        # the first and the last line of the file are left out of the comparison
        lines = f.readlines()[1:-1]
    flines = json2lines(lines)
    plines = sorted(json2lines(o.getJSON().split('\n')))
    # "url" from DBLP are useless
    if 'url' in o.json:
        useless = ('db/conf/', 'db/series/', 'db/books/', 'db/journals/')
        urls = [link.replace('https://', 'http://')
                for link in listify(o.json['url'])
                if not link.startswith(useless)]
        if not urls:
            del o.json['url']
        elif len(urls) == 1:
            o.json['url'] = urls[0]
        else:
            o.json['url'] = urls
    if 'ee' in o.json and 'doi' not in o.json:
        if isinstance(o.json['ee'], list) and verbose:
            print(C.red('Manylink:'), o.json['ee'])
        # links that stay in "ee"; recognised identifiers move to their own keys
        newee = []
        for onelink in listify(o.json['ee']):
            extracted = _bare_identifier(onelink)
            if extracted is not None:
                o.json[extracted[0]] = extracted[1]
            elif onelink.startswith((
                    'http://ieeexplore.ieee.org/xpl/freeabs_all.jsp?arnumber=',
                    'http://ieeexplore.ieee.org/xpls/abs_all.jsp?arnumber=')):
                o.json['ieeearid'] = onelink.split('=')[-1]
            elif onelink.startswith('http://ieeexplore.ieee.org/xpls/abs_all.jsp?isnumber=') \
                    and 'arnumber' in onelink:
                o.json['ieeearid'] = onelink.split('arnumber=')[-1].split('&')[0]
            elif onelink.startswith(
                    'http://ieeexplore.ieee.org/xpl/mostRecentIssue.jsp?punumber='):
                o.json['ieeepuid'] = onelink.split('=')[-1]
            elif onelink.startswith(
                    'http://ieeexplore.ieee.org/xpl/tocresult.jsp?isnumber='):
                o.json['ieeeisid'] = onelink.split('=')[-1]
            elif onelink.startswith(
                    'http://eceasst.cs.tu-berlin.de/index.php/eceasst/article/view/'):
                newee.append(
                    'http://journal.ub.tu-berlin.de/eceasst/article/view/' +
                    onelink.split('/')[-1])
            elif onelink.endswith('.pdf') and onelink.startswith(
                    ('http://computer.org/proceedings/', 'http://csdl.computer.org/')):
                # Bad: http://computer.org/proceedings/icsm/1189/11890007.pdf
                # Bad: http://csdl.computer.org/comp/proceedings/date/2003/1870/02/187020040.pdf
                # Good: http://www.computer.org/csdl/proceedings/icsm/2001/1189/00/11890004.pdf
                if onelink.startswith('http://csdl'):
                    cname, _, cid, mid, pid = onelink.split('/')[5:10]
                else:
                    cname, cid, pid = onelink.split('/')[4:7]
                    # heuristic: the two characters after the conference id
                    # inside the file name are taken as the middle segment
                    if pid.startswith(cid):
                        mid = pid[len(cid):len(cid) + 2]
                    else:
                        mid = '00'
                newee.append(
                    'http://www.computer.org/csdl/proceedings/{}/{}/{}/{}/{}'.format(
                        cname, o.get('year'), cid, mid, pid))
            else:
                if 'ieee' in onelink:
                    print(C.purple('IEEE'), onelink)
                if verbose:
                    print(C.yellow('Missed opportunity:'), onelink)
                # nothing matches => preserve
                newee.append(onelink)
        if not newee:
            del o.json['ee']
        elif len(newee) == 1:
            o.json['ee'] = newee[0]
        else:
            o.json['ee'] = newee
        # post-processing normalisation: purely numeric ACM ids are stored as int
        if 'acmid' in o.json and not isinstance(
                o.json['acmid'], int) and o.json['acmid'].isdigit():
            o.json['acmid'] = int(o.json['acmid'])
    if 'eventuri' in o.json:
        o.json['eventurl'] = o.json['eventuri']
        del o.json['eventuri']
    if 'eventurl' in o.json and o.json['eventurl'].startswith('https://'):
        o.json['eventurl'] = o.json['eventurl'].replace('https://', 'http://')
    nlines = sorted(json2lines(o.getJSON().split('\n')))
    if flines != plines:
        return 1
    if plines != nlines:
        with open(fn, 'w', encoding='utf-8') as f:
            f.write(o.getJSON())
        return 2
    return 0
Пример #5
0
def report(one, two):
	"""Print one inside square brackets, followed by a space and two."""
	line = '[ {} ] {}'.format(one, two)
	print(line)

def checkreport(fn, o):
	"""Run checkon() on fn and o, report the outcome when warranted, return the code.

	A report line is produced when the global verbose flag is set or when
	checkon() came back with anything other than 0.
	"""
	labels = (C.blue('PASS'), C.red('FAIL'), C.yellow('FIXD'))
	status = checkon(fn, o)
	# silent path: a clean result outside verbose mode
	if not verbose and status == 0:
		return status
	report(labels[status], fn)
	return status

if __name__ == "__main__":
	# verbose is reassigned only when an argument is present,
	# and is true only if that first argument is exactly -v
	if sys.argv[1:]:
		verbose = sys.argv[1] == '-v'
	print('{}: {} venues, {} papers\n{}'.format(
		C.purple('BibSLEIGH'),
		C.red(len(sleigh.venues)),
		C.red(sleigh.numOfPapers()),
		C.purple('=' * 42),
	))
	# tally of checkreport() results: 0 counts as ok, 1 as failed, 2 as fixed
	cx = dict.fromkeys((0, 1, 2), 0)
	for v in sleigh.venues:
		for c in v.getConfs():
			# the conference itself is checked before its papers
			cx[checkreport(c.filename, c)] += 1
			for p in c.papers:
				cx[checkreport(p.filename, p)] += 1
	print('{} files checked, {} ok, {} fixed, {} failed'.format(
		C.bold(sum(cx.values())),
		C.blue(cx[0]),
		C.yellow(cx[2]),
		C.red(cx[1]),
	))
Пример #6
0
		f.close()
		return 2
	else:
		return 0

def checkreport(fn, o):
	"""Run checkon() on fn and o, print a status line when warranted, return the code.

	The line is printed when the global verbose flag is set or when
	checkon() came back with anything other than 0.
	"""
	labels = (C.blue('PASS'), C.red('FAIL'), C.yellow('FIXD'))
	status = checkon(fn, o)
	# silent path: a clean result outside verbose mode
	if not verbose and status == 0:
		return status
	print('[ {} ] {}'.format(labels[status], fn))
	return status

if __name__ == "__main__":
	if len(sys.argv) < 4:
		print(C.purple('BibSLEIGH'), 'usage:')
		print('\t', sys.argv[0], '<key>', '<inputValue>', '<outputValue>', '[<limit>]', '[-v]')
		sys.exit(1)
	verbose = sys.argv[-1] == '-v'
	k2r = sys.argv[1]
	v2i = sys.argv[2]
	v2o = sys.argv[3]
	if len(sys.argv) > 4:
		d2r = sys.argv[4]
	print('{}: {} venues, {} papers\n{}'.format(\
		C.purple('BibSLEIGH'),
		C.red(len(sleigh.venues)),
		C.red(sleigh.numOfPapers()),
		C.purple('='*42)))
	cx = {0: 0, 1: 0, 2: 0}
	for v in sleigh.venues:
Пример #7
0
	f.close()
	f = open('scrap-committees/scraped-by-grammarware.csv', 'r')
	for line in f.readlines():
		csv.append(line.strip().split(';'))
	f.close()
	# All known contributors
	people = {}
	for fn in glob.glob(ienputdir + '/people/*.json'):
		p = parseJSON(fn)
		# people.append(p)
		if 'name' not in p.keys():
			print('[', C.red('NOGO'), ']', 'No name in', fn)
			continue
		people[p['name']] = p
	print('{}: {} venues, {} papers\n{}'.format(\
		C.purple('BibSLEIGH'),
		C.red(len(sleigh.venues)),
		C.red(sleigh.numOfPapers()),
		C.purple('='*42)))
	# All people who ever contributed
	names = []
	for v in sleigh.venues:
		for c in v.getConfs():
			for p in c.papers:
				for k in ('author', 'editor'):
					if k in p.json.keys():
						names += [a for a in listify(p.json[k]) if a not in names]
	# caching
	peoplekeys = people.keys()
	if os.path.exists('_established.json'):
		established = json.load(open('_established.json', 'r'))
Пример #8
0
    if n in name2file:
        return '<a href="{}">{}</a>'.format(name2file[n], shorten(n))
    else:
        return n


def pad(n):
    """Return str(n) with '0' characters added on the left up to a width of 4."""
    return str(n).rjust(4, '0')


if __name__ == "__main__":
    print('{}: {} venues, {} papers\n{}'.format(\
     C.purple('BibSLEIGH'),
     C.red(len(sleigh.venues)),
     C.red(sleigh.numOfPapers()),
     C.purple('='*42)))
    ps = []
    # flatten the sleigh
    bykey = {}
    for v in sleigh.venues:
        bykey[v.getKey()] = v
        for c in v.getConfs():
            bykey[c.getKey()] = c
            for p in c.papers:
                bykey[p.getKey()] = p
    print(C.purple('BibSLEIGH flattened to {} entries'.format(len(bykey))))
    # tagged = []
    # for k in ts.keys():
Пример #9
0
def linkto(n):
	"""Return an HTML anchor for n if name2file has an entry for it, else n unchanged.

	The anchor points at name2file[n] and shows shorten(n) as its text.
	"""
	if n not in name2file:
		return n
	target = name2file[n]
	return '<a href="{}">{}</a>'.format(target, shorten(n))

def pad(n):
	"""Return str(n) with '0' characters added on the left up to a width of 4."""
	return str(n).rjust(4, '0')

if __name__ == "__main__":
	print('{}: {} venues, {} papers\n{}'.format(\
		C.purple('BibSLEIGH'),
		C.red(len(sleigh.venues)),
		C.red(sleigh.numOfPapers()),
		C.purple('='*42)))
	ps = []
	# flatten the sleigh
	bykey = {}
	for v in sleigh.venues:
		bykey[v.getKey()] = v
		for c in v.getConfs():
			bykey[c.getKey()] = c
			for p in c.papers:
				bykey[p.getKey()] = p
	print(C.purple('BibSLEIGH flattened to {} entries'.format(len(bykey))))
	# tagged = []
	# for k in ts.keys():
Пример #10
0
    else:
        return 0


def checkreport(fn, o):
    """Run checkon() on fn and o, print a status line when warranted, return the code.

    The line is printed when the global verbose flag is set or when
    checkon() came back with anything other than 0.
    """
    labels = (C.blue('PASS'), C.red('FAIL'), C.yellow('FIXD'))
    status = checkon(fn, o)
    # silent path: a clean result outside verbose mode
    if not verbose and status == 0:
        return status
    print('[ {} ] {}'.format(labels[status], fn))
    return status


if __name__ == "__main__":
    if len(sys.argv) < 2:
        print(C.purple('BibSLEIGH'), sys.argv[0], 'requires a limit to work.')
        sys.exit(1)
    verbose = sys.argv[-1] == '-v'
    d2r = sys.argv[1]
    print('{}: {} venues, {} papers\n{}'.format(\
     C.purple('BibSLEIGH'),
     C.red(len(sleigh.venues)),
     C.red(sleigh.numOfPapers()),
     C.purple('='*42)))
    cx = {0: 0, 1: 0, 2: 0}
    for v in sleigh.venues:
        for c in v.getConfs():
            cx[checkreport(c.filename, c)] += 1
            for p in c.papers:
                cx[checkreport(p.filename, p)] += 1
    print('{} files checked, {} ok, {} fixed, {} failed'.format(\
Пример #11
0
		return 0

def checkreport(fn, o):
	"""Run checkon() on fn and o, print a status line when warranted, return the code.

	The line is printed when the global verbose flag is set or when
	checkon() came back with anything other than 0.
	"""
	labels = (C.blue('PASS'), C.red('FAIL'), C.yellow('FIXD'))
	status = checkon(fn, o)
	# silent path: a clean result outside verbose mode
	if not verbose and status == 0:
		return status
	print('[ {} ] {}'.format(labels[status], fn))
	return status

if __name__ == "__main__":
	if len(sys.argv) > 1:
		verbose = sys.argv[1] == '-v'
	tags = [parseJSON(tfn) for tfn in glob.glob(ienputdir + '/tags/*.json')]
	print('{}: {} tags, {} venues, {} papers\n{}'.format(\
		C.purple('BibSLEIGH'),
		C.red(len(tags)),
		C.red(len(sleigh.venues)),
		C.red(sleigh.numOfPapers()),
		C.purple('='*42)))
	cx = {0: 0, 1: 0, 2: 0}
	for v in sleigh.venues:
		for c in v.getConfs():
			# NB: We don’t tag conferences. Should we?
			# cx[checkreport(c.filename, c)] += 1
			for p in c.papers:
				cx[checkreport(p.filename, p)] += 1
	for rt in relieved.keys():
		print('[ {} ] {} relieved {} markings'.format(C.purple('√'), rt, relieved[rt]))
	print('{} files checked, {} ok, {} fixed, {} failed'.format(\
		C.bold(cx[0] + cx[1] + cx[2]),
Пример #12
0
def _dump_html(path, html):
    """Write html to path as UTF-8, closing the file even if the write fails."""
    with open(path, 'w', encoding='utf-8') as f:
        f.write(html)


def main():
    """Generate the static HTML pages for the whole corpus.

    Written under ``outputdir``: the index page, one page per venue, brand,
    conference and paper, ``about.html`` with the icon line-up, and
    ``sync.html`` with a per-year table driven by ``meta/dblpguide.sync``.
    Progress is printed to standard output.
    """
    print('{}: {} venues, {} papers\n{}'.format(C.purple('BibSLEIGH'),
                                                C.red(len(sleigh.venues)),
                                                C.red(sleigh.numOfPapers()),
                                                C.purple('=' * 42)))
    # generate the index
    _dump_html(outputdir + '/index.html', sleigh.getPage())
    # generate all individual pages
    for v in sleigh.venues:
        # r accumulates the one-line progress summary printed for this venue
        r = C.blue(v.getKey())
        _dump_html(outputdir + '/' + v.getKey() + '.html', v.getPage())
        if v.brands:
            r += '{' + '+'.join([C.blue(b.getKey()) for b in v.brands]) + '}'
            for b in v.brands:
                _dump_html(outputdir + '/' + b.getKey() + '.brand.html',
                           b.getPage())
        r += ' => '
        for c in v.getConfs():
            _dump_html(outputdir + '/' + c.getKey() + '.html', c.getPage())
            for p in c.papers:
                _dump_html(outputdir + '/' + p.getKey() + '.html', p.getPage())
            # conference key without the venue key, dashes shown as spaces
            purekey = c.getKey().replace(v.getKey(), '').replace('-', ' ').strip()
            r += '{} [{}], '.format(purekey, C.yellow(len(c.papers)))
        print(r)
    # generate the icon lineup
    icons = []
    pngs = [
        lastSlash(png).split('.')[0]
        for png in glob.glob(outputdir + '/stuff/*.png')
    ]
    # images with these prefixes or names are not part of the line-up
    pngs = [
        png for png in pngs
        if not (png.startswith(('a-', 'p-', 'ico-'))
                or png in ('cc-by', 'xhtml', 'css', 'open-knowledge', 'edit'))
    ]
    for brand in glob.glob(outputdir + '/*.brand.html'):
        pure = lastSlash(brand).split('.')[0]
        img = pure.lower().replace(' ', '')
        if img in pngs:
            pic = '<div class="wider"><a href="{0}.brand.html"><img class="abc" src="{1}" alt="{0}"/></a><span>{0}</span></div>'.format(
                pure,
                'stuff/' + img + '.png')
            # an image claimed by a brand is not offered to the venue matching below
            pngs.remove(img)
            icons.append(pic)
        # else:
        #     print('No image for', pure)
    # image names whose page name is not simply the upper-cased image name
    corner = {
        'ada': 'TRI-Ada',
        'comparch': 'CompArch',
        'floc': 'FLoC',
        'bibsleigh': 'index'
    }
    for pure in pngs:
        venueCandidate = corner[pure] if pure in corner else pure.upper()
        # the shortest matching file name is preferred
        canlink = sorted(glob.glob(outputdir + '/' + venueCandidate + '*.html'),
                         key=len)
        if canlink:
            pic = '<div class="wider"><a href="{0}"><img class="abc" src="stuff/{1}.png" alt="{2}"/></a><span>{2}</span></div>'.format(
                canlink[0].split('/')[-1],
                pure,
                venueCandidate)
        elif pure == 'twitter':
            pic = '<div class="wider"><a href="https://about.twitter.com/company/brand-assets"><img class="abc" src="stuff/twitter.png" alt="Twitter"/></a><span>Twitter</span></div>'
        elif pure == 'email':
            pic = '<div class="wider"><a href="mailto:[email protected]"><img class="abc" src="stuff/email.png" alt="e-mail"/></a><span>email</span></div>'
        else:
            print('Lonely', pure)
            pic = '<img class="abc" src="stuff/{0}.png" alt="{0}"/>'.format(pure)
        icons.append(pic)
    # find last year of each venue
    # for ven in glob.glob(corpusdir + '/*'):
    # 	venname = lastSlash(ven)
    # 	newstuff += '<strong><a href="http://dblp.uni-trier.de/db/conf/{}/">{} {}</a></strong>, '.format(venname.lower(), venname, nextYear(ven))
    # print(lastSlash(ven), ':', lastYear(ven))
    # write "more info" file
    _dump_html(
        outputdir + '/about.html',
        aboutHTML.format(
            len(icons),
            '<div class="minibar">' + '\n'.join(sorted(icons)) + '</div>'))

    # generate the DBLP sync page
    cell_by_conf_by_year = {}
    Ys = [
        2020, 2019, 2018, 2017, 2016, 2015, 2014, 2013, 2012, 2011, 2010, 2009
    ]
    dblplinks = {}

    with open(ienputdir + '/meta/dblpguide.sync', 'r') as f:
        for line in f:
            # lines read from a file keep their newline, so emptiness has to
            # be tested on the stripped text
            if not line.strip() or line.startswith('#'):
                continue
            words = line.split('|')
            if len(words) != 3:
                print('- Metaline {} skipped!'.format(words))
                continue
            name, dome, dblp = (word.strip() for word in words)
            # every year starts out as "(no)" and is overwritten when found
            cell_by_conf_by_year[name] = {y: '(no)' for y in Ys}
            dblplinks[name] = dblp
            v = sleigh.getVenue(dome)
            if not v:
                # print('- Venue {} not found in the corpus'.format(name))
                continue
            for yy in Ys:
                year = v.getYear(yy)
                if not year:
                    # print('- Year {} in venue {} not found in the corpus among {}'.format(yy, name, [z.year for z in v.years]))
                    continue
                c = year.getConf('{}-{}'.format(name, yy))
                if c:
                    cell_by_conf_by_year[name][yy] = c.getIconItem2('', '')
                    continue
                # print('- Conference {} of year {} in venue {} not found in the corpus'.format(ckey, yy, name))
                # fall back to keys carrying an extra infix; the first hit wins
                for alt in ('v1', 'p1', 'c1', '1', 'J'):
                    c = year.getConf('{}-{}-{}'.format(name, alt, yy))
                    if c:
                        cell_by_conf_by_year[name][yy] = c.getIconItem2('', '')
                        break

    # assemble the table from parts and join once at the end
    table = ['<table>', '<tr><td></td>']
    table.extend('<th>{}</th>\n'.format(y) for y in Ys)
    table.append('</tr>')
    for name in sorted(cell_by_conf_by_year):
        table.append(
            '<tr><th><a href="{}.brand.html">[@]</a> <a href="{}">{}</a></th>'.format(
                name, dblplinks[name], name))
        table.extend(
            '<td>{}</td>\n'.format(cell_by_conf_by_year[name][y]) for y in Ys)
        table.append('</tr>')
    table.append('</table>')

    _dump_html(outputdir + '/sync.html', syncHTML.format(''.join(table)))

    print('{}\nDone with {} venues, {} papers.'.format(
        C.purple('=' * 42), C.red(len(sleigh.venues)),
        C.red(sleigh.numOfPapers())))
Пример #13
0
def checkon(fn, o):
	"""Normalise the link fields of o.json and rewrite the file if they changed.

	fn is the path of the file; '.json' is appended when the path does not
	exist or is a directory. o exposes json (a mapping that is edited in
	place), getJSON() and get().

	Returns 1 when the file content differs from o.getJSON() as it was before
	normalisation (the file is left alone), 2 when normalisation changed the
	serialisation and the file was rewritten, 0 otherwise.
	"""
	if not os.path.exists(fn) or os.path.isdir(fn):
		fn = fn + '.json'
	# explicit UTF-8 keeps reading and writing independent of the platform
	# default; the context manager closes the handle even when reading fails
	with open(fn, 'r', encoding='utf-8') as f:
		# the first and the last line of the file are left out of the comparison
		lines = f.readlines()[1:-1]
	flines = json2lines(lines)
	plines = sorted(json2lines(o.getJSON().split('\n')))
	# "url" from DBLP are useless
	if 'url' in o.json.keys():
		o.json['url'] = [
			link.replace('https://', 'http://')
			for link in listify(o.json['url'])
			if not link.startswith(('db/conf/', 'db/series/', 'db/books/', 'db/journals/'))
		]
		if not o.json['url']:
			del o.json['url']
		elif len(o.json['url']) == 1:
			o.json['url'] = o.json['url'][0]
	if 'ee' in o.json.keys() and 'doi' not in o.json.keys():
		if isinstance(o.json['ee'], list):
			if verbose:
				print(C.red('Manylink:'), o.json['ee'])
		# links that stay in "ee"; recognised identifiers move to their own keys
		newee = []
		for onelink in listify(o.json['ee']):
			# the slice offsets below equal the length of the prefix just tested
			if onelink.startswith('http://dx.doi.org/'):
				o.json['doi'] = onelink[18:]
			elif onelink.startswith('http://doi.acm.org/'):
				o.json['doi'] = onelink[19:]
			elif onelink.startswith('http://doi.ieeecomputersociety.org/'):
				o.json['doi'] = onelink[35:]
			elif onelink.startswith('http://dl.acm.org/citation.cfm?id='):
				o.json['acmid'] = onelink[34:]
			elif onelink.startswith('http://portal.acm.org/citation.cfm?id='):
				o.json['acmid'] = onelink[38:]
			elif onelink.startswith((
					'http://ieeexplore.ieee.org/xpl/freeabs_all.jsp?arnumber=',
					'http://ieeexplore.ieee.org/xpls/abs_all.jsp?arnumber=')):
				o.json['ieeearid'] = onelink.split('=')[-1]
			elif onelink.startswith('http://ieeexplore.ieee.org/xpls/abs_all.jsp?isnumber=') \
					and onelink.find('arnumber') > -1:
				o.json['ieeearid'] = onelink.split('arnumber=')[-1].split('&')[0]
			elif onelink.startswith('http://ieeexplore.ieee.org/xpl/mostRecentIssue.jsp?punumber='):
				o.json['ieeepuid'] = onelink.split('=')[-1]
			elif onelink.startswith('http://ieeexplore.ieee.org/xpl/tocresult.jsp?isnumber='):
				o.json['ieeeisid'] = onelink.split('=')[-1]
			elif onelink.startswith('http://eceasst.cs.tu-berlin.de/index.php/eceasst/article/view/'):
				newee.append('http://journal.ub.tu-berlin.de/eceasst/article/view/' + onelink.split('/')[-1])
			elif onelink.endswith('.pdf') and onelink.startswith(
					('http://computer.org/proceedings/', 'http://csdl.computer.org/')):
				# Bad: http://computer.org/proceedings/icsm/1189/11890007.pdf
				# Bad: http://csdl.computer.org/comp/proceedings/date/2003/1870/02/187020040.pdf
				# Good: http://www.computer.org/csdl/proceedings/icsm/2001/1189/00/11890004.pdf
				if onelink.startswith('http://csdl'):
					cname, _, cid, mid, pid = onelink.split('/')[5:10]
				else:
					cname, cid, pid = onelink.split('/')[4:7]
					# heuristic: the two characters after the conference id
					# inside the file name are taken as the middle segment
					if pid.startswith(cid):
						mid = pid[len(cid):len(cid)+2]
					else:
						mid = '00'
				newee.append('http://www.computer.org/csdl/proceedings/{}/{}/{}/{}/{}'.format(
					cname,
					o.get('year'),
					cid,
					mid,
					pid))
			else:
				if onelink.find('ieee') > -1:
					print(C.purple('IEEE'), onelink)
				if verbose:
					print(C.yellow('Missed opportunity:'), onelink)
				# nothing matches => preserve
				newee.append(onelink)
		if len(newee) == 0:
			del o.json['ee']
		elif len(newee) == 1:
			o.json['ee'] = newee[0]
		else:
			o.json['ee'] = newee
		# post-processing normalisation: purely numeric ACM ids are stored as int
		if 'acmid' in o.json.keys() and not isinstance(o.json['acmid'], int) and o.json['acmid'].isdigit():
			o.json['acmid'] = int(o.json['acmid'])
	if 'eventuri' in o.json.keys():
		o.json['eventurl'] = o.json['eventuri']
		del o.json['eventuri']
	if 'eventurl' in o.json.keys() and o.json['eventurl'].startswith('https://'):
		o.json['eventurl'] = o.json['eventurl'].replace('https://', 'http://')
	nlines = sorted(json2lines(o.getJSON().split('\n')))
	if flines != plines:
		return 1
	elif plines != nlines:
		with open(fn, 'w', encoding='utf-8') as f:
			f.write(o.getJSON())
		return 2
	else:
		return 0
Пример #14
0
def checkreport(fn, o):
    """Run checkon() on fn and o, print a status line when warranted, return the code.

    The line is printed when the global verbose flag is set or when
    checkon() came back with anything other than 0.
    """
    labels = (C.blue('PASS'), C.red('FAIL'), C.yellow('FIXD'))
    status = checkon(fn, o)
    # silent path: a clean result outside verbose mode
    if not verbose and status == 0:
        return status
    print('[ {} ] {}'.format(labels[status], fn))
    return status


if __name__ == "__main__":
    if len(sys.argv) > 1:
        verbose = sys.argv[1] == '-v'
    tags = [parseJSON(tfn) for tfn in glob.glob(ienputdir + '/tags/*.json')]
    print('{}: {} tags, {} venues, {} papers\n{}'.format(\
     C.purple('BibSLEIGH'),
     C.red(len(tags)),
     C.red(len(sleigh.venues)),
     C.red(sleigh.numOfPapers()),
     C.purple('='*42)))
    cx = {0: 0, 1: 0, 2: 0}
    for v in sleigh.venues:
        for c in v.getConfs():
            # NB: We don’t tag conferences. Should we?
            # cx[checkreport(c.filename, c)] += 1
            for p in c.papers:
                cx[checkreport(p.filename, p)] += 1
    for rt in relieved.keys():
        print('[ {} ] {} relieved {} markings'.format(C.purple('√'), rt,
                                                      relieved[rt]))
    print('{} files checked, {} ok, {} fixed, {} failed'.format(\
Пример #15
0
		f.close()
		return 2
	else:
		return 0

def checkreport(fn, o):
	"""Run checkon() on fn and o, print a status line when warranted, return the code.

	The line is printed when the global verbose flag is set or when
	checkon() came back with anything other than 0.
	"""
	labels = (C.blue('PASS'), C.red('FAIL'), C.yellow('FIXD'))
	status = checkon(fn, o)
	# silent path: a clean result outside verbose mode
	if not verbose and status == 0:
		return status
	print('[ {} ] {}'.format(labels[status], fn))
	return status

if __name__ == "__main__":
	if len(sys.argv) < 4:
		print(C.purple('BibSLEIGH'), 'usage:')
		print('\t', sys.argv[0], '<key>', '<inputValue>', '<outputValue>', '[<limit>]', '[-v]')
		sys.exit(1)
	verbose = sys.argv[-1] == '-v'
	k2r = sys.argv[1]
	v2i = sys.argv[2]
	v2o = sys.argv[3]
	if len(sys.argv) > 4:
		d2r = sys.argv[4]
	print('{}: {} venues, {} papers\n{}'.format(\
		C.purple('BibSLEIGH'),
		C.red(len(sleigh.venues)),
		C.red(sleigh.numOfPapers()),
		C.purple('='*42)))
	cx = {0: 0, 1: 0, 2: 0}
	for v in sleigh.venues: