Example #1
    def run(self):
        if not translator.init():
            return
        os.nice(10)

        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        s.bind((self.HOST, self.PORT))
        s.listen(10)
        while True:
            conn, addr = s.accept()
            print 'Connected by', addr
            while True:
                data = conn.recv(1024)
                if not data: break
                try:
                    query = json.loads(data)
                    if query['auto']:
                        translator.translate_selected()
                    else: 
                        translator.translate(query['text'], query['lang'])
                except (ValueError, TypeError, KeyError):
                    # ValueError covers malformed JSON; KeyError covers a
                    # request without 'auto', 'text' or 'lang'.
                    print 'Request is not valid.'
            conn.close()
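
The loop above expects each request to be a JSON object with 'auto', 'text' and 'lang' keys, sent over a plain TCP connection. A hypothetical client sketch under that assumption, written in the same Python 2 style as the example; the host, port and language values are placeholders for whatever self.HOST and self.PORT resolve to:

import json
import socket


def send_query(host, port, text, lang, auto=False):
    # Encode the request the same way the server's handler decodes it.
    payload = json.dumps({'auto': auto, 'text': text, 'lang': lang})
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    s.connect((host, port))
    s.sendall(payload)
    s.close()


# Example call with placeholder host/port and target language.
send_query('127.0.0.1', 4242, 'hello world', 'pt')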
Example #2
from controller.mainwindow import MainWindowController
import loader
import translator

if __name__ == "__main__":
    from PyQt5.QtWidgets import QApplication
    import sys
    translator.init("portugueses")
    app = QApplication(sys.argv)
    win = MainWindowController()
    win.view.setStyleSheet(loader.loadStyle("main.style.css"))
    win.run()
    sys.exit(app.exec_())
Example #3
			mode += 'N'

	if mode == 'NNN':
		print "Error! Mode 'NNN' can't by used."
		sys.exit()

	return mode


if len(sys.argv) != 3:
	print 'Usage: {0} XXX file'.format(sys.argv[0])
	print ''
	print 'Mode:'
	print '  Y__\t search tokens in the English dictionary'
	print '  _Y_\t search tokens in the Lingo dictionary'
	print '  __Y\t keep original tokens'
	print ''
	print 'Any combination can be used. Examples:'
	print '  YYY - it will search tokens in both dictionaries and keep original tokens'
	print '  YYN - it will search tokens in both dictionaries and discard original tokens'
	print '  YNY - it will search tokens only in the English dictionary and keep original tokens'
	sys.exit()

else:
	mode = fixMode(sys.argv[1])

	print "Translating file '{0}' with mode '{1}'.".format(sys.argv[2], mode)
	translator.init(mode)
	translator.translate_file(sys.argv[2], mode)
	print 'Done!'
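
Only the tail of fixMode() made it into the excerpt above. A hypothetical reconstruction of the full helper, assuming it merely normalizes the XXX argument to three Y/N flags (anything other than 'Y' becomes 'N', short arguments are padded with 'N') before the 'NNN' check:

def fixMode(arg):
	# Hypothetical: keep at most three characters and map everything
	# that is not 'Y' (case-insensitive) to 'N'.
	mode = ''
	for ch in arg[:3].upper():
		if ch == 'Y':
			mode += 'Y'
		else:
			mode += 'N'

	# Pad short arguments so the mode is always three characters long.
	while len(mode) < 3:
		mode += 'N'

	if mode == 'NNN':
		print "Error! Mode 'NNN' can't be used."
		sys.exit()

	return mode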
Example #4
def translate(idir, ns):
    # create graphdata graph instance
    graph = rdflib.Graph(identifier='Microsoft_Academic_Graph.Data')
    graph.open('./store', create=True)

    # set namespaces
    nsmgr = rdflib.namespace.NamespaceManager(graph)
    nsmgr.bind('base', rdflib.Namespace(ns))
    nsmgr.bind('skos',
               rdflib.Namespace('http://www.w3.org/2004/02/skos/core#'))
    nsmgr.bind('void', rdflib.Namespace('http://rdfs.org/ns/void#'))
    nsmgr.bind('dcterms', rdflib.Namespace('http://purl.org/dc/elements/1.1/'))
    nsmgr.bind('foaf', rdflib.Namespace('http://xmlns.com/foaf/0.1/'))

    nss = dict(nsmgr.namespaces())

    print('initiating graph...')
    translator.init(graph, nss)

    #with open(idir + '2016KDDCupSelectedAffiliations.txt') as f:
    #print('processing affiliations...')
    #affiliations = translator.f2016KDDCupSelectedAffiliationsHandler(graph, nss, f)

    # gather all KDD selected papers plus their conferences (== targets) plus year
    with open(idir + '2016KDDCupSelectedPapers.txt') as f:
        print('processing 2016 papers...')
        kddPapers, kddConfs, kddYears = zip(
            *translator.f2016KDDCupSelectedPapersHandler(graph, nss, f))
    print()
    print('found {} papers, with conferences, and years'.format(
        len(kddPapers)))

    # add conference instances from all target conferences from year > 2010
    with open(idir + 'ConferenceInstances.txt') as f:
        print('processing conference instances...')
        translator.fKDDConferenceInstancesHandler(graph, nss, f, kddConfs,
                                                  kddYears, kddPapers)

    del kddYears
    print()

    # add papers from target conference instances
    with open(idir + 'Papers.txt') as f:
        print('processing papers...')
        (paperConfIndex,
         kddJournals) = translator.fKDDPapersHandler(graph, nss, f, kddPapers,
                                                     kddConfs)

    print()
    print('found {} KDD journals and {} Non-KDD paper/conference pairs'.format(
        len(kddJournals), len(paperConfIndex)))

    # gather all relevant fields of study for target conferences
    with open(idir + 'PaperKeywords.txt') as f:
        print('processing paper keywords...')
        (kddKeywords, allKeywords) = translator.fPaperKeywordsHandler(graph, nss, f,\
                                                                      kddPapers,\
                                                                      paperConfIndex)
    del paperConfIndex
    print()
    print('found {} KDD keywords and {} Non-KDD conference/keywords pairs'.
          format(len(kddKeywords), len(allKeywords)))

    print('processing keyword graph...')
    kwgraph, nrOfKws = fieldOfStudyParser(idir, nss)  # keyword hierarchy graph
    print()
    print('found {} keywords in total'.format(nrOfKws))
    print('expanding KDD Keywords...')
    kddKeywords = translator.downwardsExpandKeywordTree(
        kwgraph, nss, kddKeywords)
    print()
    print('expanded to {} KDD keywords'.format(len(kddKeywords)))
    print('expanding non-KDD Keywords...')
    progress = 0
    for k, v in allKeywords.items():
        allKeywords[k] = translator.downwardsExpandKeywordTree(kwgraph, nss, v)
        progress += 1
        if progress % 100 == 0:
            sys.stdout.write('\r ' + str(progress) + ' lines read ')

    kddConfs = set(kddConfs)  # rmv duplicates
    l = len(kddConfs)
    # all potentially interesting conferences
    print('expanding conferences...')
    allConfs = translator.expandConferences(kddKeywords, allKeywords, nrOfKws)
    allConfs.extend(kddConfs)
    del kddKeywords
    del kddConfs
    print()
    print('expanded from {} to {} conferences'.format(l, len(allConfs)))

    # add papers from related conferences
    with open(idir + 'Papers.txt') as f:
        print('expanding papers...')
        (paperYearConfs,
         journals) = translator.fPapersHandler(graph, nss, f, kddPapers,
                                               allConfs)

    journals = set(journals)  # rmv duplicates
    journals = journals.union(kddJournals)
    papers, years, confs = zip(*paperYearConfs)

    print()
    print('found {} additional journals and {} papers'.format(
        len(journals), len(papers)))

    del paperYearConfs
    del kddJournals

    # add conference instances from all related conferences from year > 2010
    l = len(graph)
    with open(idir + 'ConferenceInstances.txt') as f:
        print('expanding conference instances...')
        translator.fConferenceInstancesHandler(graph, nss, f, allConfs, years,
                                               papers, confs)
    print()
    print('expanded graph with {} conference instance triples'.format(
        len(graph) - l))

    ### rest is generic ###

    papers = list(papers)
    papers.extend(kddPapers)
    del kddPapers
    print()
    print('expanded to {} papers'.format(len(papers)))

    # add conference organizations
    l0 = len(graph)
    with open(idir + 'Conferences.txt') as f:
        print('processing conferences...')
        translator.fConferencesHandler(graph, nss, f, allConfs)
    print()
    print('expanded graph with {} conference triples'.format(len(graph) - l0))

    # add journals from papers
    l0 = len(graph)
    with open(idir + 'Journals.txt') as f:
        print('processing journals...')
        translator.fJournalsHandler(graph, nss, f, journals)
    print()
    print('expanded graph with {} journal triples'.format(len(graph) - l0))

    # add nr of citations by others
    l0 = len(graph)
    with open(idir + 'PaperReferences.txt') as f:
        print('processing paper references...')
        paperRefs = translator.fPaperReferencesHandler(graph, nss, f, papers)

    translator.fPaperRefCount(graph, nss, paperRefs)
    print()
    print('expanded graph with {} reference triples'.format(len(graph) - l0))

    # not relevant feature
    #with open(idir + 'PaperUrls.txt') as f:
    #    translator.fPaperUrlsHandler(graph, nss, f, papers)

    # add authors from papers + affiliation
    with open(idir + 'PaperAuthorAffiliations.txt') as f:
        print('processing paper/author/affiliations...')
        (authors, affiliations) = translator.fPaperAuthorAffiliationsHandler(
            graph, nss, f, papers)
    authors = set(authors)
    affiliations = set(affiliations)
    print()
    print('found {} authors and {} affiliations'.format(
        len(authors), len(affiliations)))

    with open(idir + 'Affiliations.txt') as f:
        print('processing affiliations...')
        translator.fAffiliationsHandler(graph, nss, f, affiliations)

    # add names
    with open(idir + 'Authors.txt') as f:
        print('processing authors...')
        translator.fAuthorsHandler(graph, nss, f, authors)

    print()
    print('{} triples in total'.format(len(graph)))
    return graph
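
translate() returns an ordinary rdflib.Graph, so the result can be serialized or queried with the standard rdflib API. A minimal usage sketch; the input directory, base namespace and output file name below are placeholders, not values from the original project:

# Placeholder values: MAG dump directory and base namespace are assumptions.
graph = translate('./mag-dump/', 'http://example.org/mag/')

# Persist the extracted subset as Turtle and report its size.
graph.serialize(destination='mag_subset.ttl', format='turtle')
print('wrote {} triples to mag_subset.ttl'.format(len(graph)))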