def get_kbd_values(kbname, searchwith=""):
    """Return a list of values by searching a dynamic kb.

    The knowledge base configuration supplies a 'field' and, optionally,
    an 'expression' and a 'collection'.  Records are looked up with
    search_engine.perform_request_search and the values of 'field' in
    those records are returned.

    @param kbname:     name of the knowledge base
    @param searchwith: a term to search with
    @return: list of field values; [] when the kb is unknown, is not
             dynamic (type 'd'), has no usable configuration, or the
             search matches nothing
    """
    import search_engine

    # only a dynamic knowledge base (type 'd') can be searched;
    # a missing/empty type fails this comparison as well
    kbid = bibknowledge_dblayer.get_kb_id(kbname)
    if not kbid:
        return []
    if bibknowledge_dblayer.get_kb_type(kbid) != 'd':
        return []

    # get the configuration so that we see what the field is
    confdict = bibknowledge_dblayer.get_kb_dyn_config(kbid)
    if not confdict or 'field' not in confdict:
        return []
    field = confdict['field']
    # 'expression' and 'collection' are optional: fall back to "" instead
    # of raising KeyError on a configuration that lacks them
    expression = confdict.get('expression', "")
    collection = confdict.get('collection', "")

    if expression:
        if searchwith:
            if '%' in expression:
                # '%' is the placeholder for the search term
                expression = expression.replace("%", searchwith)
            else:
                # no %.. just make a combination
                expression = expression + " and " + searchwith
        reclist = search_engine.perform_request_search(p=expression,
                                                       cc=collection)
    else:
        # no expression: search the field itself; without a search term use
        # a fake expression so that only records that have this field
        # will be returned
        reclist = search_engine.perform_request_search(f=field,
                                                       p=searchwith or "/.*/",
                                                       cc=collection)
    if not reclist:
        return []  # in case nothing worked

    # each entry is a pair; only its first item (the value) is kept
    fieldvaluelist = search_engine.get_most_popular_field_values(reclist,
                                                                 field)
    return [val for (val, dummy) in fieldvaluelist]
Пример #2
0
def get_kbd_values(kbname, searchwith=""):
    """Return a list of values by searching a dynamic kb.

    The 'field' entry of the kb configuration names the field whose
    values are returned; the optional 'expression' and 'collection'
    entries shape the record search done through
    search_engine.perform_request_search.

    @param kbname:     name of the knowledge base
    @param searchwith: a term to search with
    @return: list of field values, or [] if the kb does not exist, is
             not of type 'd', lacks a 'field' setting, or no record
             matches
    """
    import search_engine

    # first check that the kb in question is dynamic (an empty type is
    # rejected by the same comparison)
    kbid = bibknowledge_dblayer.get_kb_id(kbname)
    if not kbid:
        return []
    if bibknowledge_dblayer.get_kb_type(kbid) != 'd':
        return []

    # the configuration tells us which field to read
    confdict = bibknowledge_dblayer.get_kb_dyn_config(kbid)
    if not confdict or 'field' not in confdict:
        return []
    field = confdict['field']
    # optional settings default to "" so an incomplete configuration does
    # not raise KeyError
    expression = confdict.get('expression', "")
    collection = confdict.get('collection', "")

    if not expression:
        # search on the field directly; "/.*/" is a fake expression that
        # restricts the result to records having this field at all
        pattern = searchwith or "/.*/"
        reclist = search_engine.perform_request_search(f=field,
                                                       p=pattern,
                                                       cc=collection)
    else:
        if searchwith:
            if '%' in expression:
                # substitute the search term for every '%' placeholder
                expression = expression.replace("%", searchwith)
            else:
                # no %.. just make a combination
                expression = expression + " and " + searchwith
        reclist = search_engine.perform_request_search(p=expression,
                                                       cc=collection)

    if not reclist:
        return []  # in case nothing worked

    # entries are pairs; the second item is not needed here
    popular = search_engine.get_most_popular_field_values(reclist, field)
    return [val for (val, dummy) in popular]
Пример #3
0
def get_kbd_values(kbname, searchwith=""):
    """
    To be used by bibedit. Returns a list of values based on a dynamic kb.

    The search term is wrapped in single quotes before it is used.  When
    the quoted term ends with "*'" the returned values are additionally
    restricted to those starting with the term (without quotes and
    without the trailing '*').

    @param kbname: name of the knowledge base
    @param searchwith: a term to search with
    @return: list of field values; [] when the kb is unknown, is not of
             type 'd', has no 'field' configured, or nothing matches
    """
    import search_engine

    # first check that the kb in question is dynamic (an empty type
    # fails the same comparison)
    kbid = bibknowledge_dblayer.get_kb_id(kbname)
    if not kbid:
        return []
    if bibknowledge_dblayer.get_kb_type(kbid) != 'd':
        return []

    # get the configuration so that we see what the field is
    confdict = bibknowledge_dblayer.get_kb_dyn_config(kbid)
    if not confdict or 'field' not in confdict:
        return []
    field = confdict['field']
    # optional settings: default to "" rather than raising KeyError
    expression = confdict.get('expression', "")
    collection = confdict.get('collection', "")

    # make sure searchwith is a quoted expression
    if searchwith:
        if not searchwith.startswith("'"):
            searchwith = "'" + searchwith
        if not searchwith.endswith("'"):
            searchwith = searchwith + "'"

    if expression:
        if searchwith:
            if '%' in expression or expression.endswith(":*"):
                # '%' and ':*' are both placeholders for the search term
                expression = expression.replace("%", searchwith)
                expression = expression.replace(":*", ':' + searchwith)
            else:
                # no placeholder.. just make a combination; the leading
                # space keeps "and" from fusing with the expression
                expression = expression + " and " + searchwith
        reclist = search_engine.perform_request_search(p=expression,
                                                       cc=collection)
    else:
        # no expression: search the field itself; without a term use a
        # fake expression so that only records that have this field
        # will be returned
        reclist = search_engine.perform_request_search(f=field,
                                                       p=searchwith or "/.*/",
                                                       cc=collection)
    if not reclist:
        return []  # in case nothing worked

    fieldvaluelist = search_engine.get_most_popular_field_values(reclist,
                                                                 field)

    # support "starts with", indicated by the * at the end of the
    # searchstring.  searchwith looks like 'term*' at this point, so the
    # bare term is everything between the opening quote and the "*'" tail.
    if searchwith and len(searchwith) > 2 and searchwith[-2] == '*':
        prefix = searchwith[1:-2]
        return [val for (val, dummy) in fieldvaluelist
                if val.startswith(prefix)]
    return [val for (val, dummy) in fieldvaluelist]
def personid_fast_assign_papers(paperslist=None, use_threading_not_multiprocessing=True):
    '''
    Assign papers to the most compatible person.
    Compares only the name to find the right person to assign to. If nobody seems compatible,
    create a new person.

    Every paper is put on a shared queue which is drained by worker threads
    (default) or worker processes, each calling pfap_assign_paper_iteration
    once per paper.

    @param paperslist: sequence of rows whose first element is the record id
        (only row[0] is used). When empty or None, every record returned by
        perform_request_search(p="") is processed.
    @param use_threading_not_multiprocessing: True to use threads,
        False to use multiprocessing processes.
    '''

    class Worker(Thread):
        '''Thread that takes papers off the queue until it is empty or the checker says stop.'''

        def __init__(self, i, p_q, atul, personid_new_id_lock, checker):
            Thread.__init__(self)
            self.i = i
            self.checker = checker
            self.p_q = p_q
            self.atul = atul
            self.personid_new_id_lock = personid_new_id_lock

        def run(self):
            while True:
                # use the checker handed to this worker rather than whatever
                # the enclosing function's variable happens to hold
                if self.checker.should_stop():
                    break
                try:
                    bibrec = self.p_q.get_nowait()
                except Empty:
                    break
                # NOTE(review): presumably forces a fresh DB connection for
                # this thread -- confirm against close_connection()
                close_connection()

                pfap_assign_paper_iteration(self.i, bibrec, self.atul, self.personid_new_id_lock)

    def _pfap_assign_paper(i, p_q, atul, personid_new_id_lock, checker):
        # process-based counterpart of Worker.run
        while True:
            # check bibsched
            if checker.should_stop():
                break

            try:
                bibrec = p_q.get_nowait()
            except Empty:
                break

            pfap_assign_paper_iteration(i, bibrec, atul, personid_new_id_lock)


    _pfap_printmsg('starter', 'Started')
    if not paperslist:
        #paperslist = run_sql('select id from bibrec where 1')
        paperslist = [[x] for x in perform_request_search(p="")]

    # keep only the record id of each row
    paperslist = [k[0] for k in paperslist]

    _pfap_printmsg('starter', 'Starting on %s papers ' % len(paperslist))

    # locks and queue must come from the same library as the workers
    if use_threading_not_multiprocessing:
        authornames_table_update_lock = Lock()
        personid_new_id_lock = Lock()
        papers_q = Queue()
    else:
        authornames_table_update_lock = multiprocessing.Lock()
        personid_new_id_lock = multiprocessing.Lock()
        papers_q = multiprocessing.Queue()

    for p in paperslist:
        papers_q.put(p)

    process_list = []
    c = 0
    if not use_threading_not_multiprocessing:
        while not papers_q.empty():
            checker = status_checker()
            # NOTE(review): '<=' allows CFG_BIBAUTHORID_MAX_PROCESSES + 1 live
            # processes while the thread branch uses '<' -- confirm intent
            while len(process_list) <= bconfig.CFG_BIBAUTHORID_MAX_PROCESSES:
                p = multiprocessing.Process(target=_pfap_assign_paper, args=(c, papers_q,
                                                                    authornames_table_update_lock,
                                                                    personid_new_id_lock, checker))
                c += 1
                process_list.append(p)
                p.start()

            # reap finished processes; iterate over a snapshot because the
            # list is modified inside the loop
            for p in tuple(process_list):
                if not p.is_alive():
                    p.join()
                    process_list.remove(p)

            task_sleep_now_if_required(True)
    else:
        max_processes = bconfig.CFG_BIBAUTHORID_PERSONID_SQL_MAX_THREADS
        checker = status_checker()
        workers = []
        while not papers_q.empty():
            # NOTE(review): the worker index restarts at 0 on every pass, so
            # replacement workers can share an index with live ones -- confirm
            # pfap_assign_paper_iteration does not need it to be unique
            i = 0
            while len(workers) < max_processes:
                w = Worker(i, papers_q, authornames_table_update_lock,
                           personid_new_id_lock, checker)
                i += 1
                w.start()
                workers.append(w)
            # reap finished threads (snapshot, see above)
            for w in tuple(workers):
                if not w.is_alive():
                    w.join()
                    workers.remove(w)

            task_sleep_now_if_required(True)
Пример #5
0
def get_kbd_values(kbname, searchwith=""):
    """
    To be used by bibedit. Returns a list of values based on a dynamic kb.

    The search term is put in single quotes before use.  A quoted term
    ending in "*'" requests "starts with" matching: only values that begin
    with the term (quotes and trailing '*' removed) are returned.

    @param kbname: name of the knowledge base
    @param searchwith: a term to search with
    @return: list of field values; [] if the kb does not exist, is not of
             type 'd', has no 'field' setting, or no record matches
    """
    import search_engine

    #first check that the kb in question is dynamic; a missing type is
    #rejected by the same test
    kbid = bibknowledge_dblayer.get_kb_id(kbname)
    if not kbid:
        return []
    if bibknowledge_dblayer.get_kb_type(kbid) != 'd':
        return []

    #get the configuration so that we see what the field is
    confdict = bibknowledge_dblayer.get_kb_dyn_config(kbid)
    if not confdict or 'field' not in confdict:
        return []
    field = confdict['field']
    #'expression' and 'collection' may be absent: use "" instead of
    #failing with KeyError
    expression = confdict.get('expression', "")
    collection = confdict.get('collection', "")

    #make sure searchwith is a quoted expression
    if searchwith:
        if not searchwith.startswith("'"):
            searchwith = "'" + searchwith
        if not searchwith.endswith("'"):
            searchwith = searchwith + "'"

    if not expression:
        #make a fake expression so that only records that have this field
        #will be returned, unless there is a term to search the field with
        reclist = search_engine.perform_request_search(f=field,
                                                       p=searchwith or "/.*/",
                                                       cc=collection)
    else:
        if searchwith:
            if '%' in expression or expression.endswith(":*"):
                #both '%' and ':*' stand for the search term
                expression = expression.replace("%", searchwith)
                expression = expression.replace(":*", ':' + searchwith)
            else:
                #no placeholder.. just make a combination (note the space
                #before "and", without it the operator is glued to the
                #expression)
                expression = expression + " and " + searchwith
        reclist = search_engine.perform_request_search(p=expression,
                                                       cc=collection)

    if not reclist:
        return []  #in case nothing worked

    fieldvaluelist = search_engine.get_most_popular_field_values(
        reclist, field)

    #support "starts with", indicated by the * at the end of the
    #searchstring; at this point it reads 'term*', so the term itself is
    #what lies between the first quote and the final "*'"
    starts_with = (searchwith and len(searchwith) > 2
                   and searchwith[-2] == '*')
    if not starts_with:
        return [val for (val, dummy) in fieldvaluelist]
    prefix = searchwith[1:-2]
    return [val for (val, dummy) in fieldvaluelist if val.startswith(prefix)]
Пример #6
0
def arxiv_login(req, picked_profile=None):
    '''
    Log in through arXiv. If the user is already associated to a personid, that personid is used.
    If the user has no pid and no profile was picked, a list of candidate profiles is returned
    instead (based on surname and papers from arXiv). If a profile was picked, its availability
    is checked through dbapi.check_personids_availability.
    At the end of the process opens a ticket for the user claiming the papers from arXiv.
    !!! the user will find the open ticket, which will require him to go through the
    final review before getting committed.

    @param req: Apache request object
    @type req: Apache request object
    @param picked_profile: personid chosen by the user, or None if no choice was made

    @return: a (status, payload) tuple, one of
        ("top5_list", candidates)         no pid for the user and no profile picked
        ("chosen pid not available", pid) a profile other than -1 was picked but a different pid resulted
        ("pid assigned by user", pid)     the picked profile is the resulting pid
        ("pid", pid)                      no profile picked (or -1) and a pid is known
    @rtype: tuple
    '''
    def session_bareinit(req):
        # make sure session['personinfo']['ticket'] exists
        session = get_session(req)
        try:
            pinfo = session["personinfo"]
            if 'ticket' not in pinfo:
                pinfo["ticket"] = []
        except KeyError:
            pinfo = dict()
            session['personinfo'] = pinfo
            pinfo["ticket"] = []
        session.dirty = True

    session_bareinit(req)
    session = get_session(req)

    pinfo = session['personinfo']
    ticket = session['personinfo']['ticket']

    uinfo = collect_user_info(req)
    pinfo['external_first_entry'] = False

    try:
        name = uinfo['external_firstname']
    except KeyError:
        name = ''
    try:
        surname = uinfo['external_familyname']
    except KeyError:
        surname = ''

    if surname:
        session['personinfo']['arxiv_name'] = nameapi.create_normalized_name(
                                          nameapi.split_name_parts(surname + ', ' + name))
    else:
        session['personinfo']['arxiv_name'] = ''

    session.dirty = True

    try:
        arxiv_p_ids = uinfo['external_arxivids'].split(';')
    except KeyError:
        arxiv_p_ids = []

    #'external_arxivids': 'hep-th/0112017;hep-th/0112020',
    #'external_familyname': 'Weiler',
    #'external_firstname': 'Henning',

    # union of the records found for every arXiv id; an empty id list simply
    # leaves the set empty
    found_bibrecs = set()
    try:
        for arx in arxiv_p_ids:
            found_bibrecs.update(perform_request_search(p='037:' + str(arx), of='id', rg=0))
    except (IndexError, TypeError):
        # best effort: an unusable search result means no records are claimed
        found_bibrecs = set()

    #found_bibrecs = [567700, 567744]

    uid = getUid(req)
    pid, pid_found = dbapi.get_personid_from_uid([[uid]])

    if pid_found:
        pid = pid[0]
    elif picked_profile is None:
        # nothing known and nothing chosen yet: let the user pick a profile
        top5_list = dbapi.find_top5_personid_for_new_arXiv_user(found_bibrecs,
            nameapi.create_normalized_name(nameapi.split_name_parts(surname + ', ' + name)))
        return ("top5_list", top5_list)
    else:
        pid = dbapi.check_personids_availability(picked_profile, uid)

    pid_bibrecs = set([i[0] for i in dbapi.get_all_personids_recs(pid, claimed_only=True)])
    missing_bibrecs = found_bibrecs - pid_bibrecs
    #present_bibrecs = found_bibrecs.intersection(pid_bibrecs)

    #assert len(found_bibrecs) == len(missing_bibrecs) + len(present_bibrecs)

    #now we have to open the tickets...
    #person_papers contains the papers which are already assigned to the person and came from arxive,
    #they can be claimed regardless
    tempticket = [{'pid':pid, 'bibref':str(bibrec), 'action':'confirm'}
                  for bibrec in missing_bibrecs]

    #check if ticket targets (bibref for pid) are already in ticket:
    #an existing entry for the same target is replaced by the new one
    for t in tempticket:
        for e in list(ticket):
            if e['pid'] == t['pid'] and e['bibref'] == t['bibref']:
                ticket.remove(e)
        ticket.append(t)

    session.dirty = True

    # -1 is treated like "no profile picked"
    if picked_profile is None or picked_profile == -1:
        return ("pid", pid)
    if picked_profile != pid:
        return ("chosen pid not available", pid)
    return ("pid assigned by user", pid)
Пример #7
0
def arxiv_login(req):
    '''
    Log in through arXiv. If the user is already associated to a personid, returns the personid.
    If the user has no pid, dbapi.reclaim_personid_for_new_arXiv_user is asked for one, based on
    the user's name and the papers found for his arXiv ids.
    At the end of the process opens a ticket for the user claiming the papers from arXiv.
    !!! the user will find the open ticket, which will require him to go through the
    final review before getting committed.

    @param req: Apache request object
    @type req: Apache request object

    @return: Returns the pid resulting in the process
    @rtype: int
    '''
    def session_bareinit(req):
        # make sure session['personinfo']['ticket'] exists
        session = get_session(req)
        try:
            pinfo = session["personinfo"]
            if 'ticket' not in pinfo:
                pinfo["ticket"] = []
        except KeyError:
            pinfo = dict()
            session['personinfo'] = pinfo
            pinfo["ticket"] = []
        session.save()

    session_bareinit(req)
    session = get_session(req)

    pinfo = session['personinfo']
    ticket = session['personinfo']['ticket']

    uinfo = collect_user_info(req)
    pinfo['external_first_entry'] = False

    # default to '' (not None) so that the name can always be assembled
    # with string concatenation below
    try:
        name = uinfo['external_firstname']
    except KeyError:
        name = ''
    try:
        surname = uinfo['external_familyname']
    except KeyError:
        surname = ''

    if surname:
        session['personinfo']['arxiv_name'] = nameapi.create_normalized_name(
                                          nameapi.split_name_parts(surname + ', ' + name))
    else:
        session['personinfo']['arxiv_name'] = ''
    session.save()

    try:
        arxiv_p_ids = uinfo['external_arxivids'].split(';')
    except KeyError:
        arxiv_p_ids = []

    #'external_arxivids': 'hep-th/0112017;hep-th/0112020',
    #'external_familyname': 'Weiler',
    #'external_firstname': 'Henning',

    # union of ALL records found for every arXiv id; ids without any match
    # contribute nothing and do not discard the other results
    found_bibrecs = set()
    for arx in arxiv_p_ids:
        found_bibrecs.update(perform_request_search(p='037:' + str(arx), of='id', rg=0))

    #found_bibrecs = [567700, 567744]

    uid = getUid(req)
    pid, pid_found = dbapi.get_personid_from_uid([[uid]])

    if not pid_found:
        pid = dbapi.reclaim_personid_for_new_arXiv_user(found_bibrecs,
                    nameapi.create_normalized_name(nameapi.split_name_parts(surname + ', ' + name)), uid)
    else:
        pid = pid[0]

    pid_bibrecs = set([i[0] for i in dbapi.get_all_personids_recs(pid, claimed_only=True)])
    missing_bibrecs = found_bibrecs - pid_bibrecs
    #present_bibrecs = found_bibrecs.intersection(pid_bibrecs)

    #assert len(found_bibrecs) == len(missing_bibrecs) + len(present_bibrecs)

    #now we have to open the tickets...
    #person_papers contains the papers which are already assigned to the person and came from arxive,
    #they can be claimed regardless
    tempticket = [{'pid':pid, 'bibref':str(bibrec), 'action':'confirm'}
                  for bibrec in missing_bibrecs]

    #check if ticket targets (bibref for pid) are already in ticket:
    #an existing entry for the same target is replaced by the new one
    for t in tempticket:
        for e in list(ticket):
            if e['pid'] == t['pid'] and e['bibref'] == t['bibref']:
                ticket.remove(e)
        ticket.append(t)
    session.save()
    return pid
def personid_fast_assign_papers(paperslist=None, use_threading_not_multiprocessing=True):
    '''
    Assign papers to the most compatible person.
    Compares only the name to find the right person to assign to. If nobody seems compatible,
    create a new person.

    The papers are queued and handed to worker threads (default) or worker
    processes; each worker calls pfap_assign_paper_iteration per paper.

    @param paperslist: sequence of rows whose first element is the record id
        (only row[0] is used). When empty or None, every record returned by
        perform_request_search(p="") is processed.
    @param use_threading_not_multiprocessing: True to use threads,
        False to use multiprocessing processes.
    '''

    class Worker(Thread):
        '''Thread that takes papers off the queue until it is empty or the checker says stop.'''

        def __init__(self, i, p_q, atul, personid_new_id_lock, checker):
            Thread.__init__(self)
            self.i = i
            self.checker = checker
            self.p_q = p_q
            self.atul = atul
            self.personid_new_id_lock = personid_new_id_lock

        def run(self):
            while True:
                # consult the checker this worker was constructed with, not
                # the enclosing function's variable of the same name
                if self.checker.should_stop():
                    break
                try:
                    bibrec = self.p_q.get_nowait()
                except Empty:
                    break
                # NOTE(review): presumably forces a fresh DB connection for
                # this thread -- confirm against close_connection()
                close_connection()

                pfap_assign_paper_iteration(self.i, bibrec, self.atul, self.personid_new_id_lock)

    def _pfap_assign_paper(i, p_q, atul, personid_new_id_lock, checker):
        # process-based counterpart of Worker.run
        while True:
            # check bibsched
            if checker.should_stop():
                break

            try:
                bibrec = p_q.get_nowait()
            except Empty:
                break

            pfap_assign_paper_iteration(i, bibrec, atul, personid_new_id_lock)


    _pfap_printmsg('starter', 'Started')
    if not paperslist:
        #paperslist = run_sql('select id from bibrec where 1')
        paperslist = [[x] for x in perform_request_search(p="")]

    # keep only the record id of each row
    paperslist = [k[0] for k in paperslist]

    _pfap_printmsg('starter', 'Starting on %s papers ' % len(paperslist))

    # locks and queue must come from the same library as the workers
    if use_threading_not_multiprocessing:
        authornames_table_update_lock = Lock()
        personid_new_id_lock = Lock()
        papers_q = Queue()
    else:
        authornames_table_update_lock = multiprocessing.Lock()
        personid_new_id_lock = multiprocessing.Lock()
        papers_q = multiprocessing.Queue()

    for p in paperslist:
        papers_q.put(p)

    process_list = []
    c = 0
    if not use_threading_not_multiprocessing:
        while not papers_q.empty():
            checker = status_checker()
            # NOTE(review): '<=' allows CFG_BIBAUTHORID_MAX_PROCESSES + 1 live
            # processes while the thread branch uses '<' -- confirm intent
            while len(process_list) <= bconfig.CFG_BIBAUTHORID_MAX_PROCESSES:
                p = multiprocessing.Process(target=_pfap_assign_paper, args=(c, papers_q,
                                                                    authornames_table_update_lock,
                                                                    personid_new_id_lock, checker))
                c += 1
                process_list.append(p)
                p.start()

            # reap finished processes; iterate over a snapshot because the
            # list is modified inside the loop
            for p in tuple(process_list):
                if not p.is_alive():
                    p.join()
                    process_list.remove(p)

            task_sleep_now_if_required(can_stop_too=False)
    else:
        max_processes = bconfig.CFG_BIBAUTHORID_PERSONID_SQL_MAX_THREADS
        checker = status_checker()
        workers = []
        while not papers_q.empty():
            # NOTE(review): the worker index restarts at 0 on every pass, so
            # replacement workers can share an index with live ones -- confirm
            # pfap_assign_paper_iteration does not need it to be unique
            i = 0
            while len(workers) < max_processes:
                w = Worker(i, papers_q, authornames_table_update_lock,
                           personid_new_id_lock, checker)
                i += 1
                w.start()
                workers.append(w)
            # reap finished threads (snapshot, see above)
            for w in tuple(workers):
                if not w.is_alive():
                    w.join()
                    workers.remove(w)

            task_sleep_now_if_required(can_stop_too=False)
def get_all_valid_bibrecs():
    '''
    Search for the records belonging to any of the collections listed in
    bconfig.LIMIT_TO_COLLECTIONS.

    @return: whatever perform_request_search returns for the pattern
        980__a:"<collection>" or 980__a:"<collection>" ..., called with rg=0
    '''
    clauses = []
    for collection_name in bconfig.LIMIT_TO_COLLECTIONS:
        # one 980__a clause per configured collection
        clauses.append('980__a:"%s"' % collection_name)
    pattern = " or ".join(clauses)
    return perform_request_search(p=pattern, rg=0)