Python PublicationDao.PublicationDao示例

编程语言: Python

命名空间/包名称: com.lish.ajia.googlescholar.daos

类/类型: PublicationDao

方法/功能: PublicationDao

hotexamples.com的示例: 6

Python PublicationDao.PublicationDao - 已找到 6 个示例。这些是从开源项目中提取的、评价最高的 com.lish.ajia.googlescholar.daos.PublicationDao.PublicationDao 真实 Python 示例。您可以对示例进行评价，以帮助我们提高示例质量。

常用方法

显示 / 隐藏

PublicationDao(6)

getPublicationByPerson(2)

getLeftCount(1)

getTotalCount(1)

示例#1

显示文件

    def test_matchPub(self):
        """Match one person's stored publications against saved result pages.

        Reads the three saved HTML pages of the hard-coded person
        ('jie tang', id 13419) from ``self.settings.source_dir``, extracts the
        models of every page and merges them into a single map.  The person's
        publications are then loaded through ``PublicationDao`` and handed,
        together with the merged map, to ``self.matchPub``.

        Returns:
            The second element of the pair returned by ``self.matchPub``,
            i.e. the publications that were not matched.
        """
        self.extractor = Extractor().getInstance()
        pubdao = PublicationDao()
        person_id = 13419
        person_name = 'jie tang'

        # Part 1: read the saved pages and merge everything extracted from them.
        all_models = {}
        for page in range(0, 3):
            filename = "".join((person_name, '_page_', str(page), '.html'))
            path = os.path.join(self.settings.source_dir, filename)
            # `with open(...)` closes the handle even if read() raises; the
            # previous `file(...)` call never closed it and only exists on
            # Python 2.
            with open(path, 'r') as source:
                html = source.read()
            models = self.extractor.extract_from_source(html)
            if models is not None:
                # Reaches the extractor's name-mangled private merge helper.
                self.extractor._Extractor__merge_into_extractedmap(
                    all_models, models)
        # Single-argument print(...) emits the same text on Python 2 and 3.
        print('Total found DEBUG  %s items.' % len(all_models))

        # Part 2: load the stored publications of that person.
        pubs = pubdao.getPublicationByPerson(person_id,
                                             self.settings.generation)

        # Flip to True to dump both inputs before matching.
        printout = False
        if printout:
            for key, models in all_models.items():
                print('%s  -->  %s' % (key, models))
            print('===================')
            for pub in pubs:
                print(pub)

        (pubs_matched, pubs_not_matched) = self.matchPub(pubs, all_models)
        print('- test done - %s %s' % (len(pubs_matched),
                                       len(pubs_not_matched)))
        return pubs_not_matched

示例#2

显示文件

	def __init__(self, generation, mgr_interval=5):
		"""Initialise queues, caches, locks, counters and DAOs.

		Args:
			generation: stored as ``self.gen``.
			mgr_interval: stored as ``self.mgr_interval`` (default 5).
		"""
		settings = Settings.getInstance()
		self.settings = settings
		self.debug = settings.debug

		self.gen = generation
		self.mgr_interval = mgr_interval

		# Bounded queue of persons; the id set is kept in sync with the queue
		# so that membership can be checked quickly by id.
		self.person_queue = Queue.Queue(maxsize=settings.person_cache_size)
		self.person_id_set = set()

		# pubmap:         {id -> pub}
		# person_pub_map: {person_id -> [pub_id_list]}, person to pub ids
		self.pubmap = {}
		self.person_pub_map = {}
		self.pub_db_cache = {}

		self.pub_lock = threading.Lock()
		self.pub_dbcache_lock = threading.RLock()

		# Synced with the main running flag in mgr_interval_thread.
		self.running = True
		self.blocked_pub_t = 0

		# Accumulated timings.
		self.ppt_wait = 0
		self.ppt_getlock = 0
		self.ppt_get = 0

		self.person_dao = PersonDao()
		self.pub_dao = PublicationDao()

示例#3

显示文件

文件： t_person_processer.py 项目： yinonbaron/aminer-spider

    def __init__(self, extractorInstance):
        """Prepare the thread around an existing extractor.

        Args:
            extractorInstance: extractor whose ``store`` attribute is reused
                by this thread.
        """
        threading.Thread.__init__(self)

        self.extractor = extractorInstance
        self.store = extractorInstance.store
        self.pubdao = PublicationDao()

        # No person assigned yet: the caller sets one and then starts the
        # thread.
        self.person = None
        self.ask_to_stop = False
        self.last_action = datetime.datetime.now()

示例#4

显示文件

    def __init__(self):
        """Announce the task, set defaults, create the DAOs and pick a generation.

        Prints a banner, reads the shared ``Settings`` instance, initialises
        thread bookkeeping and control flags, builds the DAO objects and
        finally calls ``self.determineGereration()``.
        """
        print("Task: extract paper's citation from schooler.google.com.\n")
        settings = Settings.getInstance()
        self.settings = settings
        self.debug = settings.debug

        # --- configuration -------------------------------------------------
        self.mgr_interval = 10  # seconds
        # Max threads used to extract persons / publications.  Both values
        # can be modified on the fly (e.g. different by day and by night).
        self.max_person_thread = 2
        self.max_pub_thread = 2

        # --- threads -------------------------------------------------------
        self.t_mgr = None  # management thread, created later
        self.t_provider = None
        self.person_thread_pool = []
        self.pub_thread_pool = []

        # Counter and its lock, used to monitor whether all threads are in
        # the idle state.
        self.busy_semaphore = 0
        self.busy_semaphore_lock = threading.Lock()

        # --- utils ---------------------------------------------------------
        self.store = None

        # --- switches & flags ----------------------------------------------
        self.running = True  # If False, threads will stop after current task.
        self.stopped = False  # If MGRThread can stop.
        self.pause = False  # All works paused.
        self.waiting_to_finish = False  # No additional data. all added to queue.
        self.num_report = 0
        self.last_report_time = datetime.datetime.now()  # time of the last interval

        self.restart_all_thread = False
        # Right after leaving pause mode there are many failed tasks, which
        # would otherwise trigger another wait immediately.
        self.detect_exit_wait = 0

        self.generation = 0

        # --- data access ---------------------------------------------------
        self.dao = dbs()
        self.personDao = PersonDao()
        self.pubDao = PublicationDao()

        # The PDF link cache is only created when the setting asks for it.
        if settings.save_pdflink:
            self.pdfcache = PDFLinkSaver.getInstance()

        # --- start ---------------------------------------------------------
        self.determineGereration()

示例#5

显示文件

文件： debug.py 项目： yinonbaron/aminer-spider

 def __init__(self):
     """Wire up the shared extractor and matcher plus a fresh publication DAO."""
     extractor = Extractor.getInstance()
     matcher = PubMatcher.getInstance()
     self.extractor, self.matcher = extractor, matcher
     self.pubdao = PublicationDao()

示例#6

显示文件

文件： update_author.py 项目： yinonbaron/aminer-spider

 def __init__(self, aid, generation):
     """Remember the author id and generation, then load the author.

     Args:
         aid: author id, stored as ``self.aid``.
         generation: stored as ``self.generation``.
     """
     self.aid, self.generation = aid, generation
     # Looked up once at construction time through self.get_author.
     author = self.get_author(aid, generation)
     self.person = author
     self.pubdao = PublicationDao()