def my_put(resource, item, vpapi):
    # update an existing item by deleting and re-posting it, or create it when missing
    ex = vpapi.get(resource, where={"id": item['id']})
    if len(ex['_items']) >= 1:
        # somehow vpapi.put does not work for me, so delete and post
        # vpapi.put(resource, item['id'], item)
        vpapi.delete(resource, item['id'])
    vpapi.post(resource, item)
def saveperson(scraped):
    # find the source identifier of the scraped person
    identifier = None
    for ident in scraped["identifiers"]:
        if ident["scheme"] == "psp.cz/osoby":
            identifier = ident
            break
    if identifier is None:
        raise Exception("scraped person has no psp.cz/osoby identifier", scraped)
    r = vpapi.get('people', where={'identifiers': {'$elemMatch': identifier}})
    if not r['_items']:
        r = vpapi.post('people', scraped)
    else:
        # update by PUT is preferred over PATCH to correctly remove properties that no longer exist now
        existing = r['_items'][0]
        # somehow vpapi.put does not work for me, so delete and post
        # r = vpapi.put('people', existing['id'], scraped)
        vpapi.delete("people", existing['id'])
        r = vpapi.post('people', scraped)
    if r['_status'] != 'OK':
        raise Exception(scraped.get('name'), r)
    return r['id']
def save_organization(scraped):
    r = vpapi.get('organizations', where={'identifiers': {'$elemMatch': scraped["identifiers"][0]}})
    if not r['_items']:
        r = vpapi.post('organizations', scraped)
        print("POST " + scraped['id'])
    else:
        # update by PUT is preferred over PATCH to correctly remove properties that no longer exist now
        existing = r['_items'][0]
        # somehow vpapi.put does not work for me, so delete and post
        # r = vpapi.put('organizations', existing['id'], scraped)
        vpapi.delete("organizations", existing['id'])
        r = vpapi.post('organizations', scraped)
        print("PUT " + scraped['id'])
    if r['_status'] != 'OK':
        raise Exception(scraped.get('name'), r)
    return r['id']
def savemembership(membership):
    r = vpapi.get('memberships', where={
        'person_id': membership["person_id"],
        'organization_id': membership["organization_id"],
        "role": "member",
        "start_date": membership["start_date"]})
    if not r['_items']:
        r = vpapi.post("memberships", membership)
    else:
        # somehow vpapi.put does not work for me, so delete and post
        existing = r['_items'][0]
        # skip the update when the end date has not changed
        update = True
        if "end_date" in existing and "end_date" in membership and existing["end_date"] == membership["end_date"]:
            update = False
            print("not updating: " + existing['id'])
        if update:
            vpapi.delete("memberships", existing['id'])
            membership['id'] = existing['id']
            r = vpapi.post('memberships', membership)
            print("updating: " + membership['id'])
            # r = vpapi.put('memberships/%s' % existing['id'], membership)
    if r['_status'] != 'OK':
        raise Exception(membership, r)
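# The save* helpers above all repeat the same delete-and-post workaround for
# vpapi.put. A minimal sketch of how that pattern could be factored into one
# shared upsert helper; this is an illustration only (the generic `where`
# filter and the error handling are assumptions, not part of the original code):
def upsert(resource, where, item):
    """Replace the item matching `where` or create a new one; return its id."""
    r = vpapi.get(resource, where=where)
    if r['_items']:
        existing = r['_items'][0]
        # vpapi.put(resource, existing['id'], item) would be preferable,
        # but the workaround used above is to delete and re-post
        vpapi.delete(resource, existing['id'])
    r = vpapi.post(resource, item)
    if r['_status'] != 'OK':
        raise Exception(resource, r)
    return r['id']

# Example usage (same filter as saveperson above):
# person_id = upsert('people', {'identifiers': {'$elemMatch': identifier}}, scraped)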
def scrape_old_debates(term):
    """Scrape and save speeches from debates of the given term, one of those
    older terms where transcripts of debates are stored in RTF files.

    Returns number of scraped speeches.
    """

    def insert_speech(type):
        """Insert a speech entity with the given type and data from parent
        scope variables and update end date of the corresponding session
        and sitting. Reset the `text` variable."""
        nonlocal text, position
        if not text:
            return
        position = position + 1
        speech = {
            'text': text.strip().replace('[', '(').replace(']', ')'),
            'type': type,
            'position': position,
            'event_id': sitting_id,
            'sources': [{
                'url': debate['url'],
                'note': 'Prepis debaty v Digitálnej knižnici na webe NRSR'
            }]
        }
        if type != 'scene':
            speech['creator_id'] = speaker_id
            speech['attribution_text'] = attribution.strip()
        speeches.append(speech)
        text = ''

        if date > session_end_date:
            vpapi.patch('events', session_id, {'end_date': date})
        if date > sitting_end_date:
            vpapi.patch('events', sitting_id, {'end_date': date})

    logging.info('Scraping debates of term `%s`' % term)
    chamber_id = get_chamber_id(term)

    # prepare mapping from MP's name to id
    people = vpapi.getall('people', projection={'given_name': 1, 'additional_name': 1, 'family_name': 1})
    mps = {}
    for mp in people:
        if 'additional_name' in mp:
            name = '%s. %s. %s' % (mp['given_name'][0], mp['additional_name'][0], mp['family_name'])
        else:
            name = '%s. %s' % (mp['given_name'][0], mp['family_name'])
        mps[name] = mp['id']

    # load name corrections
    with open(os.path.join(CONF_DIR, 'name_corrections.json'), encoding='utf8') as f:
        name_corrections = json.load(f)

    # scrape list of debates
    debates = parse.old_debates_list(term)

    # add the debate missing in the list
    if term == '4':
        debates['_items'].append({
            'názov': 'Autorizovaná rozprava, 48. schôdza NR SR, 3. 2. 2010',
            'id': '2010_02_03',
            'url': 'http://www.nrsr.sk/dl/Browser/DsDocument?documentId=391413'
        })

    speech_count = 0
    session_identifier = None
    for debate in debates['_items']:
        # skip obsolete debates in the list
        if term == '1':
            if (debate['názov'] == 'Stenozáznam' and debate['id'] != '198550'
                    or debate['id'] in ('65890', '65945', '65949')):
                continue
        elif term == '2':
            if debate['názov'].startswith('Stenografická') and debate['id'] != '92098':
                continue
        elif term == '3':
            if debate['id'] == '181047':
                continue

        logging.info('Scraping debate `%s` (id=%s)' % (debate['názov'], debate['id']))
        if term == '1':
            paragraphs = parse.debate_of_term1(debate['id'])
        else:
            paragraphs = parse.debate_of_terms234(debate['id'])

        # normalize header of the debate transcript
        if term == '2':
            # join first 4 paragraphs and add trailing underscores to mark the header
            paragraphs = ['%s %s %s %s\n___' % (paragraphs[0], paragraphs[1], paragraphs[2],
                paragraphs[3])] + paragraphs[4:]
        elif term in ('3', '4'):
            # join first paragraphs until " hodine" ending is found
            # and add trailing underscores to mark the header
            p = ''
            while True:
                p += ' ' + paragraphs.pop(0)
                if p.endswith('hodine'):
                    break
            if paragraphs[0].startswith('___'):
                paragraphs.pop(0)
            paragraphs.insert(0, p + '\n___')

        # extract speeches from the debate
        speeches = []
        text = ''
        within_scene = False
        for par in paragraphs:
            par = par.replace('\n', ' ').strip()
            if not par:
                continue

            # fix last scene
            if re.search(r'\b(skončil.|skončené|prerušené|Prerušenie rokovani[ae])\s+o\s+(.*?)\s+hodine.', par):
                if not par[0] in ('(', '[', '/'):
                    par = '(%s)' % par

            # convert brackets to parentheses
            par = re.sub(r'\[(.*?)\]', r'(\1)', par)
            # slash pairs are converted to parentheses too in term 1
            if term == '1':
                par = re.sub(r'(^|\s)/(.*?)/(\s|$)', r'\1(\2)\3', par)
            # convert all inner nested parentheses to brackets
            n = 1
            while n >= 1:
                (par, n) = re.subn(r'\((.*?)\((.*?)\)(.*?)\)', r'(\1[\2]\3)', par, flags=re.DOTALL)

            # process eventual multiparagraph scene
            if par.startswith('(') and par.count('(') > par.count(')'):
                # save eventual previous speech
                insert_speech('speech')
                text = '<p>%s</p>' % par[1:]
                within_scene = True
                continue
            if within_scene:
                if par.endswith(')') and par.count(')') > par.count('('):
                    text += '\n\n<p>%s</p>' % par[:-1]
                    insert_speech('scene')
                    within_scene = False
                else:
                    text += '\n\n<p>%s</p>' % par
                continue

            # process eventual header
            header_pattern = r'((\(?(\d+)\.\)?\s+schôdz)|slávnostn).*?(\d+)\..*\b(\w{3,})\s+(\d{4})(.*?)_{3,}$'
            hd = re.search(header_pattern, par, re.DOTALL)
            if hd:
                # save eventual previous speech
                insert_speech('speech')

                sk_date = '%s. %s %s' % (hd.group(4), hd.group(5), hd.group(6))
                initial_time = re.search(r'\s+o\s+(.*?)\s+hodine', hd.group(7), re.DOTALL)
                if initial_time and initial_time.group(1) != '??':
                    h, m = initial_time.group(1).strip('.').split('.')
                    date = sk_to_utc(sk_date + ' %s:%s:00' % (h.strip().zfill(2), m.strip().zfill(2)))
                else:
                    date = sk_to_utc(sk_date) + 'T00:00:00'

                if hd.group(1).startswith('sláv'):
                    new_session_name = 'Mimoriadna schôdza'
                    if term == '1':
                        new_session_identifier = debate['časť']
                    elif term == '2':
                        new_session_identifier = '1000'
                    else:
                        sl = parse.session_list(term)
                        d = '%s. %s. %s' % (int(date[8:10]), int(date[5:7]), int(date[0:4]))
                        new_session_identifier = next((s['číslo'] for s in sl['_items'] if s['trvanie'] == d))
                else:
                    new_session_name = '%s. schôdza' % hd.group(3)
                    new_session_identifier = hd.group(3)

                if new_session_identifier != session_identifier:
                    # create new session event
                    session = {
                        'name': new_session_name,
                        'identifier': new_session_identifier,
                        'organization_id': chamber_id,
                        'type': 'session',
                        'start_date': date,
                    }
                    key = ('organization_id', 'type', 'identifier')
                    session_id, _ = get_or_create('events', session, key)
                    session_identifier = new_session_identifier
                    session_end_date = date
                    sitting_count = 0

                # create new sitting event
                sitting_count += 1
                sitting = {
                    'name': '%s. deň rokovania, %s' % (sitting_count, sk_date),
                    'identifier': str(sitting_count),
                    'organization_id': chamber_id,
                    'type': 'sitting',
                    'start_date': date,
                    'parent_id': session_id,
                }
                key = ('parent_id', 'type', 'identifier')
                sitting_id, created = get_or_create('events', sitting, key)
                sitting_end_date = date
                position = 0

                # delete existing speeches of the sitting
                if not created:
                    obsolete = vpapi.getall('speeches', where={'event_id': sitting_id})
                    for speech in obsolete:
                        vpapi.delete('speeches', speech['id'])
                continue

            # process eventual start of a speech
            if date < '2001-09-04':
                # format `Foreign minister J. Doe:`
                speech_start_pattern = r'(.*?)\b([^\W\d])\.[\s_]+((\w)\.[\s_]+)?([\w-]+):$'
            else:
                # format `J. Doe, foreign minister: speech`
                speech_start_pattern = r'([^\W\d])\.[\s_]+((\w)\.[\s_]+)?([\w-]+),\s+(.+?):(.+)$'
            sp = re.match(speech_start_pattern, par, re.DOTALL)
            if sp:
                # save eventual previous speech
                insert_speech('speech')

                # identify speaker
                if date < '2001-09-04':
                    name = '%s. %s' % (sp.group(2), sp.group(5))
                    if sp.group(4):
                        name = name.replace(' ', ' %s. ' % sp.group(4))
                    attribution = sp.group(1)
                    par = ''
                else:
                    name = '%s. %s' % (sp.group(1), sp.group(4))
                    if sp.group(3):
                        name = name.replace(' ', ' %s. ' % sp.group(3))
                    attribution = sp.group(5)
                    par = sp.group(6)
                if name in name_corrections:
                    name = name_corrections[name]
                attribution = attribution[0].lower() + attribution[1:].strip()
                speaker_id = mps.get(name)

                # create unknown speakers
                if not speaker_id:
                    logging.warn('Speaker `%s, %s` not found, creating new Person' % (name, attribution))
                    name_parts = re.match(r'(\w)\. ((\w)\. )?(\w+)', name)
                    person = {
                        'name': name,
                        'family_name': name_parts.group(4),
                        'given_name': name_parts.group(1)
                    }
                    person['sort_name'] = '%s, %s.' % (person['family_name'], person['given_name'])
                    if name_parts.group(3):
                        person['additional_name'] = name_parts.group(3)
                        person['sort_name'] += ' %s.' % person['additional_name']
                    resp = vpapi.post('people', person)
                    speaker_id = resp['id']
                    mps[name] = speaker_id

            # recognize date(-time) stamps in transcripts
            ds = re.match(r'^\s*(\d+\.\s\w+\s\d{4})(.*hodine)?\s*$', par)
            if ds:
                dt = ds.group(1).strip()
                tm = re.search(r'o\s+(.*?)\s+', ds.group(2) or '')
                try:
                    if tm:
                        h, m = tm.group(1).strip('.').split('.')
                        date = sk_to_utc('%s %s:%s:00' % (dt, h.strip().zfill(2), m.strip().zfill(2)))
                    else:
                        date = sk_to_utc(dt) + 'T00:00:00'
                    continue
                except ValueError:
                    pass

            # process eventual scene in this paragraph
            scene_pattern = r'(.*?)\(\s*([\d%s][^\(\)]{2,}[\.?!“])\s*\)(.*)$' % scrapeutils.CS_UPPERS
            while True:
                scene = re.match(scene_pattern, par, re.DOTALL)
                if not scene:
                    break
                if scene.group(1):
                    text += '\n\n<p>%s</p>' % scene.group(1).strip()
                    insert_speech('speech')
                text = '<p>%s</p>' % scene.group(2).strip()
                insert_speech('scene')
                par = scene.group(3)

            if par:
                text += '\n\n<p>%s</p>' % par.strip()

        insert_speech('speech')

        # extract end time of the session
        final_time = re.search(
            r'\b(skončil.|skončené|prerušené|Prerušenie rokovani[ae])\s+o\s+(.*?)\s+hodine.',
            speeches[-1]['text'])
        if final_time:
            tm = final_time.group(2)
            tm = tm.replace('O', '0').replace(',', '.')
            h, m = tm.strip('.').split('.')
            final_date = '%s.%s.%s %s:%s:00' % (date[8:10], date[5:7], date[0:4], h.strip().zfill(2), m.strip().zfill(2))
            final_date = sk_to_utc(final_date)
            vpapi.patch('events', session_id, {'end_date': final_date})
            vpapi.patch('events', sitting_id, {'end_date': final_date})

        vpapi.post('speeches', speeches)
        logging.info('Scraped %s speeches' % len(speeches))
        speech_count += len(speeches)

    logging.info('Scraped %s speeches in total' % speech_count)
    return speech_count
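# Both scrape_old_debates() above and scrape_motions() below call a
# get_or_create() helper that is defined elsewhere in the project. A hedged
# sketch of what such a helper might look like, based only on how it is called
# here (it returns the item id and a created flag); the project's actual
# implementation may differ:
def get_or_create_sketch(resource, item, key):
    """Return (id, created) for the item matching `item` on the fields in `key`."""
    where = {field: item[field] for field in key}
    existing = vpapi.getfirst(resource, where=where)
    if existing:
        return existing['id'], False
    resp = vpapi.post(resource, item)
    return resp['id'], True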
def scrape_motions(term):
    """Scrape and save motions from the given term that are not scraped yet,
    starting from the oldest ones. One Motion item, one VoteEvent item and
    many Vote items are created for each scraped motion detail page.

    Returns number of scraped motions.
    """
    logging.info('Scraping motions of term `%s`' % term)

    # prepare mappings from source identifier to id for MPs and parliamentary groups
    chamber_id = get_chamber_id(term)
    people = vpapi.getall('people', projection={'identifiers': 1})
    mps = {mp['identifiers'][0]['identifier']: mp['id'] for mp in people if 'identifiers' in mp}
    orgs = vpapi.getall('organizations', where={'classification': 'parliamentary group', 'parent_id': chamber_id})
    parl_groups = {c['name']: c['id'] for c in orgs}

    # add differently spelled parliamentary groups
    group_corrections = {
        '2': {
            'Klub HZDS': 'Klub ĽS-HZDS',
            'Klub SMK': 'Klub SMK-MKP',
            'Klub Nezávislí': 'Klub Nezávislý',
        },
        '3': {
            'Klub HZDS': 'Klub ĽS-HZDS',
            'Klub SDKÚ': 'Klub SDKÚ-DS',
            'Klub Smer': 'Klub SMER-SD',
            'Klub Smer-SD': 'Klub SMER-SD',
            'Klub KNP': 'Klub nezávislých poslancov NR SR',
            'Klub Nezávislý': 'Klub nezávislých poslancov NR SR',
        },
    }
    for k, v in group_corrections.get(term, {}).items():
        parl_groups[k] = parl_groups[v]

    # prepare list of sessions that are not completely scraped yet
    sessions_to_scrape = []
    session_list = parse.session_list(term)
    for session in session_list['_items']:
        motions = parse.session(session['číslo'], term)
        if len(motions['_items']) == 0:
            continue
        last_motion_id = motions['_items'][-1]['id']
        m_url = 'http://www.nrsr.sk/web/Default.aspx?sid=schodze/hlasovanie/hlasklub&ID=%s' % last_motion_id
        existing = vpapi.getfirst('motions', where={'sources.url': m_url})
        if existing:
            break
        sessions_to_scrape.append((session, motions))

    # scrape motions from those sessions
    scraped_motions_count = 0
    for s, motions in reversed(sessions_to_scrape):
        logging.info('Scraping session `%s`' % s['názov'])

        # insert the session event unless it already exists
        session = {
            'name': s['názov'],
            'identifier': s['číslo'],
            'organization_id': chamber_id,
            'type': 'session',
        }
        try:
            session['start_date'] = sk_to_utc(s['trvanie']) + 'T00:00:00'
            session['end_date'] = session['start_date']
        except ValueError:
            # multiday session contains votes; dates are set by debates scraping
            pass
        key = ('organization_id', 'type', 'identifier')
        session_id, _ = get_or_create('events', session, key)

        for i, m in enumerate(motions['_items']):
            # check if the motion is already present
            m_id = re.search(r'ID=(\d+)', m['url']['výsledok']).group(1)
            # we do not use m['url']['kluby'] directly because it is not always present
            m_url = 'http://www.nrsr.sk/web/Default.aspx?sid=schodze/hlasovanie/hlasklub&ID=%s' % m_id
            existing = vpapi.getfirst('motions', where={'sources.url': m_url})
            if existing:
                continue

            try:
                motion_id = None
                vote_event_id = None

                # insert motion
                logging.info('Scraping motion %s of %s (voted at %s)' % (i+1, len(motions['_items']), m['dátum']))
                parsed_motion = parse.motion(m['id'])
                motion = {
                    'organization_id': chamber_id,
                    'legislative_session_id': session_id,
                    'identifier': parsed_motion['číslo'],
                    'text': parsed_motion['názov'],
                    'date': sk_to_utc(m['dátum']),
                    'sources': [{
                        'url': parsed_motion['url'],
                        'note': 'Hlasovanie na webe NRSR'
                    }],
                }
                if 'výsledok' in parsed_motion:
                    motion['result'] = 'pass' if parsed_motion['výsledok'] == 'Návrh prešiel' else 'fail'
                resp = vpapi.post('motions', motion)
                motion_id = resp['id']

                # insert vote event
                vote_event = {
                    'motion_id': motion_id,
                    'organization_id': chamber_id,
                    'legislative_session_id': session_id,
                    'identifier': parsed_motion['číslo'],
                    'start_date': motion['date'],
                    'sources': [{
                        'url': parsed_motion['url'],
                        'note': 'Hlasovanie na webe NRSR'
                    }],
                }
                if 'výsledok' in parsed_motion:
                    vote_event['result'] = motion['result']
                if 'súčty' in parsed_motion:
                    options = {
                        'yes': '[z] za',
                        'no': '[p] proti',
                        'abstain': '[?] zdržalo sa',
                        'absent': '[0] neprítomní',
                        'not voting': '[n] nehlasovalo'
                    }
                    vote_event['counts'] = [
                        {'option': o, 'value': int(parsed_motion['súčty'][s])}
                        for o, s in options.items() if parsed_motion['súčty'][s] != ''
                    ]
                    if len(vote_event['counts']) == 0:
                        del vote_event['counts']
                resp = vpapi.post('vote-events', vote_event)
                vote_event_id = resp['id']

                # insert votes
                if 'hlasy' in parsed_motion and len(parsed_motion['hlasy']) > 0:
                    vote_options = {
                        'z': 'yes',
                        'p': 'no',
                        '?': 'abstain',
                        'n': 'not voting',
                        '0': 'absent'
                    }
                    votes = []
                    for v in parsed_motion['hlasy']:
                        # skip MPs not applying their mandate
                        if v['hlas'] == '-':
                            continue
                        pg = normalize_parlgroup_name(v['klub'])
                        votes.append({
                            'vote_event_id': vote_event_id,
                            'option': vote_options[v['hlas']],
                            'voter_id': mps.get(v['id']),
                            'group_id': parl_groups.get(pg),
                        })
                    if len(votes) > 0:
                        resp = vpapi.post('votes', votes)

            # delete incomplete data if insertion of the motion, vote event or votes failed
            except:
                if motion_id:
                    vpapi.delete('motions', motion_id)
                if vote_event_id:
                    vpapi.delete('vote-events', vote_event_id)
                raise

            scraped_motions_count += 1

    logging.info('Scraped %s motions of term `%s`' % (scraped_motions_count, term))
    return scraped_motions_count
def main():
    # read command-line arguments
    ap = argparse.ArgumentParser('Scrapes data from Slovak parliament website http://nrsr.sk')
    ap.add_argument('--people', choices=['initial', 'recent', 'none'], default='recent',
        help='scrape of people, organizations and memberships')
    ap.add_argument('--votes', choices=['initial', 'recent', 'none'], default='recent',
        help='scrape of motions and votes')
    ap.add_argument('--debates', choices=['initial', 'recent', 'none'], default='recent',
        help='scrape of speeches from debates')
    ap.add_argument('--term', help='term to scrape recent data from; current term is used when omitted')
    args = ap.parse_args()

    # set up logging to a local file
    if not os.path.exists(LOGS_DIR):
        os.makedirs(LOGS_DIR)
    logname = datetime.utcnow().strftime('%Y-%m-%d-%H%M%S') + '.log'
    logname = os.path.join(LOGS_DIR, logname)
    logname = os.path.abspath(logname)
    logging.basicConfig(level=logging.DEBUG, format='%(message)s',
        handlers=[logging.FileHandler(logname, 'w', 'utf-8')])
    logging.getLogger('requests').setLevel(logging.ERROR)

    logging.info('Started')
    try:
        # set up the API access
        vpapi.parliament('sk/nrsr')
        vpapi.timezone('Europe/Bratislava')
        with open(os.path.join(CONF_DIR, 'private.json'), encoding='utf8') as f:
            creds = json.load(f)
        vpapi.authorize(creds['api_user'], creds['password'])

        # indicate that the scraper has started
        db_log = vpapi.post('logs', {'status': 'running', 'file': logname, 'params': args.__dict__})

        # clear cached source files
        if scrapeutils.USE_WEBCACHE:
            logging.info('Clearing cached files')
            scrapeutils.clear_cache()

        # test parser functions
        logging.info('Testing parser functions')
        out = io.StringIO()
        suite = unittest.TestLoader().loadTestsFromModule(sys.modules['test'])
        result = unittest.TextTestRunner(stream=out).run(suite)
        logging.info(out.getvalue())
        if result.errors or result.failures:
            raise RuntimeError('Unit tests of parser functions failed, update canceled.')

        if args.people == 'initial':
            # initial scrape of all history of people and organizations
            logging.info('Initial scrape - deleting people, organizations and memberships')
            vpapi.delete('memberships')
            vpapi.delete('organizations')
            vpapi.delete('people')
            for term in sorted(parse.terms.keys()):
                scrape_people(term)
        elif args.people == 'recent':
            # incremental scrape of people and organizations since the last scrape
            term = args.term or parse.current_term()
            if term not in parse.terms:
                raise Exception('Unknown term `%s`. Scrape canceled. Add it to the terms list in parse.py and rerun for the recently finished term once more.' % term)
            scrape_people(term)

        terms_with_old_debates = ('1', '2', '3', '4')
        if args.debates == 'initial':
            # initial scrape of debates from all terms
            logging.info('Initial scrape - deleting speeches and events')
            vpapi.delete('speeches')
            vpapi.delete('events')
            # newer terms are scraped first to get full names of unknown speakers
            for term in sorted(parse.terms.keys()):
                if term in terms_with_old_debates:
                    continue
                scrape_new_debates(term)
            for term in terms_with_old_debates:
                scrape_old_debates(term)
        elif args.debates == 'recent':
            # incremental scrape of debates since the last scrape
            term = args.term or parse.current_term()
            if term not in parse.terms:
                raise Exception('Unknown term `%s`. Scrape canceled. Add it to the terms list in parse.py and rerun once more.' % term)
            if term in terms_with_old_debates:
                scrape_old_debates(term)
            else:
                scrape_new_debates(term)

        if args.votes == 'initial':
            # initial scrape of votes from all terms
            logging.info('Initial scrape - deleting votes, vote-events and motions')
            vpapi.delete('votes')
            vpapi.delete('vote-events')
            vpapi.delete('motions')
            for term in sorted(parse.terms.keys()):
                scrape_motions(term)
        elif args.votes == 'recent':
            # incremental scrape of votes since the last scrape
            term = args.term or parse.current_term()
            if term not in parse.terms:
                raise Exception('Unknown term `%s`. Scrape canceled. Add it to the terms list in parse.py and rerun once more.' % term)
            scrape_motions(term)

        status = 'finished'

    except BaseException as e:
        logging.critical(e, exc_info=True)
        if hasattr(e, 'response') and hasattr(e.response, '_content'):
            logging.critical(e.response._content.decode('utf-8'))
        status = 'interrupted' if isinstance(e, KeyboardInterrupt) else 'failed'

        # output to console to provoke an e-mail from Cron
        print('Scraping of parliament sk/nrsr failed, see\n\n' + logname + '\n\nfor details.')

    finally:
        logging.info(status.capitalize())
        if 'db_log' in locals():
            vpapi.patch('logs', db_log['id'], {'status': status})
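# Assuming the scraper is run as a standalone script from Cron (as main()
# suggests), the usual entry-point guard would be:
if __name__ == '__main__':
    main()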
import vpapi
import authentication

vpapi.parliament('cz/psp')
vpapi.authorize(authentication.username, authentication.password)
vpapi.timezone('Europe/Prague')

vpapi.delete("votes")
vpapi.delete("vote-events")
vpapi.delete("motions")