Exemplo n.º 1
0
	def get_data(self, url, object_name):
		"""Download every page of a paged API listing and save it as a single CSV file.

		Args:
			url: URL template with one ``str.format`` placeholder that receives the
				1-based page number.
			object_name: Label that is substituted, together with the current
				timestamp, into ``self.file_name`` to build the output path.

		Side effects:
			``self.data`` is left holding the last response that was fetched.
			On success one CSV file is written (property columns of the first item
			plus a ``uuid`` column). If any request fails, the API error is printed
			and no file is written.
		"""

		def report_failure(response):
			# The error body is read as a list whose first entry carries status/message.
			error = response.json()[0]
			print('SNAP! Something goes wrong.\nSTATUS: {}\nMESSAGE: {}'.format(error['status'], error['message']))

		self.data = CM.get_api_data(url.format('1'))
		if not self.data.ok:
			report_failure(self.data)
			return

		# Parse the first page once and reuse it instead of re-decoding per field.
		first_page = self.data.json()[CBDict.data.value]
		paging = first_page[CBDict.paging.value]
		total_items = paging[CBDict.total_items.value]
		number_of_pages = paging[CBDict.number_of_pages.value]
		page_items = first_page[CBDict.items.value]
		if not page_items:
			# Without a first item there is no column layout to derive.
			print('No items returned; nothing to save.')
			return

		# Column layout comes from the first item's properties, plus the item uuid.
		cols = list(page_items[0][CBDict.properties.value])
		cols.append('uuid')
		print(cols)
		print('Total items: {}\nTotal Pages: {}'.format(total_items, number_of_pages))

		data_list = []
		for j in range(number_of_pages):
			if j > 0:
				# Page 1 is already in hand; only later pages need a request.
				self.data = CM.get_api_data(url.format(j + 1))
				if not self.data.ok:
					report_failure(self.data)
					return
				page_items = self.data.json()[CBDict.data.value][CBDict.items.value]
			print(j, '*' * j, len(page_items))
			for item in page_items:
				row = item[CBDict.properties.value]
				# Store under the exact name used in ``cols`` so the column is always filled.
				row['uuid'] = item[CBDict.uuid.value]
				data_list.append(row)

		df = pd.DataFrame(data_list, columns=cols)
		df.to_csv(self.file_name.format(object_name, str(time.time())), sep=',', columns=cols, index=False)
		print('File saved successfully!')
Exemplo n.º 2
0
	def save_orgs_relationship(self, api_url):
		"""Fetch one organization from the API and store it unless it is already recorded.

		The request URL is ``api_url`` with ``self.api_org_token`` appended. When the
		response is OK, ``self.org_uuid`` is set from the payload and the
		``sql_org_detail_exists`` query is run for it. If that query returns no rows,
		the organization detail, its funding rounds, headquarters and categories are
		saved and the ``orgs_summary_update`` statement is executed; otherwise a
		notice is printed. A non-OK response is ignored silently.

		Args:
			api_url: Organization endpoint URL, without the trailing token part.
		"""
		request_url = api_url + self.api_org_token
		print(request_url)
		response = CM.get_api_data(request_url)
		if not response.ok:
			return

		payload = response.json()[CBDict.data.value]
		self.org_uuid = payload[CBDict.uuid.value]
		exists_query = self.enum.SQL.sql_org_detail_exists.value.format(self.org_uuid)
		existing = self.db.pandas_read(exists_query)
		if len(existing) != 0:
			print('organization already exists.')
			return

		self.save_organization_detail(self.org_uuid, payload[CBDict.properties.value])
		relationships = payload[CBDict.relationships.value]
		self.save_funding_rounds(relationships['funding_rounds'], self.org_uuid)
		self.save_relational_entity(
			relationships[self.enum.CBDict.headquarters.value],
			self.org_uuid,
			self.enum.SQL.sql_offices_exists.value,
			self.enum.SQL.sql_offices_insert.value,
			self.office_col,
		)
		self.save_relational_entity(
			relationships['categories'],
			self.org_uuid,
			self.enum.SQL.sql_org_category_exists.value,
			self.enum.SQL.sql_org_category_insert.value,
			self.category_col,
		)

		# Not persisted here: teams (featured/current/past/board), investments,
		# sub_organizations, offices, founders, acquisitions, acquired_by, ipo,
		# funds, websites, images and news.

		# NOTE(review): this uses a bare ``db`` while the existence check above goes
		# through ``self.db`` - confirm a module-level ``db`` is in scope.
		db.execute(self.orgs_summary_update.format(self.org_uuid))