def main():
    """Scrape startup funding data from the API and load it into Postgres.

    Steps, in order:
      1. Request ``api_vars.search_startups(1, 100)`` and read the ``"data"``
         list of the JSON response.
      2. Fetch each startup's profile and collect company, funding-round,
         round-participant and industry records.
      3. Fetch each participant's profile to build investor records,
         skipping profiles that do not answer with HTTP 200.
      4. Connect to the ``dev_techinasia`` database, create the tables and
         insert everything that was collected.

    If the database connection cannot be opened, a message is printed and
    the function returns without inserting anything. Once a connection is
    open it is always closed, even if an insert raises.
    """
    date_format = "%Y-%m-%d"

    r = requests.get(api_vars.search_startups(1, 100))
    data = r.json()["data"]
    print("received " + str(len(data)) + " companies")

    slugs = []
    companies = []
    investors = []
    stages = []
    investorfunds = []
    industries = []

    # construct company objects for database insertion
    for company in data:
        slugs.append({
            "CompanyName": company["company"]["name"],
            "tiaCompanyID": company["company"]["id"],
            "tiaURL": "companies/" + company["entity"]["slug"],
            "Country": company["country"]["name"],
            "LatestFundingDate": datetime.datetime.strptime(
                company["funding_round"]["date"], date_format),
            "LatestFundingAmount": int(company["funding_round"]["amount"]),
            "FundingStage": company["stage"]["name"],
        })

    # now we use the tiaURLs to get funding data for each startup
    for company in slugs:
        print("......processing " + company["CompanyName"])
        r = requests.get(api_vars.startup + company["tiaURL"])
        company_data = r.json()

        companies.append({
            "CompanyName": company_data["name"],
            "tiaCompanyID": company_data["id"],
            "Country": company["Country"],
            "tiaURL": "companies/" + company_data["entity"]["slug"],
            # Already converted to int when the search entry was built.
            "LatestFundingAmount": company["LatestFundingAmount"],
            "LatestFundingDate": company["LatestFundingDate"],
            "FundingStage": company["FundingStage"],
            "FoundedDate": datetime.datetime.strptime(
                company_data["date_founded"], date_format),
        })

        # build out funding stages
        for stage in company_data["funding_stages"]:
            for fround in stage["rounds"]:
                # A fresh dict per round: reusing one dict for the whole
                # stage would make every appended entry alias the same
                # object and end up holding the last round's values.
                stages.append({
                    "amount": fround["amount"],
                    "tiaFundingStageID": stage["id"],
                    "tiaFundingRoundID": fround["id"],
                    "tiaCompanyID": stage["company_id"],
                    "stageName": stage["stage"]["name"],
                    "dateClosed": datetime.datetime.strptime(
                        fround["date_ended"], date_format),
                })

                for investor in fround["participants"]:
                    investorfunds.append({
                        "tiaInvestorID": investor["investor"]["id"],
                        "tiaFundingStageID": stage["id"],
                        "tiaURL": "companies/" + investor["investor"]["slug"],
                    })

        for industry in company_data["entity"]["industries"]:
            industries.append({
                "tiaCompanyID": company_data["id"],
                "industryName": industry["name"],
                "industryID": industry["id"],
            })

    # build out investors
    # NOTE(review): an investor that took part in several rounds appears
    # several times in investorfunds, so it is fetched and appended once per
    # participation -- confirm InsertInvestors tolerates duplicates.
    for investor in investorfunds:
        r = requests.get(api_vars.startup + investor["tiaURL"])
        # Check the status before decoding: an error response is not
        # guaranteed to carry a JSON body, and r.json() would raise.
        if r.status_code != requests.codes.ok:
            continue
        investor_response = r.json()

        # Use the country of the "hq" location; stays '' when there is none.
        location = ''
        for loc in investor_response["entity"]["locations"]:
            if loc["type"].lower() == "hq":
                location = loc["country"]["name"]

        print("......processing " + investor_response["name"])
        taxonomies = investor_response["entity"]["taxonomies"]
        investors.append({
            "InvestorName": investor_response["name"],
            "tiaInvestorID": investor_response["entity"]["id"],
            "InvestorType": taxonomies[0]["name"] if taxonomies else '',
            "InvestorLocation": location,
        })

    ## Here to the end we run our database work
    try:
        conn = psycopg2.connect(
            "dbname = dev_techinasia user = michaelhi host = localhost")
    except psycopg2.Error:
        # Without a connection nothing below can run; stop here instead of
        # failing later on an unbound `conn`.
        print("FAILBLOG: connection to database failed")
        return
    print("Successfully connected to techinasia database")

    try:
        createTables(conn)
        print("Inserting Data")
        InsertStartupData(conn, companies)
        InsertInvestorsToFundingStages(conn, investorfunds)
        InsertFundingStages(conn, stages)
        InsertInvestors(conn, investors)
        InsertIndustries(conn, industries)
        print("Done With Inserts, Closing Connection")
    finally:
        # Release the connection even when an insert raises.
        conn.close()
def search_results(page, per_page=100):
    """Return the ``"data"`` list from one page of the startup search API.

    Args:
        page: Page number handed to ``api_vars.search_startups``.
        per_page: Second argument handed to ``api_vars.search_startups``.
            Defaults to 100, the value that used to be hard-coded
            (presumably the page size -- confirm against ``api_vars``).

    Returns:
        The value stored under the ``"data"`` key of the JSON response.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
    """
    r = requests.get(api_vars.search_startups(page, per_page))
    # Fail with a clear HTTP error rather than a KeyError / JSON decode
    # error when the API rejects the request.
    r.raise_for_status()
    return r.json()["data"]
def main():
    """Scrape startup funding data from the API and load it into Postgres.

    Steps, in order:
      1. Request ``api_vars.search_startups(1, 100)`` and read the ``"data"``
         list of the JSON response.
      2. Fetch each startup's profile and collect company, funding-round,
         round-participant and industry records.
      3. Fetch each participant's profile to build investor records,
         skipping profiles that do not answer with HTTP 200.
      4. Connect to the ``dev_techinasia`` database, create the tables and
         insert everything that was collected.

    If the database connection cannot be opened, a message is printed and
    the function returns without inserting anything. Once a connection is
    open it is always closed, even if an insert raises.
    """
    date_format = "%Y-%m-%d"

    r = requests.get(api_vars.search_startups(1, 100))
    data = r.json()["data"]
    print("received " + str(len(data)) + " companies")

    slugs = []
    companies = []
    investors = []
    stages = []
    investorfunds = []
    industries = []

    # construct company objects for database insertion
    for company in data:
        slugs.append({
            "CompanyName": company["company"]["name"],
            "tiaCompanyID": company["company"]["id"],
            "tiaURL": "companies/" + company["entity"]["slug"],
            "Country": company["country"]["name"],
            "LatestFundingDate": datetime.datetime.strptime(
                company["funding_round"]["date"], date_format),
            "LatestFundingAmount": int(company["funding_round"]["amount"]),
            "FundingStage": company["stage"]["name"],
        })

    # now we use the tiaURLs to get funding data for each startup
    for company in slugs:
        print("......processing " + company["CompanyName"])
        r = requests.get(api_vars.startup + company["tiaURL"])
        company_data = r.json()

        companies.append({
            "CompanyName": company_data["name"],
            "tiaCompanyID": company_data["id"],
            "Country": company["Country"],
            "tiaURL": "companies/" + company_data["entity"]["slug"],
            # Already converted to int when the search entry was built.
            "LatestFundingAmount": company["LatestFundingAmount"],
            "LatestFundingDate": company["LatestFundingDate"],
            "FundingStage": company["FundingStage"],
            "FoundedDate": datetime.datetime.strptime(
                company_data["date_founded"], date_format),
        })

        # build out funding stages
        for stage in company_data["funding_stages"]:
            for fround in stage["rounds"]:
                # A fresh dict per round: reusing one dict for the whole
                # stage would make every appended entry alias the same
                # object and end up holding the last round's values.
                stages.append({
                    "amount": fround["amount"],
                    "tiaFundingStageID": stage["id"],
                    "tiaFundingRoundID": fround["id"],
                    "tiaCompanyID": stage["company_id"],
                    "stageName": stage["stage"]["name"],
                    "dateClosed": datetime.datetime.strptime(
                        fround["date_ended"], date_format),
                })

                for investor in fround["participants"]:
                    investorfunds.append({
                        "tiaInvestorID": investor["investor"]["id"],
                        "tiaFundingStageID": stage["id"],
                        "tiaURL": "companies/" + investor["investor"]["slug"],
                    })

        for industry in company_data["entity"]["industries"]:
            industries.append({
                "tiaCompanyID": company_data["id"],
                "industryName": industry["name"],
                "industryID": industry["id"],
            })

    # build out investors
    # NOTE(review): an investor that took part in several rounds appears
    # several times in investorfunds, so it is fetched and appended once per
    # participation -- confirm InsertInvestors tolerates duplicates.
    for investor in investorfunds:
        r = requests.get(api_vars.startup + investor["tiaURL"])
        # Check the status before decoding: an error response is not
        # guaranteed to carry a JSON body, and r.json() would raise.
        if r.status_code != requests.codes.ok:
            continue
        investor_response = r.json()

        # Use the country of the "hq" location; stays '' when there is none.
        location = ''
        for loc in investor_response["entity"]["locations"]:
            if loc["type"].lower() == "hq":
                location = loc["country"]["name"]

        print("......processing " + investor_response["name"])
        taxonomies = investor_response["entity"]["taxonomies"]
        investors.append({
            "InvestorName": investor_response["name"],
            "tiaInvestorID": investor_response["entity"]["id"],
            "InvestorType": taxonomies[0]["name"] if taxonomies else '',
            "InvestorLocation": location,
        })

    ## Here to the end we run our database work
    try:
        conn = psycopg2.connect(
            "dbname = dev_techinasia user = michaelhi host = localhost")
    except psycopg2.Error:
        # Without a connection nothing below can run; stop here instead of
        # failing later on an unbound `conn`.
        print("FAILBLOG: connection to database failed")
        return
    print("Successfully connected to techinasia database")

    try:
        createTables(conn)
        print("Inserting Data")
        InsertStartupData(conn, companies)
        InsertInvestorsToFundingStages(conn, investorfunds)
        InsertFundingStages(conn, stages)
        InsertInvestors(conn, investors)
        InsertIndustries(conn, industries)
        print("Done With Inserts, Closing Connection")
    finally:
        # Release the connection even when an insert raises.
        conn.close()
def search_results(page, per_page=100):
    """Return the ``"data"`` list from one page of the startup search API.

    Args:
        page: Page number handed to ``api_vars.search_startups``.
        per_page: Second argument handed to ``api_vars.search_startups``.
            Defaults to 100, the value that used to be hard-coded
            (presumably the page size -- confirm against ``api_vars``).

    Returns:
        The value stored under the ``"data"`` key of the JSON response.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
    """
    r = requests.get(api_vars.search_startups(page, per_page))
    # Fail with a clear HTTP error rather than a KeyError / JSON decode
    # error when the API rejects the request.
    r.raise_for_status()
    return r.json()["data"]