示例#1
0
def main():
    """Fetch startup, funding and investor data and load it into the database.

    Steps, in order:

    1. Request ``api_vars.search_startups(1, 100)`` and keep a summary
       record for every company in the response's ``"data"`` list.
    2. Request each company's profile and collect company, funding-stage,
       investor-participation and industry records from it.
    3. Request the profile behind every investor participation collected in
       step 2 and build an investor record from it; responses whose status
       is not OK are skipped.
    4. Connect to the ``dev_techinasia`` PostgreSQL database, create the
       tables and run the insert helpers, always closing the connection.

    Raises:
        psycopg2.Error: if the database connection cannot be established
            (a failure message is printed first).
    """
    r = requests.get(api_vars.search_startups(1, 100))
    data = r.json()["data"]
    print("received " + str(len(data)) + " companies")

    slugs = []
    companies = []
    investors = []
    stages = []
    investorfunds = []
    industries = []

    # Build one summary record per search hit; the profile loop below reuses
    # these values for fields it copies from the search result.
    for company in data:
        slugs.append({
            "CompanyName": company["company"]["name"],
            "tiaCompanyID": company["company"]["id"],
            "tiaURL": "companies/" + company["entity"]["slug"],
            "Country": company["country"]["name"],
            "LatestFundingDate": datetime.datetime.strptime(
                company["funding_round"]["date"], "%Y-%m-%d"),
            "LatestFundingAmount": int(company["funding_round"]["amount"]),
            "FundingStage": company["stage"]["name"],
        })

    # Use each tiaURL to fetch the full profile (funding data) of a startup.
    for company in slugs:
        print("......processing " + company["CompanyName"])

        r = requests.get(api_vars.startup + company["tiaURL"])
        company_data = r.json()

        companies.append({
            "CompanyName": company_data["name"],
            "tiaCompanyID": company_data["id"],
            "Country": company["Country"],
            "tiaURL": "companies/" + company_data["entity"]["slug"],
            # Already converted to int when the summary record was built.
            "LatestFundingAmount": company["LatestFundingAmount"],
            "LatestFundingDate": company["LatestFundingDate"],
            "FundingStage": company["FundingStage"],
            "FoundedDate": datetime.datetime.strptime(
                company_data["date_founded"], "%Y-%m-%d"),
        })

        # Build out funding stages: one record per funding round.
        for stage in company_data["funding_stages"]:
            for fround in stage["rounds"]:
                # A fresh dict per round is required: reusing one dict for
                # the whole stage would make every appended entry alias the
                # same object and end up holding the last round's values.
                stages.append({
                    "amount": fround["amount"],
                    "tiaFundingStageID": stage["id"],
                    "tiaFundingRoundID": fround["id"],
                    "tiaCompanyID": stage["company_id"],
                    "stageName": stage["stage"]["name"],
                    "dateClosed": datetime.datetime.strptime(
                        fround["date_ended"], "%Y-%m-%d"),
                })

                for investor in fround["participants"]:
                    investorfunds.append({
                        "tiaInvestorID": investor["investor"]["id"],
                        "tiaFundingStageID": stage["id"],
                        "tiaURL": "companies/" + investor["investor"]["slug"],
                    })

        for industry in company_data["entity"]["industries"]:
            industries.append({
                "tiaCompanyID": company_data["id"],
                "industryName": industry["name"],
                "industryID": industry["id"],
            })

    # Build out investors.
    # NOTE(review): an investor taking part in several rounds is fetched and
    # appended once per participation; confirm InsertInvestors copes with
    # duplicates before de-duplicating here.
    for investor in investorfunds:
        r = requests.get(api_vars.startup + investor["tiaURL"])

        # Check the status before decoding: an error response may not carry
        # a JSON body, and decoding it first would raise instead of skipping.
        if r.status_code != requests.codes.ok:
            continue

        investor_response = r.json()

        location = ''
        for loc in investor_response["entity"]["locations"]:
            if loc["type"].lower() == "hq":
                location = loc["country"]["name"]

        print("......processing " + investor_response["name"])

        taxonomies = investor_response["entity"]["taxonomies"]
        investors.append({
            "InvestorName": investor_response["name"],
            "tiaInvestorID": investor_response["entity"]["id"],
            "InvestorType": taxonomies[0]["name"] if taxonomies else '',
            "InvestorLocation": location,
        })

    # From here to the end we run our database work.
    try:
        conn = psycopg2.connect("dbname = dev_techinasia user = michaelhi host = localhost")
    except psycopg2.Error:
        # Without a connection nothing below can work; report and propagate
        # the real cause instead of failing later on an unbound `conn`.
        print("FAILBLOG: connection to database failed")
        raise
    print("Successfully connected to techinasia database")

    try:
        createTables(conn)

        print("Inserting Data")

        InsertStartupData(conn, companies)
        InsertInvestorsToFundingStages(conn, investorfunds)
        InsertFundingStages(conn, stages)
        InsertInvestors(conn, investors)
        InsertIndustries(conn, industries)

        print("Done With Inserts, Closing Connection")
    finally:
        # Release the connection even when table creation or an insert fails.
        conn.close()
示例#2
0
def search_results(page):
    """Return the ``"data"`` entry of the startup search response for ``page``.

    The search URL is built by ``api_vars.search_startups`` with a page size
    of 100.
    """
    response = requests.get(api_vars.search_startups(page, 100))
    payload = response.json()
    return payload["data"]
示例#3
0
def main():
    """Fetch startup, funding and investor data and load it into the database.

    Steps, in order:

    1. Request ``api_vars.search_startups(1, 100)`` and keep a summary
       record for every company in the response's ``"data"`` list.
    2. Request each company's profile and collect company, funding-stage,
       investor-participation and industry records from it.
    3. Request the profile behind every investor participation collected in
       step 2 and build an investor record from it; responses whose status
       is not OK are skipped.
    4. Connect to the ``dev_techinasia`` PostgreSQL database, create the
       tables and run the insert helpers, always closing the connection.

    Raises:
        psycopg2.Error: if the database connection cannot be established
            (a failure message is printed first).
    """
    r = requests.get(api_vars.search_startups(1, 100))
    data = r.json()["data"]
    print("received " + str(len(data)) + " companies")

    slugs = []
    companies = []
    investors = []
    stages = []
    investorfunds = []
    industries = []

    # Build one summary record per search hit; the profile loop below reuses
    # these values for fields it copies from the search result.
    for company in data:
        slugs.append({
            "CompanyName": company["company"]["name"],
            "tiaCompanyID": company["company"]["id"],
            "tiaURL": "companies/" + company["entity"]["slug"],
            "Country": company["country"]["name"],
            "LatestFundingDate": datetime.datetime.strptime(
                company["funding_round"]["date"], "%Y-%m-%d"),
            "LatestFundingAmount": int(company["funding_round"]["amount"]),
            "FundingStage": company["stage"]["name"],
        })

    # Use each tiaURL to fetch the full profile (funding data) of a startup.
    for company in slugs:
        print("......processing " + company["CompanyName"])

        r = requests.get(api_vars.startup + company["tiaURL"])
        company_data = r.json()

        companies.append({
            "CompanyName": company_data["name"],
            "tiaCompanyID": company_data["id"],
            "Country": company["Country"],
            "tiaURL": "companies/" + company_data["entity"]["slug"],
            # Already converted to int when the summary record was built.
            "LatestFundingAmount": company["LatestFundingAmount"],
            "LatestFundingDate": company["LatestFundingDate"],
            "FundingStage": company["FundingStage"],
            "FoundedDate": datetime.datetime.strptime(
                company_data["date_founded"], "%Y-%m-%d"),
        })

        # Build out funding stages: one record per funding round.
        for stage in company_data["funding_stages"]:
            for fround in stage["rounds"]:
                # A fresh dict per round is required: reusing one dict for
                # the whole stage would make every appended entry alias the
                # same object and end up holding the last round's values.
                stages.append({
                    "amount": fround["amount"],
                    "tiaFundingStageID": stage["id"],
                    "tiaFundingRoundID": fround["id"],
                    "tiaCompanyID": stage["company_id"],
                    "stageName": stage["stage"]["name"],
                    "dateClosed": datetime.datetime.strptime(
                        fround["date_ended"], "%Y-%m-%d"),
                })

                for investor in fround["participants"]:
                    investorfunds.append({
                        "tiaInvestorID": investor["investor"]["id"],
                        "tiaFundingStageID": stage["id"],
                        "tiaURL": "companies/" + investor["investor"]["slug"],
                    })

        for industry in company_data["entity"]["industries"]:
            industries.append({
                "tiaCompanyID": company_data["id"],
                "industryName": industry["name"],
                "industryID": industry["id"],
            })

    # Build out investors.
    # NOTE(review): an investor taking part in several rounds is fetched and
    # appended once per participation; confirm InsertInvestors copes with
    # duplicates before de-duplicating here.
    for investor in investorfunds:
        r = requests.get(api_vars.startup + investor["tiaURL"])

        # Check the status before decoding: an error response may not carry
        # a JSON body, and decoding it first would raise instead of skipping.
        if r.status_code != requests.codes.ok:
            continue

        investor_response = r.json()

        location = ''
        for loc in investor_response["entity"]["locations"]:
            if loc["type"].lower() == "hq":
                location = loc["country"]["name"]

        print("......processing " + investor_response["name"])

        taxonomies = investor_response["entity"]["taxonomies"]
        investors.append({
            "InvestorName": investor_response["name"],
            "tiaInvestorID": investor_response["entity"]["id"],
            "InvestorType": taxonomies[0]["name"] if taxonomies else '',
            "InvestorLocation": location,
        })

    # From here to the end we run our database work.
    try:
        conn = psycopg2.connect(
            "dbname = dev_techinasia user = michaelhi host = localhost")
    except psycopg2.Error:
        # Without a connection nothing below can work; report and propagate
        # the real cause instead of failing later on an unbound `conn`.
        print("FAILBLOG: connection to database failed")
        raise
    print("Successfully connected to techinasia database")

    try:
        createTables(conn)

        print("Inserting Data")

        InsertStartupData(conn, companies)
        InsertInvestorsToFundingStages(conn, investorfunds)
        InsertFundingStages(conn, stages)
        InsertInvestors(conn, investors)
        InsertIndustries(conn, industries)

        print("Done With Inserts, Closing Connection")
    finally:
        # Release the connection even when table creation or an insert fails.
        conn.close()
示例#4
0
def search_results(page):
    """Return the ``"data"`` entry of the startup search response for ``page``.

    The search URL is built by ``api_vars.search_startups`` with a page size
    of 100.
    """
    response = requests.get(api_vars.search_startups(page, 100))
    payload = response.json()
    return payload["data"]