Пример #1
0
def _scan_company_numbers(ch, writer, last_number, empty_limit, prefix="",
                          verbose=False):
    """Probe consecutive company numbers and write every match to ``writer``.

    Starting right after ``last_number``, each number is looked up with
    ``get_company_details``. Rows returned for a company are written to the
    CSV writer; a falsy result (company filtered out or lookup failed) is
    skipped; ``-1`` (company does not exist) increments a counter that is
    reset whenever rows are written.

    Args:
        ch: API client handed through to ``get_company_details``.
        writer: ``csv.writer`` receiving the result rows.
        last_number: Last number handled by the previous run.
        empty_limit: Number of missing companies tolerated before the next
            missing one ends the scan.
        prefix: Optional company-number prefix (e.g. ``"SC"``). Without a
            prefix the bare integer is passed to the lookup.
        verbose: When true, emit the trace prints used for the first scan.

    Returns:
        The number preceding the one on which the scan stopped.
    """
    empty_counter = 0
    while True:
        last_number += 1
        number = prefix + str(last_number) if prefix else last_number
        details = get_company_details(number, ch)
        if verbose:
            print(details)
        if not details:
            # Company was filtered out or the lookup failed: just move on.
            continue
        if details == -1:
            if verbose:
                print("Empty counter 1 " + str(empty_counter))
            if empty_counter == empty_limit:
                if verbose:
                    print("Empty counter 2  " + str(empty_counter))
                return last_number - 1
            empty_counter += 1
            continue
        empty_counter = 0
        writer.writerows(details)


def main():
    """Resume the company scan and persist how far it got.

    Reads the last processed British and Scottish company numbers from the
    JSON file ``args.last_file``, appends newly found companies to the CSV
    file ``args.result_file`` and writes the updated numbers back to the
    JSON file before exiting with status 0.
    """
    import sys

    args = get_args()
    ch = CompaniesHouseAPI(_API_KEY, int(args.ratelimit))
    empty_limit = int(args.empty_limit)
    with open(args.last_file, 'r+') as last_file:
        data = json.load(last_file)
        last_british = int(data["british_company_last_number"])
        last_scottish = int(data["scottish_company_last_number"])

        with open(args.result_file, "a+", newline='') as res:
            # The file is opened for appending, so only emit the header when
            # it is still empty; otherwise every resumed run would insert a
            # header row in the middle of the data.
            res.seek(0, 2)
            if res.tell() == 0:
                res.write(
                    "Company, Fullname, Address, Country, City, Postal Code\n")
            writer = csv.writer(res)

            # British companies
            last_british = _scan_company_numbers(
                ch, writer, last_british, empty_limit, verbose=True)

            # Scottish companies
            last_scottish = _scan_company_numbers(
                ch, writer, last_scottish, empty_limit, prefix="SC")

        # Rewind by the extra numbers that were probed while waiting for the
        # empty limit to be reached.
        data["british_company_last_number"] = last_british - empty_limit
        data["scottish_company_last_number"] = last_scottish - empty_limit
        last_file.seek(0)
        last_file.truncate()
        json.dump(data, last_file)
        # sys.exit is the supported way to exit a program; the bare exit()
        # helper is only installed by the site module for interactive use.
        sys.exit(0)
Пример #2
0
def get_director(number: str, ch: CompaniesHouseAPI) -> str:
    """Return the director's name for a company with exactly one active officer.

    The officers listing is queried first; if it comes back empty the
    persons-with-significant-control listing and then the PSC statements
    listing are tried as fallbacks.

    Args:
        number: Company number to look up.
        ch: API client used for the lookups.

    Returns:
        The director's name, ``""`` when no suitable director is found, or
        ``None`` when none of the three listings returned any data.
    """
    psc = ch.list_company_officers(company_number=number)
    if not psc:
        psc = ch.list_company_persons_with_significant_control(
            company_number=number)
    if not psc:
        psc = ch.list_company_persons_with_significant_control_statements(
            company_number=number)
    if not psc:
        return None

    director: str = ""
    if psc.get("active_count") == 1:
        # "items" may be absent from the response; treat that as no officers.
        for officer in psc.get("items") or []:
            # The Companies House officer list also contains officers who
            # have resigned (they carry a "resigned_on" date). Skip them so a
            # former director is not reported when the single active officer
            # holds another role.
            if officer.get("resigned_on"):
                continue
            if officer.get("officer_role") == "director":
                director = officer.get("name")
    return director
Пример #3
0
def get_company_details(number: str, ch: CompaniesHouseAPI) -> list:
    """Look up one company and build its CSV row if it qualifies.

    A company qualifies when its status is ``"active"``, its type is
    ``"ltd"``, it has a registered office address and ``get_director``
    finds a director for it.

    Args:
        number: Company number to look up.
        ch: API client used for the lookups.

    Returns:
        A list holding a single row ``[name, director, address, country,
        city, postal_code]`` (commas replaced by spaces) for a qualifying
        company; ``None`` when the company exists but does not qualify or
        when the API kept failing; ``-1`` when the API returned no company.
    """
    try:
        company = ch.get_company(company_number=number)
    except HTTPError as e:
        # Companies House sometimes answers with a 502: wait and retry once.
        print("Companies House API returned error %s\n " % str(e))
        sleep(15)
        try:
            company = ch.get_company(company_number=number)
        except HTTPError as retry_error:
            # A second failure must not abort the whole scan; report it and
            # let the caller skip this number.
            print("Companies House API retry failed for %s: %s" %
                  (str(number), str(retry_error)))
            return None

    not_found_message = (
        str(number) + " company does not exist or meet our requirements")

    if not company:  # empty response: the company does not exist
        print(not_found_message)
        return -1

    # The age is only logged, so a record without a creation date is not a
    # reason to fail the lookup.
    created_on = company.get("date_of_creation")
    if created_on:
        creation_date = datetime.datetime.strptime(
            created_on, "%Y-%m-%d").date()
        time_delta = (datetime.datetime.now().date() - creation_date).days
        print("Company was registered " + str(time_delta) + " days ago")

    qualifies = (company.get("company_status") == "active"
                 and "registered_office_address" in company
                 and company.get('type') == "ltd")
    if qualifies:
        director = get_director(number, ch)
        name = company["company_name"]
        if director:
            address, country, city, postal_code = get_address(company)
            print(name)
            print(director)
            print(address)
            print(number)
            fields = (name, director, address, country, city, postal_code)
            # Commas are stripped from every cell of the row.
            return [[str(field).replace(',', ' ') for field in fields]]

    print(not_found_message)
    return None
Пример #4
0
#!/home/kimgid/.virtualenvs/myproject/bin/python
from companies_house.api import CompaniesHouseAPI
# NOTE(review): the API key is hard-coded in the source file; consider loading
# it from configuration or the environment and rotating this key.
ch = CompaniesHouseAPI('HwcI7GpyQ7KzwZjE9lf0cqXIlDU1M6dy0CyzgCvQ')
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from fuzzywuzzy import fuzz
import time
import datetime
import requests
import pytesseract
from pdf2image import convert_from_path
from PIL import Image
import json
import os
import re


"""url = "https://beta.companieshouse.gov.uk//company/05396788/filing-history/MzI2MDEzNTAwMmFkaXF6a2N4/document?format=pdf"

r = requests.get(url)
with open('test1.pdf', 'wb') as fp:
    fp.write(r.content)"""

# Load today's links from the JSON file in the working directory
with open('todays_links.json', 'r') as fp:
    data = json.load(fp)

# Starts empty; NOTE(review): presumably filled further down the script — confirm
matches = []

# Create test tech giants to check
#!/home/kimgid/.virtualenvs/myproject/bin/python
from companies_house.api import CompaniesHouseAPI
# NOTE(review): the API key is hard-coded in the source file; consider loading
# it from configuration or the environment and rotating this key.
ch = CompaniesHouseAPI('HwcI7GpyQ7KzwZjE9lf0cqXIlDU1M6dy0CyzgCvQ')
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from fuzzywuzzy import fuzz
import time

# Load the company table as a list of per-row dicts.
allcompanies2010 = pd.read_csv('data2010.csv').to_dict(orient='records')

# The search terms live in the CSV column labelled '0'.
company_names1 = [row['0'] for row in allcompanies2010]

# Run one company search per name, pausing 0.55 s before each request
# (presumably to stay within the API rate limit).
companydata = []
for i in company_names1:
    time.sleep(0.55)
    found = ch.search_companies(q=i)
    d = {
        'search': i,
        'companiesinsearch': found,
        'numbercompanies': len(found['items']),
    }
    companydata.append(d)
    print(d)

companydata2 = []
for i in companydata:
    for j in i['companiesinsearch']['items']:
        score = fuzz.token_sort_ratio(j['title'], i['search'])
        m = {}
        m['search'] = i['search']
Пример #6
0
    with open('player_list.csv', 'r', encoding='utf-8-sig') as f:
        csv_reader = csv.DictReader(f)
        for row in csv_reader:
            d = dict()
            d['club'] = row['Club']
            d['name'] = row['Name']
            d['position'] = row['Position']
            d['country'] = row['Country']
            player_list.append(d)
    return player_list


if __name__ == '__main__':
    with open('config.yaml', 'r') as f:
        config = yaml.safe_load(f)
    ch = CompaniesHouseAPI(config['chapi_key'])

    players = load_player_list()

    if len(players) > 600:
        print(f'Warning: {len(players)} will likely cause the API rate limit to be exceeded.')

    for player in players[0:10]:
        results = ch.search_officers(q=player['name'])
        print(f"{player['name']}: {results['total_results']}")
        for item in results['items']:
            found_name = item['title']
            lev = levenshtein_score(player['name'], found_name)
            if lev > 0.9:
                print(f"good hit: {found_name} >< {player['name']} ({lev})")
            else:
Пример #7
0
#!/home/kimgid/.virtualenvs/myproject/bin/python
from companies_house.api import CompaniesHouseAPI
# NOTE(review): the API key is hard-coded in the source file; consider loading
# it from configuration or the environment and rotating this key.
ch = CompaniesHouseAPI('HwcI7GpyQ7KzwZjE9lf0cqXIlDU1M6dy0CyzgCvQ')
import pandas as pd
import numpy as np
import json
import matplotlib.pyplot as plt
from fuzzywuzzy import fuzz
import time
import datetime

# Read the pre-gathered list of UK startup names

allcompanies2010 = pd.read_csv('companydata3.csv').to_dict(orient='records')

# Call API for filing history

todayslinks = []
df2010 = []
for j, i in enumerate(allcompanies2010):
    f = {}
    f['chnumber'] = i['chnumber']
    f['company_name'] = i['officialname']
    f['search'] = i['search']
    print('checking:')
    print(f['company_name'])
    try:
        f['filhis'] = ch.list_company_filing_history(
            company_number=f['chnumber'], items_per_page='10')['items']
        for d in f['filhis']:
            b = {}
Пример #8
0
 def setUp(self):
     """Build the base and the full API client used by the tests."""
     key = API_KEY
     self.base_api: CompaniesHouseAPIBase = CompaniesHouseAPIBase(key)
     self.api: CompaniesHouseAPI = CompaniesHouseAPI(key)
Пример #9
0
from companies_house.api import CompaniesHouseAPI
from datetime import datetime
from string import capwords
import pprint

# NOTE(review): the API key is hard-coded in the source file; consider loading
# it from configuration or the environment and rotating this key.
api_key = 'gHggW0wcFUkPigIifYRo864nCxGBqqIYMLm3Pd_O'
ch = CompaniesHouseAPI(api_key)

# Module-level caches of API responses.
# NOTE(review): presumably keyed by company number like the officers cache — confirm
company_request_cache = {}
# Officer listings, looked up by company number in get_officers.
officers_request_cache = {}


def get_officers(company_number):

    officers_request = None

    if company_number in officers_request_cache is True:
        officers_request = officers_request_cache[company_number]
    else:
        company_request = get_company(company_number)

        if company_request is not None:
            officers_request = ch.list_company_officers(
                company_number=company_number)

            if officers_request is not None:
                ps = datetime.fromisoformat(
                    company_request['accounts']['next_accounts']
                    ['period_start_on'])
                pe = datetime.fromisoformat(company_request['accounts']
                                            ['next_accounts']['period_end_on'])