def main():
    """Run the selected Scrapy spiders for every configured repository.

    For each ``owner/repository`` pair in ``repos`` the function:

    1. exports ``REPOSITORY_OWNER`` and ``REPOSITORY_NAME`` through
       ``os.environ`` so that every child process inherits them,
    2. runs ``scrapy crawl <spider>`` for each entry in ``spiders``,
    3. passes the owner and name to ``Config.set_repository_owner`` and
       ``Config.set_repository_name``.

    Returns early (after printing a hint) when ``repos`` is empty.
    """
    # Maps a GitHub owner to the list of that owner's repositories to scrape.
    # Uncomment entries (or add new ones) to choose what gets processed.
    repos = {
        # This one is already present
        "tensorflow": ["tensorflow"],
        # "arduino": ["Arduino"],
        # "audacity": ["audacity"],
        # "capistrano": ["capistrano"],
        # "cloudera": ["hue"],
        # "floweisshardt": ["atf"],
        # "bardsoftware": ["ganttproject"],
        # "nginx": ["nginx"],
        # "torakiki": ["pdfsam"],
        # "scala": ["scala"],
        # "Squirrel": ["Squirrel.Windows"],
        # "tornadoweb": ["tornado"],
        # "liferay": ["liferay-learn"],
    }
    if not repos:
        print("Odkomentuj lub dodaj coś do tego pliku w zmiennej repos")
        return

    # Names of the Scrapy spiders to run for each repository.
    spiders = [
        "githubapi",
        # "contributors",
        # "subscribers",
        # "users",
        # "milestones",
        "releases",
    ]

    for owner, repositories in repos.items():
        for repository in repositories:
            print(f"{owner}/{repository}")
            print("Scrap api")

            # Setting os.environ is enough on every platform: processes
            # started by os.system inherit it, so no shell-specific
            # `set` (Windows) or `export` (POSIX) prefix is required.
            # Presumably the spiders read these two variables - confirm
            # against the spider/settings code.
            os.environ["REPOSITORY_OWNER"] = owner
            os.environ["REPOSITORY_NAME"] = repository

            for spider in spiders:
                os.system(f"scrapy crawl {spider}")

            Config.set_repository_owner(owner)
            Config.set_repository_name(repository)
            # print("Scrap repo")
            # scrap_repo()
    print("End")
def main():
    """Run every spider for each configured repository, then scrape the repo.

    For each ``owner/repository`` pair in ``repos`` the function:

    1. exports ``REPOSITORY_OWNER`` and ``REPOSITORY_NAME`` through
       ``os.environ`` so that every child process inherits them,
    2. runs ``scrapy crawl <spider>`` for each entry in ``spiders``,
    3. passes the owner and name to ``Config.set_repository_owner`` and
       ``Config.set_repository_name`` and then calls ``scrap_repo()``.
    """
    # Maps a GitHub owner to the list of that owner's repositories to scrape.
    repos = {
        # "tensorflow": ["tensorflow"],
        "arduino": ["Arduino"],
    }

    # Names of the Scrapy spiders to run for each repository.
    spiders = [
        "githubapi",
        "contributors",
        "subscribers",
        "users",
    ]

    for owner, repositories in repos.items():
        for repository in repositories:
            # Set the variables once per repository via os.environ instead
            # of prefixing each command with a POSIX-only `export ... &&`;
            # processes started by os.system inherit os.environ on every
            # platform. Presumably the spiders read these two variables -
            # confirm against the spider/settings code.
            os.environ["REPOSITORY_OWNER"] = owner
            os.environ["REPOSITORY_NAME"] = repository

            for spider in spiders:
                os.system(f"scrapy crawl {spider}")

            Config.set_repository_owner(owner)
            Config.set_repository_name(repository)
            scrap_repo()