def start_node():
    """Connect to the IPFS node API, retrying until the node is reachable."""
    try:
        return ipfshttpclient.connect('/dns/ipfs/tcp/5001/http', session=True)
    except ipfshttpclient.exceptions.ConnectionError:
        logger.info(f"{Log.WARNING}Waiting for node active{Log.ENDC}")
        time.sleep(RECURSIVE_SLEEP_REQUEST)
        return start_node()  # Retry recursively until the node answers
def download_file(uri, _dir) -> str:
    """
    Fetch a file from the boring centralized network

    :param uri: Link to file
    :param _dir: Relative path where the file gets stored
    :return: Directory of stored file
    """
    session = requests.Session()
    directory = "%s/resource/%s" % (HOME_PATH, _dir)
    dirname = os.path.dirname(directory)
    file_check = Path(directory)

    # Skip the download if the file already exists
    if file_check.exists():
        logger.warning(f"{Log.WARNING}File already exists: {_dir}{Log.ENDC}")
        return directory

    # Create the parent dir if it does not exist
    Path(dirname).mkdir(parents=True, exist_ok=True)
    response = session.get(
        uri, verify=True, stream=True, timeout=60,
        headers={'User-Agent': random.choice(_agents)}
    )

    # Fail loudly on a bad status instead of silently returning None
    response.raise_for_status()
    logger.warning(f"{Log.WARNING}Trying download to: {directory}{Log.ENDC}")
    with open(directory, "wb") as out:
        for block in response.iter_content(256):
            if not block:
                break
            out.write(block)

    logger.info(f"{Log.OKGREEN}File stored in: {directory}{Log.ENDC}")
    return directory
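# Usage sketch (hypothetical URI and relative path; real calls come from the
# fetch_* helpers further below). The file lands under HOME_PATH/resource/<path>:
#   poster = download_file('https://example.com/poster.jpg', 'tt0000001/poster.jpg')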
def results_generator(resolver) -> typing.Generator:
    """
    Dummy resolver generator call

    :param resolver: Resolver class to instantiate
    :return: Iterable result
    """
    resolver = resolver()  # Init class
    logger.info(f"{Log.WARNING}Generating migrations from {resolver}{Log.ENDC}")
    return resolver(scheme)  # Call the instance and start the migration
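# Usage sketch (`MyResolver` is an illustrative resolver class):
#   for result in results_generator(MyResolver):
#       ...  # consume migrated entries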
def ingest_ipfs_file(_dir: str) -> str:
    """
    Go and conquer the world little child!! Add file to ipfs

    :param _dir: The tmp dir where the file is stored
    :return: The resulting CID for file
    """
    logger.info(f"Ingesting file: {Log.BOLD}{_dir}{Log.ENDC}")
    _hash = ipfs.add(_dir, pin=True)['Hash']
    logger.info(f"IPFS hash: {Log.BOLD}{_hash}{Log.ENDC}")
    return _hash
async def run(cmd):
    """
    Start an async subprocess cmd

    :param cmd: Command to exec
    """
    # Pipe stdout/stderr; without the pipes communicate() returns (None, None)
    # and the logging below never fires
    proc = await asyncio.create_subprocess_shell(
        cmd,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE
    )

    stdout, stderr = await proc.communicate()
    logger.info(f'[{cmd!r} exited with {proc.returncode}]')
    if stdout:
        logger.info(f'[stdout]\n{stdout.decode()}')
    if stderr:
        logger.error(f'[stderr]\n{stderr.decode()}')
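# `run` is a coroutine, so drive it from an event loop, e.g.:
#   asyncio.run(run('ipfs version'))  # illustrative command; Python 3.7+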
def ingest_ipfs_dir(_dir: str) -> str:
    """
    Go and conquer the world little child!!: Add directory to ipfs

    :param _dir: Directory to add to IPFS
    :return: The resulting CID
    """
    directory = "%s/resource/%s" % (HOME_PATH, _dir)
    logger.info(f"Ingesting directory: {Log.BOLD}{_dir}{Log.ENDC}")
    _hash = ipfs.add(directory, pin=True, recursive=True)
    # Pick the wrapping directory entry: it aggregates every child,
    # so it is the entry with the largest size
    _hash = map(lambda x: {'size': int(x['Size']), 'hash': x['Hash']}, _hash)
    _hash = max(_hash, key=lambda x: x['size'])['hash']
    logger.info(f"IPFS hash: {Log.BOLD}{_hash}{Log.ENDC}")
    return _hash
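# For reference, a recursive `add` returns one entry per item, with Size as a
# string (illustrative values):
#   [{'Name': 'tt0000001/poster.jpg', 'Hash': 'Qm...', 'Size': '102400'},
#    {'Name': 'tt0000001', 'Hash': 'Qm...', 'Size': '102612'}]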
def ingest_ipfs_metadata(mv: dict, max_retry=3) -> dict:
    """
    Loop over assets, download them and add them to IPFS

    :param mv: MovieScheme
    :param max_retry: Max retries on failure before raising an exception
    :return: Cleaned, pre-processed, structured ready schema
    """
    try:
        logger.info(f"{Log.OKBLUE}Ingesting {mv.get('imdb_code')}{Log.ENDC}")
        # Downloading files
        current_imdb_code = mv.get('imdb_code')
        current_linked_name = mv.get('group_name', None)
        current_dir = current_imdb_code
        if current_linked_name:  # If linked_name add sub-dir
            current_dir = f"{current_linked_name}/{current_imdb_code}"

        # Fetch resources if needed
        mv = fetch_images_resources(mv, current_dir)
        mv = fetch_movie_resources(mv, current_dir)

        # Logs on ready ingested
        hash_directory = ingest_ipfs_dir(current_dir)
        migrate_resource_hash(mv, hash_directory)
        migrate_image_hash(mv, hash_directory)
        mv['hash'] = hash_directory  # Add current hash to movie
        logger.info(f"{Log.OKGREEN}Done {mv.get('imdb_code')}{Log.ENDC}")
        logger.info('\n')
        return clean_resources(mv)
    except Exception as e:
        if max_retry <= 0:
            raise OverflowError('Max retry exceeded')
        max_retry -= 1
        logger.error(f"Retry download assets error: {e}")
        logger.warning(f"{Log.WARNING}Wait {RECURSIVE_SLEEP_REQUEST}{Log.ENDC}")
        time.sleep(RECURSIVE_SLEEP_REQUEST)
        return ingest_ipfs_metadata(mv, max_retry)
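# Usage sketch over a batch (`movies` is a hypothetical iterable of MovieScheme dicts):
#   migrated = [ingest_ipfs_metadata(mv) for mv in movies]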
import os
import asyncio
import resolvers
from datetime import date

__author__ = 'gmena'

if __name__ == '__main__':
    DB_DATE_VERSION = date.today().strftime('%Y%m%d')
    ROOT_PROJECT = os.environ.get('PROJECT_ROOT', '/data/watchit')
    REFRESH_MOVIES = os.environ.get('REFRESH_MOVIES', 'False') == 'True'
    REFRESH_IPFS = os.environ.get('REFRESH_IPFS', 'False') == 'True'
    REGEN_MOVIES = os.environ.get('REGEN_MOVIES', 'False') == 'True'
    REGEN_ORBITDB = os.environ.get('REGEN_ORBITDB', 'False') == 'True'
    MIXED_RESOURCES = os.environ.get('MIXED_RESOURCES', 'False') == 'True'
    FLUSH_CACHE_IPFS = os.environ.get('FLUSH_CACHE_IPFS', 'False') == 'True'

    logger.info('Setting mongodb')
    logger.info("Running %s version in %s directory" % (DB_DATE_VERSION, ROOT_PROJECT))
    logger.info('\n')

    # Initialize db list from name
    tmp_db_name = 'witth%s' % DB_DATE_VERSION if REGEN_MOVIES else 'witth'
    temp_db, cache_db = mongo.get_dbs(tmp_db_name, 'ipfs')

    # Check for empty db
    empty_tmp = temp_db.movies.count() == 0
    empty_cache = cache_db.movies.count() == 0

    if REFRESH_MOVIES or empty_tmp:
        logger.info('Rewriting...')
        resolvers_list = resolvers.load()
__author__ = 'gmena'

RECURSIVE_SLEEP_REQUEST = 10

logger.info(f"{Log.OKGREEN}Starting node{Log.ENDC}")
ipfs = start_node()  # Initialize api connection to node
logger.info(f"{Log.OKGREEN}Node running {ipfs.id().get('ID')}{Log.ENDC}")
logger.info('\n')