def youtubeuser(tag, n_pages=1):
    """Return all unique /user/* urls returned for a search for a given query tag.

    Args:
        tag: search query string; spaces are replaced with '+' before it is
            substituted into the search URL.
        n_pages: number of result pages to request, starting at page 1.

    Returns:
        A list of unique 'http://www.youtube.com/user/...' URL strings.
        The order is unspecified because duplicates are removed with a set.
    """
    try_import('bs4', 'beautifulsoup4')
    from bs4 import BeautifulSoup

    url = 'http://www.youtube.com/results?search_query=%s&page=%d'
    query = tag.replace(' ', '+')
    users = set()
    for page in range(1, n_pages + 1):
        # A random user agent is picked for every page request
        headers = {'User-Agent': random.choice(common_user_agents)}
        search_request = urllib.request.Request(url % (query, page), None, headers)
        try:
            gcontext = ssl.SSLContext(ssl.PROTOCOL_TLSv1)  # to avoid [SSL: CERTIFICATE_VERIFY_FAILED] exception
            search_results = urllib.request.urlopen(search_request, context=gcontext)
        except AttributeError:
            search_results = urllib.request.urlopen(search_request)

        # Close the HTTP response as soon as the page body has been read
        with search_results:
            html = search_results.read()

        # href=True skips anchors that have no href attribute at all; indexing
        # link['href'] on such an anchor would raise KeyError.
        for link in BeautifulSoup(html, features="html.parser").find_all("a", href=True):
            href = link['href']
            if len(href) > 6 and href.startswith('/user/'):
                users.add(str('http://www.youtube.com%s' % href))
    return list(users)
def __init__(self):
    """Constructor that is currently disabled.

    Raises:
        ValueError: always, as the first statement of the body.
    """
    # Fail immediately; per the message, the code below targets an older dropbox version.
    raise ValueError('FIXME: this uses an older version of dropbox')
    # NOTE: everything below is unreachable while the unconditional raise above is in place.
    try_import('dropbox')
    import dropbox  # optional
    # Start the linking flow when no access token is stored on the instance
    if self._access_token is None:
        self.link()
def imtransform(img, A):
    """Transform a numpy array image (MxNx3) following the affine or similarity transformation A.

    Args:
        img: numpy array image to warp.
        A: numpy transformation matrix.  A matrix of shape (2, 3) is applied with
            cv2.warpAffine; any other shape is passed to cv2.warpPerspective.

    Returns:
        The warped image, with output size (width, height) taken from the input image.

    Raises:
        AssertionError: if `img` or `A` is not a numpy array.
    """
    assert isnumpy(img) and isnumpy(A), "invalid input"
    # The module name must be passed as a string.  The bare name `cv2` is local to
    # this function (because of the import below) and is not yet bound here, so
    # referencing it raised UnboundLocalError on every call.
    try_import('cv2', 'opencv-python')
    import cv2

    dsize = (img.shape[1], img.shape[0])  # (width, height) of the input image
    if A.shape == (2, 3):
        return cv2.warpAffine(img, A, dsize)
    else:
        return cv2.warpPerspective(img, A, dsize)
def s3_bucket(bucket_name, object_name, output_filename, verbose=True):
    """Thin wrapper for boto3 that downloads one object from a bucket to a local file.

    Args:
        bucket_name: name of the S3 bucket.
        object_name: key of the object inside the bucket.
        output_filename: local path the object is written to.
        verbose: if True, print a one-line message before downloading.

    Returns:
        `output_filename`.

    Raises:
        AssertionError: if the VIPY_AWS_ACCESS_KEY_ID or VIPY_AWS_SECRET_ACCESS_KEY
            environment variable is not set.
    """
    assert 'VIPY_AWS_ACCESS_KEY_ID' in os.environ and 'VIPY_AWS_SECRET_ACCESS_KEY' in os.environ, \
        "AWS access keys not found - You need to create ENVIRONMENT variables ['VIPY_AWS_ACCESS_KEY_ID', 'VIPY_AWS_SECRET_ACCESS_KEY'] with S3 access credentials"
    try_import('boto3', 'boto3')
    import boto3

    client = boto3.client('s3',
                          aws_access_key_id=os.environ['VIPY_AWS_ACCESS_KEY_ID'],
                          aws_secret_access_key=os.environ['VIPY_AWS_SECRET_ACCESS_KEY'])
    # `verbose` was previously accepted but ignored; report the transfer when requested
    if verbose:
        print('[vipy.downloader.s3_bucket]: Downloading "s3://%s/%s" -> "%s"' % (bucket_name, object_name, output_filename))
    client.download_file(bucket_name, object_name, output_filename)
    return output_filename
def verbs():
    """Return a list of verbs from verbnet that can be used to define a set of activities.

    The verbnet corpus is downloaded into `$VIPY_CACHE/nltk` when the VIPY_CACHE
    environment variable is set, otherwise into the system temp directory.

    Returns:
        The result of `nltk.corpus.verbnet.lemmas()`.
    """
    try_import('nltk')
    import nltk

    nltkdir = remkdir(os.path.join(os.environ['VIPY_CACHE'], 'nltk')) if 'VIPY_CACHE' in os.environ else tempfile.gettempdir()
    os.environ['NLTK_DATA'] = nltkdir
    # nltk reads NLTK_DATA when it is imported, which has already happened above,
    # so the directory must also be registered on the live search path.
    if nltkdir not in nltk.data.path:
        nltk.data.path.append(nltkdir)

    # Download to the same directory that NLTK_DATA points at (previously the
    # corpus was always written to the temp directory, ignoring VIPY_CACHE).
    print('[vipy.annotation.verbs]: Downloading verbnet to "%s"' % nltkdir)
    nltk.download('verbnet', nltkdir)
    from nltk.corpus import verbnet
    return verbnet.lemmas()
def num_processes(n=None, backend='joblib'):
    """Get, and optionally set, the global number of processes.

    Args:
        n: when not None, stored in GLOBAL['PROCESSES'].
        backend: when equal to 'dask' and n > 1, a dask distributed Client with
            `n` single-threaded worker processes is created and stored in
            GLOBAL['DASK_CLIENT'].

    Returns:
        The current value of GLOBAL['PROCESSES'].
    """
    # Pure query: nothing to update
    if n is None:
        return GLOBAL['PROCESSES']

    GLOBAL['PROCESSES'] = n

    # Only the dask backend with more than one process needs a client
    if not (n > 1 and backend == 'dask'):
        return GLOBAL['PROCESSES']

    try_import('dask', 'dask distributed')
    from dask.distributed import Client

    client_options = dict(name='keynet',
                          scheduler_port=0,
                          dashboard_address=None,
                          processes=True,
                          threads_per_worker=1,
                          n_workers=n,
                          direct_to_workers=True,
                          local_directory=tempfile.mkdtemp())
    GLOBAL['DASK_CLIENT'] = Client(**client_options)
    return GLOBAL['PROCESSES']
def s3(url, output_filename, verbose=True):
    """Download an object addressed by an S3 url to a local file using boto3.

    Args:
        url: url of the form 's3://BUCKETNAME.s3.amazonaws.com/OBJECTNAME.ext'.
        output_filename: local path the object is written to.
        verbose: if True, print a one-line message before downloading.

    Returns:
        `output_filename`.

    Raises:
        AssertionError: if the AWS credential environment variables are missing,
            or if `url` is rejected by isS3url.
    """
    required_keys = ('VIPY_AWS_ACCESS_KEY_ID', 'VIPY_AWS_SECRET_ACCESS_KEY')
    assert all(key in os.environ for key in required_keys), \
        "AWS access keys not found - You need to create ENVIRONMENT variables ['VIPY_AWS_ACCESS_KEY_ID', 'VIPY_AWS_SECRET_ACCESS_KEY'] with S3 access credentials"
    try_import('boto3', 'boto3')
    assert isS3url(url), "Invalid URL - Must be 's3://BUCKETNAME.s3.amazonaws.com/OBJECTNAME.ext'"

    import boto3
    client = boto3.client('s3',
                          aws_access_key_id=os.environ['VIPY_AWS_ACCESS_KEY_ID'],
                          aws_secret_access_key=os.environ['VIPY_AWS_SECRET_ACCESS_KEY'])

    # url format: s3://BUCKETNAME.s3.amazonaws.com/OBJECTNAME.mp4
    parsed = urllib.parse.urlparse(url)
    bucket_name = parsed.netloc.split('.')[0]  # first dot-separated label of the host
    object_name = parsed.path[1:]  # path without its leading '/'

    if verbose:
        print('[vipy.downloader.s3]: Downloading "%s" -> "%s"' % (url, output_filename))
    client.download_file(bucket_name, object_name, output_filename)
    return output_filename
def __init__(self, num_processes, dashboard=False):
    """Create a dask distributed Client backed by `num_processes` worker processes.

    Args:
        num_processes: integer number of single-threaded worker processes, at least 2.
        dashboard: if True, the dashboard address is ':0' (random port);
            otherwise no dashboard address is given.

    Raises:
        AssertionError: if `num_processes` is not an int >= 2.
    """
    assert isinstance(num_processes, int) and num_processes >= 2, "num_processes must be >= 2"

    from vipy.util import try_import
    try_import('dask', 'dask distributed')
    import dask
    from dask.distributed import Client, as_completed, wait
    from dask.config import set as dask_config_set

    # Communication timeouts, applied one setting at a time
    for (option, timeout) in (("distributed.comm.timeouts.tcp", "50s"),
                              ("distributed.comm.timeouts.connect", "10s")):
        dask_config_set({option: timeout})

    self._num_processes = num_processes

    dashboard_address = ':0' if dashboard else None  # random port
    self._client = Client(name='vipy',
                          scheduler_port=0,
                          dashboard_address=dashboard_address,
                          processes=True,
                          threads_per_worker=1,
                          n_workers=num_processes,
                          env={'VIPY_BACKEND': 'Agg'},
                          direct_to_workers=True,
                          local_directory=tempfile.mkdtemp())
def scp(url, output_filename, verbose=True):
    """Download using pre-installed SSH keys where hostname is formatted 'scp://hostname.com:/path/to/file.jpg'.

    Args:
        url: source of the form 'scp://hostname.com:/path/to/file.jpg'.
        output_filename: local path the remote file is written to.
        verbose: if True, print the transfer and a running percentage to stdout.

    Returns:
        `output_filename`.

    Raises:
        AssertionError: if `url` does not contain 'scp://'.
        ValueError: if there is no ':' separating the hostname from the remote path.
    """
    try_import('paramiko', 'paramiko scp')
    try_import('scp', 'paramiko scp')
    import paramiko
    from scp import SCPClient

    assert 'scp://' in url, "Invalid URL"
    # Split on the first ':' only, so remote paths that contain ':' still parse
    (hostname, remote_filename) = url.split('scp://')[1].split(':', 1)

    if verbose:
        print("[vipy.downloader]: Downloading '%s' to '%s'" % (url, output_filename))

    def progress(filename, size, sent):
        # Guard against zero-byte files, which would otherwise divide by zero
        percent = float(sent) / float(size) * 100 if size else 100.0
        sys.stdout.write("[vipy.downloader]: %s ... %.2f%% \r" % (filename, percent))

    ssh = paramiko.SSHClient()
    try:
        ssh.load_system_host_keys()
        ssh.connect(hostname)
        client = SCPClient(ssh.get_transport(), progress=progress if verbose else None)
        try:
            client.get(remote_filename, output_filename)
        finally:
            client.close()
    finally:
        # Always release the SSH connection, including when the transfer fails
        ssh.close()
    return output_filename
def basic_level_categories():
    """Return a list of nouns from wordnet that can be used as an initial list of basic level object categories.

    The wordnet corpus is downloaded into `$VIPY_CACHE/nltk` when the VIPY_CACHE
    environment variable is set, otherwise into the system temp directory.

    Returns:
        A sorted list of lowercased names, one per noun synset (its first lemma)
        whose lexname is in the allowed set.  Duplicates are not removed.
    """
    try_import('nltk')
    import nltk

    nltkdir = remkdir(os.path.join(os.environ['VIPY_CACHE'], 'nltk')) if 'VIPY_CACHE' in os.environ else tempfile.gettempdir()
    os.environ['NLTK_DATA'] = nltkdir
    # nltk reads NLTK_DATA when it is imported, which has already happened above,
    # so the directory must also be registered on the live search path.
    if nltkdir not in nltk.data.path:
        nltk.data.path.append(nltkdir)

    # Download to the same directory that NLTK_DATA points at (previously the
    # corpus was always written to the temp directory, ignoring VIPY_CACHE).
    print('[vipy.annotation.basic_level_categories]: Downloading wordnet to "%s"' % nltkdir)
    nltk.download('wordnet', nltkdir)
    from nltk.corpus import wordnet

    allowed_lexnames = {
        'noun.animal', 'noun.artifact', 'noun.body', 'noun.food',
        'noun.object', 'noun.plant'
    }
    return sorted(str(synset.lemmas()[0].name()).lower()
                  for synset in wordnet.all_synsets('n')
                  if synset.lexname() in allowed_lexnames)
import numpy as np import matplotlib.pyplot as plt from vipy.util import seq, groupby, try_import from scipy.interpolate import interp1d try_import('sklearn', 'scikit-learn') import sklearn.metrics def cumulative_match_characteristic(similarityMatrix, gtMatrix): """CMC curve for probe x gallery similarity matrix (larger is more similar) and ground truth match matrix (one +1 per row, rest zeros)""" n_categories = gtMatrix.shape[1] n_probe = gtMatrix.shape[0] rank = range(1, n_categories + 1) for i in range(0, n_probe): k = np.argsort(-similarityMatrix[i, :] ) # index of sorted rows in descending order similarityMatrix[i, :] = similarityMatrix[ i, k] # reorder columns in similarityOrder gtMatrix[i, :] = gtMatrix[i, k] # reorder ground truth in same order # Given ground truth matrix, if a row has exactly one "1" then there is a mate. If a row has all zeros, then the mate does not exist in the gallery # if a row has nan, then there is a mate in the gallery, but this was not found in the top-k n_pos = np.sum( np.array( np.logical_or((np.sum(gtMatrix, axis=1) == 1.0), np.isnan(np.sum(gtMatrix, axis=1)))).astype(np.float32)) gtMatrix = np.nan_to_num(gtMatrix) # convert nans to zeros recall = [np.sum(np.max(gtMatrix[:, 0:r], axis=1)) / n_pos for r in rank]
import os import sys from vipy.util import try_import, islist, tolist, tempdir, remkdir from itertools import repeat try_import('dask', 'dask distributed torch') from dask.distributed import as_completed, wait try_import('torch', 'torch') import torch import numpy as np import tempfile import warnings import vipy.globals class Batch(object): """vipy.batch.Batch class This class provides a representation of a set of vipy objects. All of the object types must be the same. If so, then an operation on the batch is performed on each of the elements in the batch in parallel. Examples: >>> b = vipy.batch.Batch([Image(filename='img_%06d.png' % k) for k in range(0,100)]) >>> b.bgr() # convert all elements in batch to BGR >>> b.torch() # load all elements in batch and convert to torch tensor >>> b.map(lambda im: im.bgr()) # equivalent >>> b.map(lambda im: np.sum(im.array())) >>> b.map(lambda im, f: im.saveas(f), args=['out%d.jpg' % k for k in range(0,100)]) >>> v = vipy.video.RandomSceneActivity() >>> b = vipy.batch.Batch(v, n_processes=16) >>> b.map(lambda v,k: v[k], args=[(k,) for k in range(0, len(v))]) # paralle interpolation