Example #1
def youtubeuser(tag, n_pages=1):
    """return all unique /user/* urls returned for a search for a given query tag"""
    try_import('bs4', 'beautifulsoup4')
    from bs4 import BeautifulSoup
    url = 'http://www.youtube.com/results?search_query=%s&page=%d'
    userlist = []
    for k in range(0, n_pages):
        user_agent = random.choice(common_user_agents)
        headers = {'User-Agent': user_agent}
        search_request = urllib.request.Request(
            url % (tag.replace(' ', '+'), k + 1), None, headers)
        try:
            # unverified TLS context, to avoid an [SSL: CERTIFICATE_VERIFY_FAILED] exception
            # (note that ssl.PROTOCOL_TLSv1 is deprecated on modern Python)
            gcontext = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
            search_results = urllib.request.urlopen(search_request, context=gcontext)
        except AttributeError:
            search_results = urllib.request.urlopen(search_request)
        links = BeautifulSoup(search_results.read(),
                              features="html.parser").findAll("a")
        for link in links:
            href = link.get('href', '')  # some anchors have no href attribute
            if href.startswith('/user/'):
                userlist.append('http://www.youtube.com%s' % href)
    return list(set(userlist))
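A minimal usage sketch for this helper (the query string is illustrative; the module-level random, ssl, and urllib.request imports and the common_user_agents list that the excerpt relies on must be in scope):

for u in youtubeuser('cooking channels', n_pages=2):
    print(u)  # one line per unique 'http://www.youtube.com/user/...' URL found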
Example #2
    def __init__(self):
        raise ValueError('FIXME: this uses an older version of dropbox')

        # unreachable until the FIXME above is resolved
        try_import('dropbox')
        import dropbox  # optional

        if self._access_token is None:
            self.link()
Example #3
def imtransform(img, A):
    """Transform an numpy array image (MxNx3) following the affine or similiarity transformation A"""
    assert isnumpy(img) and isnumpy(A), "invalid input"
    try_import('cv2', 'opencv-python')
    import cv2
    if A.shape == (2, 3):
        return cv2.warpAffine(img, A, (img.shape[1], img.shape[0]))
    else:
        return cv2.warpPerspective(img, A, (img.shape[1], img.shape[0]))
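A short sketch of exercising both branches; the image and transform values are made up for illustration:

import numpy as np
img = np.zeros((240, 320, 3), dtype=np.uint8)  # synthetic MxNx3 image
A = np.array([[1.0, 0.0, 10.0],
              [0.0, 1.0, 5.0]])                # 2x3 affine: translate 10px right, 5px down
warped = imtransform(img, A)                   # dispatches to cv2.warpAffine
H = np.vstack((A, [0.0, 0.0, 1.0]))            # 3x3 matrix takes the other branch
warped2 = imtransform(img, H)                  # dispatches to cv2.warpPerspective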
Example #4
def s3_bucket(bucket_name, object_name, output_filename, verbose=True):
    """Thin wrapper for boto3"""
    assert 'VIPY_AWS_ACCESS_KEY_ID' in os.environ and 'VIPY_AWS_SECRET_ACCESS_KEY' in os.environ, \
        "AWS access keys not found - you must set the environment variables ['VIPY_AWS_ACCESS_KEY_ID', 'VIPY_AWS_SECRET_ACCESS_KEY'] with S3 access credentials"
    try_import('boto3', 'boto3')
    import boto3
    s3 = boto3.client('s3',
                      aws_access_key_id=os.environ['VIPY_AWS_ACCESS_KEY_ID'],
                      aws_secret_access_key=os.environ['VIPY_AWS_SECRET_ACCESS_KEY'])
    s3.download_file(bucket_name, object_name, output_filename)
    return output_filename
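A hypothetical call with placeholder bucket, key, and output names; the two VIPY_AWS_* environment variables must already be exported with valid credentials:

f = s3_bucket('my-bucket', 'videos/clip.mp4', '/tmp/clip.mp4')
assert f == '/tmp/clip.mp4'  # the output filename is returned on success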
Example #5
def verbs():
    """Return a list of verbs from verbnet that can be used to define a set of activities"""
    try_import('nltk')
    import nltk
    nltkdir = remkdir(os.path.join(os.environ['VIPY_CACHE'], 'nltk')) if 'VIPY_CACHE' in os.environ else tempfile.gettempdir()
    os.environ['NLTK_DATA'] = nltkdir
    print('[vipy.annotation.verbs]: Downloading verbnet to "%s"' % nltkdir)
    nltk.download('verbnet', nltkdir)  # download into NLTK_DATA, not the temp dir, so a VIPY_CACHE location is honored
    from nltk.corpus import verbnet
    return verbnet.lemmas()
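A usage sketch; the slice shown is illustrative, since the lemma list depends on the verbnet release that nltk downloads:

lemmas = verbs()
print(len(lemmas), lemmas[0:5])  # verbnet lemma strings, usable as candidate activity labels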
Example #6
def num_processes(n=None, backend='joblib'):
    """Get or set the global number of processes; with n > 1 and backend='dask', also launch a local dask.distributed client"""
    if n is not None:
        GLOBAL['PROCESSES'] = n
        if n > 1 and backend == 'dask':
            try_import('dask', 'dask distributed')
            from dask.distributed import Client
            client = Client(name='keynet',
                            scheduler_port=0,
                            dashboard_address=None,
                            processes=True,
                            threads_per_worker=1,
                            n_workers=n,
                            direct_to_workers=True,
                            local_directory=tempfile.mkdtemp())
            GLOBAL['DASK_CLIENT'] = client

    return GLOBAL['PROCESSES']
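A sketch of exercising this getter/setter, assuming the surrounding module initializes GLOBAL = {'PROCESSES': 1, 'DASK_CLIENT': None} (that initializer is an assumption; it is not shown in the excerpt):

num_processes()                    # read back the current setting
num_processes(4)                   # set, without launching a scheduler (joblib backend)
num_processes(8, backend='dask')   # set, and launch a local dask.distributed client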
Example #7
def s3(url, output_filename, verbose=True):
    """Download the s3:// URL to output_filename, using AWS credentials in environment variables"""
    assert 'VIPY_AWS_ACCESS_KEY_ID' in os.environ and 'VIPY_AWS_SECRET_ACCESS_KEY' in os.environ, \
        "AWS access keys not found - you must set the environment variables ['VIPY_AWS_ACCESS_KEY_ID', 'VIPY_AWS_SECRET_ACCESS_KEY'] with S3 access credentials"
    try_import('boto3', 'boto3')
    assert isS3url(url), "Invalid URL - Must be 's3://BUCKETNAME.s3.amazonaws.com/OBJECTNAME.ext'"
    
    import boto3
    s3 = boto3.client('s3',
                      aws_access_key_id=os.environ['VIPY_AWS_ACCESS_KEY_ID'],
                      aws_secret_access_key=os.environ['VIPY_AWS_SECRET_ACCESS_KEY'])
    
    # url format: s3://BUCKETNAME.s3.amazonaws.com/OBJECTNAME.mp4
    parsed = urllib.parse.urlparse(url)
    bucket_name = parsed.netloc.split('.')[0]
    object_name = parsed.path[1:]

    if verbose:
        print('[vipy.downloader.s3]: Downloading "%s" -> "%s"' % (url, output_filename))
    s3.download_file(bucket_name, object_name, output_filename)
    return output_filename
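A hypothetical call with a placeholder URL, showing how the bucket name ('my-bucket') and object key ('videos/clip.mp4') are recovered from the URL format documented above:

s3('s3://my-bucket.s3.amazonaws.com/videos/clip.mp4', '/tmp/clip.mp4')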
Example #8
    def __init__(self, num_processes, dashboard=False):
        assert isinstance(num_processes, int) and num_processes >= 2, "num_processes must be >= 2"

        from vipy.util import try_import
        try_import('dask', 'dask distributed')
        import dask
        from dask.distributed import Client
        from dask.distributed import as_completed, wait
        from dask.config import set as dask_config_set
        
        dask_config_set({"distributed.comm.timeouts.tcp": "50s"})
        dask_config_set({"distributed.comm.timeouts.connect": "10s"})        
        self._num_processes = num_processes
        self._client = Client(name='vipy', 
                              scheduler_port=0, 
                              dashboard_address=None if not dashboard else ':0',  # random port
                              processes=True, 
                              threads_per_worker=1, 
                              n_workers=num_processes, 
                              env={'VIPY_BACKEND':'Agg'},
                              direct_to_workers=True,
                              local_directory=tempfile.mkdtemp())
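The excerpt omits the enclosing class, so the name below is hypothetical; assuming this is the constructor of a parallel-executor wrapper, construction would look like:

executor = Executor(num_processes=4)                  # 4 single-threaded worker processes, no dashboard
executor = Executor(num_processes=4, dashboard=True)  # also serve the dask dashboard on a random port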
Example #9
def scp(url, output_filename, verbose=True):
    """Download using pre-installed SSH keys where hostname is formatted 'scp://hostname.com:/path/to/file.jpg' """        
    try_import('paramiko', 'paramiko scp')
    try_import('scp', 'paramiko scp')    
    import paramiko
    from scp import SCPClient
        
    assert 'scp://' in url, "Invalid URL"
    (hostname, remote_filename) = url.split('scp://')[1].split(':')

    if verbose:
        print("[vipy.downloader]: Downloading '%s' to '%s'" % (url, output_filename))
        
    def progress(filename, size, sent):
        sys.stdout.write("[vipy.downloader]: %s ... %.2f%%   \r" % (filename, float(sent)/float(size)*100) )
    
    ssh = paramiko.SSHClient()
    ssh.load_system_host_keys()
    ssh.connect(hostname)
    scp = SCPClient(ssh.get_transport(), progress=progress if verbose else None)
    scp.get(remote_filename, output_filename)
    scp.close()
    return output_filename
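A sketch with a placeholder host and paths; this assumes your SSH keys already grant passwordless access to the remote host:

scp('scp://host.example.com:/data/images/img_000.jpg', '/tmp/img_000.jpg')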
Example #10
def basic_level_categories():
    """Return a list of nouns from wordnet that can be used as an initial list of basic level object categories"""
    try_import('nltk')
    import nltk
    nltkdir = remkdir(os.path.join(os.environ['VIPY_CACHE'], 'nltk')) if 'VIPY_CACHE' in os.environ else tempfile.gettempdir()
    os.environ['NLTK_DATA'] = nltkdir
    print('[vipy.annotation.basic_level_categories]: Downloading wordnet to "%s"' % nltkdir)
    nltk.download('wordnet', nltkdir)  # download into NLTK_DATA so a VIPY_CACHE location is honored

    from nltk.corpus import wordnet
    nouns = []
    allowed_lexnames = [
        'noun.animal', 'noun.artifact', 'noun.body', 'noun.food',
        'noun.object', 'noun.plant'
    ]
    for synset in list(wordnet.all_synsets('n')):
        if synset.lexname() in allowed_lexnames:
            nouns.append(str(synset.lemmas()[0].name()).lower())
    nouns.sort()
    return nouns
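A usage sketch; the exact contents depend on the wordnet release that nltk downloads:

nouns = basic_level_categories()
print(len(nouns))   # number of candidate basic-level categories
print(nouns[0:5])   # alphabetically first lemmas from the allowed lexnames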
Example #11
import numpy as np
import matplotlib.pyplot as plt
from vipy.util import seq, groupby, try_import
from scipy.interpolate import interp1d

try_import('sklearn', 'scikit-learn')
import sklearn.metrics


def cumulative_match_characteristic(similarityMatrix, gtMatrix):
    """CMC curve for probe x gallery similarity matrix (larger is more similar) and ground truth match matrix (one +1 per row, rest zeros)"""
    n_categories = gtMatrix.shape[1]
    n_probe = gtMatrix.shape[0]
    rank = range(1, n_categories + 1)

    for i in range(0, n_probe):
        k = np.argsort(-similarityMatrix[i, :])  # column indices that sort row i in descending order
        similarityMatrix[i, :] = similarityMatrix[i, k]  # reorder columns by similarity
        gtMatrix[i, :] = gtMatrix[i, k]  # reorder ground truth in the same order

    # Given the ground truth matrix: a row with exactly one "1" has a mate in the gallery; a row of all
    # zeros has no mate in the gallery; a row containing nan has a mate that was not found in the top-k
    n_pos = np.sum(np.logical_or(np.sum(gtMatrix, axis=1) == 1.0,
                                 np.isnan(np.sum(gtMatrix, axis=1))).astype(np.float32))
    gtMatrix = np.nan_to_num(gtMatrix)  # convert nans to zeros
    recall = [np.sum(np.max(gtMatrix[:, 0:r], axis=1)) / n_pos for r in rank]
    return (rank, recall)  # assumed return value; the excerpt was truncated after the recall computation
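A tiny worked example, assuming the (rank, recall) return reconstructed above; both probes rank their mates first, so the curve saturates at rank 1:

S = np.array([[0.9, 0.1, 0.2],
              [0.3, 0.8, 0.1]])   # 2 probes x 3 gallery categories (larger is more similar)
Y = np.array([[1.0, 0.0, 0.0],
              [0.0, 1.0, 0.0]])   # exactly one mate per row
(rank, recall) = cumulative_match_characteristic(S, Y)
assert recall[0] == 1.0  # both mates retrieved at rank 1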
Example #12
import os
import sys
from vipy.util import try_import, islist, tolist, tempdir, remkdir
from itertools import repeat
try_import('dask', 'dask distributed torch')
from dask.distributed import as_completed, wait
try_import('torch', 'torch')
import torch
import numpy as np
import tempfile
import warnings
import vipy.globals


class Batch(object):
    """vipy.batch.Batch class

    This class provides a representation of a set of vipy objects.  All of the objects must be of the same type, so that an operation on the batch can be applied to each element in parallel.

    Examples:

    >>> b = vipy.batch.Batch([Image(filename='img_%06d.png' % k) for k in range(0,100)])
    >>> b.bgr()  # convert all elements in batch to BGR
    >>> b.torch()  # load all elements in batch and convert to torch tensor
    >>> b.map(lambda im: im.bgr())  # equivalent
    >>> b.map(lambda im: np.sum(im.array())) 
    >>> b.map(lambda im, f: im.saveas(f), args=['out%d.jpg' % k for k in range(0,100)])
    
    >>> v = vipy.video.RandomSceneActivity()
    >>> b = vipy.batch.Batch(v, n_processes=16)
    >>> b.map(lambda v,k: v[k], args=[(k,) for k in range(0, len(v))])  # parallel interpolation