Example #1
0
def send_task(filename):
    """Create a Scale audio-transcription task described by a config file.

    Reads the config at *filename* (falling back to DEFAULT_CONFIG_FN when
    *filename* is falsy), submits the task, writes the confirmation message
    to a per-task log file, and prints it. Exits the process with status 1
    if the Scale API rejects the request.
    """
    config_fn = DEFAULT_CONFIG_FN if not filename else filename
    conf = get_config(config_fn)
    client = scaleapi.ScaleClient(conf['test_api_key'])
    try:
        ret_obj = client.create_audiotranscription_task(
                callback_url = conf.get('callback_url'),
                attachment_type = conf.get('attachment_type'),
                attachment = conf.get('attachment'),
                verbatim = conf.get('verbatim')
                )
    except scaleapi.ScaleException as e:
        sys.exit('\nStatus Code {}: {}\n\n(exited with error code 1)\n'.format(e.code, str(e)))
    # BUG FIX: the condition used to be `if True`, which answered "Yes."
    # unconditionally; it must depend on the task's is_test flag.
    nod = lambda x: "Yes." if x else "No."
    fmt_strs = (ret_obj.task_id,
            ret_obj.created_at,
            nod(ret_obj.is_test),
            ret_obj.callback_url,
            json.dumps(ret_obj.params, indent=4))
    message = "Task (task_id={}) created at {}.\nWas this a test? {}\n\
Make sure this URL is listening for POST requests: {}\n\
Here are the parameters you used:\n{}".format(*fmt_strs)
    # Log the confirmation, keyed by the task's creation timestamp.
    create_log_filepath = get_log_filepath(conf) + 'task_{}_create.log'.format(ret_obj.created_at)
    with open(create_log_filepath, 'w') as create_log_file:
        create_log_file.write(message)
    print("\n{}\n\n(A file containing this message has been written to `{}`.)\n"\
            .format(message, create_log_filepath))
Example #2
0
 def __init__(self, session_path='', key=None):
     """Build the Scale client and derive session file locations.

     session_path: root folder for this session's label/log files.
     key: Scale API key; a placeholder test key is used when omitted.
     """
     # Fall back to the test key when no real key is supplied.
     key = key or 'test_################################'  # test key
     self.client = scaleapi.ScaleClient(key)
     self.tasks = []
     self.taskdir = os.path.join(session_path, 'label')
     self.logpath = os.path.join(session_path, 'log', 'scaleapi.log')
Example #3
0
    def connect(self):
        """Establish the connection to the source.

        This should be the first method called to connect to the source.
        Implementations are expected to raise a ConnectionError when the
        connection cannot be made.

        :return: True on success.
        """

        # Keep the client around for subsequent calls.
        self.connection_client = scaleapi.ScaleClient(
            self.auth_data['client_secret'])
        return True
# coding: utf-8

import pytest
import scaleapi
import time
from datetime import datetime
import os

# The test API key must come from the environment so it never lands in
# source control; fail fast with a clear message when it is missing.
try:
    test_api_key = os.environ['SCALE_TEST_API_KEY']
    client = scaleapi.ScaleClient(test_api_key)
except KeyError:
    raise Exception(
        "Please set the environment variable SCALE_TEST_API_KEY to run tests.")


def make_a_task():
    """Submit a fixed sample image-comparison task and return the response."""
    task_params = dict(
        callback_url='http://www.example.com/callback',
        instruction='Do the objects in these images have the same pattern?',
        attachment_type='image',
        attachments=[
            'http://i.ebayimg.com/00/$T2eC16dHJGwFFZKjy5ZjBRfNyMC4Ig~~_32.JPG',
            'http://images.wisegeek.com/checkered-tablecloth.jpg'
        ],
        choices=['yes', 'no'],
    )
    return client.create_comparison_task(**task_params)


def test_categorize_ok():
    task = client.create_categorization_task(
        callback_url='http://www.example.com/callback',
Example #5
0
def main(cfg, args):
    """Fetch Scale ratings for one project and write summary CSV files.

    Downloads every task created within the last ``args.ago`` days, keeps
    those whose ``metadata['group']`` equals ``args.project``, computes a
    per-clip MOS plus per-rater stats, and writes three CSVs (per-clip,
    rater stats, per-condition pivot table) into a folder named after the
    project.

    NOTE(review): relies on module-level names `now`, `timedelta`, `np`,
    `pd`, `re`, and `os` imported/defined elsewhere in this file — confirm.
    """
    scaleapi_key = cfg.get("CommonAccountKeys", 'ScaleAPIKey')
    client = scaleapi.ScaleClient(scaleapi_key)

    # Create the output folder if it does not exist yet.
    output_dir = args.project
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)

    # Page through the task list; the API hands back a next_token cursor
    # that is None once the final page has been served.
    all_tasks = []
    next_token = None

    while True:
        tasks = client.tasks(
            start_time=(now - timedelta(days=args.ago)).strftime("%Y-%m-%d"),
            next_token=next_token,
            project=cfg.get("CommonAccountKeys", 'ScaleAccountName'),
        )
        all_tasks.extend(tasks)
        # BUG FIX: the cursor was previously refreshed inside the per-task
        # loop, so an empty page kept the stale token and looped forever.
        # Read it once per page, after collecting the page's tasks.
        next_token = tasks.next_token
        if next_token is None:
            break

    results = []
    rater_stats = []

    for task in all_tasks:
        # Filter out results that are not a part of the interested project
        if task.param_dict['metadata']['group'] != args.project:
            continue

        # One output row per rated file in the task.
        for file_url in task.param_dict['metadata']['file_urls']:
            clip_dict = {
                'short_file_name':
                task.param_dict['metadata']['file_shortname']
            }
            clip_dict['model'] = file_url
            clip_dict['file_url'] = task.param_dict['metadata']['file_urls'][
                file_url]
            ratings = task.param_dict['response'][file_url]['responses']
            rater_stats.extend(ratings)
            # Record every individual vote as its own vote_N column.
            for i, rating in enumerate(ratings, start=1):
                clip_dict['vote_' + str(i)] = rating['rating']
            clip_dict['MOS'] = np.mean(
                [rating['rating'] for rating in ratings])
            clip_dict['n'] = len(ratings)
            # The clip-set name is encoded in the file URL's path.
            clipset_match = re.match(
                '.*[/](?P<clipset>audioset|ms_realrec|noreverb_clnsp|reverb_clnsp|stationary)',
                clip_dict['file_url'])
            clip_dict['clipset'] = clipset_match.groupdict()['clipset']
            results.append(clip_dict)

    df = pd.DataFrame(results)
    df_rater = pd.DataFrame(rater_stats)
    df.to_csv(
        os.path.join(
            output_dir,
            "Batch_{0}_per_clip_results.csv".format(now.strftime("%m%d%Y"))))
    df_rater.to_csv(
        os.path.join(
            output_dir,
            "Batch_{0}_rater_stats.csv".format(now.strftime("%m%d%Y"))))
    # Mean/count/std of MOS per model (rows) x clip set (columns), with an
    # "Overall" margin on both axes.
    model_pivot_table = df.pivot_table(values='MOS',
                                       index='model',
                                       columns='clipset',
                                       margins=True,
                                       margins_name='Overall',
                                       aggfunc=[np.mean, len, np.std])
    model_pivot_table = model_pivot_table.swaplevel(axis=1)
    model_pivot_table.drop('Overall', inplace=True)
    # Per clip set, add a 95% confidence interval and a DMOS column
    # (improvement over the 'noisy' reference row).
    for cols in model_pivot_table.columns.levels[0]:
        model_pivot_table.loc[:, (
            cols, 'CI')] = model_pivot_table.loc[:, cols].apply(
                lambda x: 1.96 * x['std'] / np.sqrt(x['len']), axis=1)
        model_pivot_table.loc[:, (
            cols, 'DMOS')] = model_pivot_table.loc[:, cols].apply(
                lambda x: x['mean'] - model_pivot_table.loc['noisy',
                                                            (cols, 'mean')],
                axis=1)

    model_pivot_table = model_pivot_table.sort_values(
        ('Overall', 'mean'), ascending=False).sort_index(axis=1,
                                                         ascending=False)
    model_pivot_table.to_csv(
        os.path.join(
            output_dir, "Batch_{0}_per_condition_results.csv".format(
                now.strftime("%m%d%Y"))))
def double_box():
    """Planned check: did the tasker use two boxes for one object?

    Intended to be implemented with an IoU metric; currently a no-op
    placeholder.
    """


def check_bottom():
    """Planned check for traffic-sign position.

    Signs should be located at the top and on the left/right edges
    (assuming all pictures are taken from cars); currently a no-op
    placeholder.
    """


def aspect_ratio():
    """Planned check for traffic lights — color should be 'other'.

    Currently a no-op placeholder.
    """


# SECURITY NOTE(review): a *live* API key is hard-coded below — it should be
# revoked and loaded from an environment variable or config file instead.
client = scaleapi.ScaleClient('live_74275b9b2b8b44d8ad156db03d2008ed')

tasks = client.tasks(project="Traffic Sign Detection")

# Write one CSV row per bounding-box check result for every task.
with open(OUTPUT_FILE_PATH, 'w') as csvfile:
    csv_fieldnames = [
        'task_id', 'task_completed', 'bounding_box_uuid', 'status',
        'description'
    ]
    writer = csv.DictWriter(csvfile, fieldnames=csv_fieldnames)
    writer.writeheader()

    for task in tasks:

        # Pull the task identity and completion status for the report.
        task_id = task.param_dict['task_id']
        task_status = task.param_dict['status']
Example #7
0
import scaleapi

# SECURITY NOTE(review): a *live* API key is hard-coded below — it should be
# revoked and loaded from an environment variable or config file instead.
client = scaleapi.ScaleClient('live_953b8b2557c6492c92f79c7b5a4fdcf6')

tasks = client.tasks(project="Traffic Sign Detection")
Example #8
0
def _scale_common(name: str,
                  path: str,
                  project: str,
                  batch: Union[str, List[str]] = None) -> Dataset:  # noqa
    """Build a Dataset from completed Scale annotation batches.

    Downloads each task's image (unless already cached under
    ``<path>/images/<batch>``) and converts its bounding boxes into
    Object2D annotations, registering class labels on first sight.

    name: dataset name passed through to the returned Dataset.
    project: Scale project whose completed batches are scanned.
    batch: a single batch name, a list of names, or ''/None for the
        default/all completed batches.
    Raises ValueError when a requested batch name does not exist.
    """
    images: Dataset.Images = []
    classes: Dataset.Classes = []
    annotations: Dataset.Annotations = {}

    scale_data_path = os.path.join(path, 'images')

    # Ask the REST API which batches exist; keep only completed ones.
    available_batches = requests.get(
        "https://api.scale.com/v1/batches?project={}".format(project),
        headers=HEADERS,
        auth=(SCALE_API_KEY, '')).json()

    batch_names = [
        b['name'] for b in available_batches['docs']
        if b['status'] == 'completed'
    ]
    # '' is allowed (it means the default batch); any other unknown single
    # name is an error.
    if (batch or batch == '') and isinstance(
            batch, str) and batch not in batch_names + ['']:
        raise ValueError("Batch name {} does not exist".format(batch))

    if batch and isinstance(batch, list):
        for bat in batch:
            if bat not in batch_names:
                raise ValueError("Batch name {} does not exist".format(bat))

    client = scaleapi.ScaleClient(SCALE_API_KEY)

    # Normalize `batch` into the list of batch names to walk.
    if batch is None:
        batches_to_retrieve = batch_names
    elif isinstance(batch, str) or batch == '':
        batches_to_retrieve = [batch]
    else:
        batches_to_retrieve = batch
    print(batches_to_retrieve)
    # BUG FIX: raw string — `\w` in a plain string is an invalid escape
    # (DeprecationWarning, SyntaxError in future Pythons).
    # Matches Google Drive file ids (33 or 19 word/hyphen characters).
    regex = r"([\w-]){33}|([\w-]){19}"  # noqa

    for batch_name in batches_to_retrieve:
        print('On Batch', batch_name)
        proper_batch_name = batch_name if batch_name else 'default'
        batch_path = os.path.join(scale_data_path, proper_batch_name)

        count = 0
        offset = 0
        has_next_page = True
        needs_download = False

        if not os.path.exists(scale_data_path):
            os.makedirs(scale_data_path)

        # A freshly created batch folder means nothing is cached yet.
        if not os.path.exists(batch_path):
            os.makedirs(batch_path)
            needs_download = True

        # The task API serves pages of up to 100 tasks.
        while has_next_page:
            tasklist = client.tasks(status="completed",
                                    project=project,
                                    batch=batch_name,
                                    offset=offset)
            offset += 100

            for obj in tasklist:
                task_id = obj.param_dict['task_id']
                task = client.fetch_task(task_id)
                bbox_list = task.param_dict['response']['annotations']
                img_url = task.param_dict['params']['attachment']

                try:
                    if 'drive.google.com' in img_url:
                        # Resolve the Drive file name via its metadata; a
                        # failed regex match makes `match[0]` raise
                        # TypeError, handled below.
                        match = re.search(regex, img_url)
                        task_id = match[0]  # noqa
                        api_initialize()
                        file_name = print_file_metadata(SERVICE,
                                                        task_id)  # noqa
                        local_path = os.path.join(batch_path, file_name)
                    else:
                        local_path = os.path.join(batch_path,
                                                  img_url.rsplit('/', 1)[-1])

                    if needs_download or not os.path.isfile(local_path):
                        print('Batch Path', batch_path)
                        print('Local Path', local_path)
                        # Download the image.
                        request_ = urllib.request.Request(
                            img_url, None, headers)
                        response = urllib.request.urlopen(request_)
                        # BUG FIX: context manager so the file handle is
                        # closed even if write() raises.
                        with open(local_path, 'wb') as local_file:
                            local_file.write(response.read())

                except HTTPError as error:
                    print(
                        "Image {} failed to download due to HTTPError {}: {}".
                        format(img_url, error.code, error.reason))
                    continue
                except TypeError as error:
                    print(
                        "Image {} failed to download due to improper header: {}"
                        .format(img_url, str(error)))
                    continue

                annotations[local_path] = []
                for bbox in bbox_list:
                    # Get the label of the detected object
                    detection_class = bbox['label']

                    # Calculate the position and dimensions of the bounding box
                    x_min = int(
                        bbox['left'])  # x-coordinate of top left corner
                    y_min = int(bbox['top'])  # y-coordinate of top left corner
                    width = int(bbox['width'])  # width of the bounding box
                    height = int(bbox['height'])  # height of the bounding box

                    # Get the class index if it has already been registered
                    # otherwise register it and select the index
                    try:
                        class_index = classes.index(detection_class)
                    except ValueError:
                        class_index = len(classes)
                        classes.append(detection_class)

                    # Package the detection
                    annotations[local_path].append(
                        Object2D(Bounds2D(x_min, y_min, width, height),
                                 class_index))

                images.append(local_path)
                print("Processed {}\r".format(local_path), end="")
                count += 1

                # The last page is shorter than 100 entries; also stop once
                # the processing cap is exceeded.
                if len(tasklist) < 100 or count > MAX_TO_PROCESS:
                    has_next_page = False

    return Dataset(name, images, classes, annotations)
Example #9
0
 def __init__(self, api_key):
     """Validate *api_key* and build the Scale client from it.

     Raises Exception when the key is empty or None.
     """
     if not api_key:
         raise Exception('No API key provided')
     self.api_key = api_key
     self.client = scaleapi.ScaleClient(api_key)
Example #10
0
import os
from os.path import join, dirname
from dotenv import load_dotenv
import scaleapi

# Load environment variables: first from the default search path, then
# explicitly from the .env file that sits next to this script.
load_dotenv(verbose=True)

dotenv_path = join(dirname(__file__), '.env')
load_dotenv(dotenv_path)

# NOTE(review): API_TOKEN will be None if unset, and ScaleClient is then
# built with a None key — confirm that is the intended failure mode.
API_TOKEN = os.environ.get("API_TOKEN")

client = scaleapi.ScaleClient(API_TOKEN)

## create project
# project = client.create_project(project_name="test_project_2", type="imageannotation", params = {
#     "instruction": "This is a test project"
# })
# print(project)

## Create task

# Image attachments (Google Cloud Storage URIs) to annotate.
tasks = [
    "gs://books-jp/bulksplash-ananogrey-HESw7t6aQqI.jpg",
    "gs://books-jp/bulksplash-aquintero210-xvlzYmY3Tz8.jpg",
    "gs://books-jp/bulksplash-artcoastdesign-oJ_5bY6KCag.jpg"
]

# Submit one image-annotation task per attachment.
for task in tasks:
    res = client.create_imageannotation_task(
        project='test_project_2',