Exemplo n.º 1
0
    def add_new_subject(self, image_list, metadata_list, subject_set_name):
        """
        Create a new subject set and fill it with one subject per image.

        A subject set with the given display name is created and linked to
        ``self.project``.  Each image is then saved as a subject carrying the
        metadata found at the same position in ``metadata_list``, and all new
        subjects are added to the set in a single call.

        :param image_list: list of images to be added
        :param metadata_list: list of metadata to be added, one entry per image
        :param subject_set_name: display name of the subject set to create
        :raises ValueError: if image_list and metadata_list differ in length
        :return: None
        """

        # Refuse mismatched input before anything is created remotely;
        # otherwise the loop below would either fail half-way through the
        # upload or silently ignore the surplus metadata.
        if len(image_list) != len(metadata_list):
            raise ValueError(
                "Image list and metadata list do not match "
                "({} images, {} metadata entries)".format(
                    len(image_list), len(metadata_list)))

        # Create the subject set the new subjects will belong to
        subject_set = SubjectSet()
        subject_set.links.project = self.project
        subject_set.display_name = subject_set_name
        subject_set.save()

        # Save one subject per (image, metadata) pair
        new_subjects = []
        for image, metadata in zip(image_list, metadata_list):
            subject = Subject()
            subject.links.project = self.project
            subject.add_location(image)
            subject.metadata.update(metadata)
            subject.save()
            new_subjects.append(subject)

        subject_set.add(new_subjects)
    def push_new_row_subjects(self, source_subject, target_subject_set_id, row_paths_by_column):
        """
        Create one new subject per row image and add them all to the target subject set.

        row_paths_by_column maps a column index to the image paths of the rows in that
        column.  Each new subject copies the 'book' and 'page' metadata of source_subject
        and records the source subject id and the column index it came from.
        """
        inherited_fields = ('book', 'page')

        parent_project = Project.find(settings.PROJECT_ID)
        destination_set = SubjectSet.find(target_subject_set_id)

        created = []
        for column_index, row_paths in row_paths_by_column.items():
            self._logger.info('Creating %d new row subjects for column index %d for subject %s',
                              len(row_paths), column_index, source_subject.id)
            for row_path in row_paths:
                row_subject = Subject()
                row_subject.links.project = parent_project
                # Carry over the fields that identify the source document
                for field in inherited_fields:
                    row_subject.metadata[field] = source_subject.metadata[field]
                row_subject.metadata['source_document_subject_id'] = source_subject.id
                row_subject.metadata['source_document_column_index'] = column_index
                row_subject.add_location(row_path)
                row_subject.save()
                created.append(row_subject)

        # Link every new subject to the set in one call
        destination_set.add(created)
Exemplo n.º 3
0
def pushSubject(subjectSet, project, imageLocations, metadata, livePost):
    """Create a subject, save it and add it to ``subjectSet``.

    When ``livePost`` is falsy nothing is created and None is returned.
    Otherwise a subject linked to ``project`` is built from every entry of
    ``imageLocations`` plus ``metadata``; saving is retried for as long as it
    raises ConnectionError.  The saved subject is returned.
    """
    # Dry run: do not touch the API at all
    if not livePost:
        return None

    subject = Subject()
    subject.links.project = project
    for image in imageLocations:
        subject.add_location(image)
    subject.metadata.update(metadata)

    # Keep retrying until a save goes through without a connection error
    while True:
        try:
            subject.save()
        except ConnectionError as e:
            print('{} , TRYING AGAIN'.format(e))
        else:
            break

    subjectSet.add(subject)
    return subject
Exemplo n.º 4
0
def save_subject(manifest_item, project, pbar=None):
    """
    Create and save a subject for one manifest item.

    Note: follow with subject_set.add(subject) to associate with subject set.

    Args:
        manifest_item (dict): of form {png_loc: img.png, key_data: some_data_dict}
        project (str): project to upload subject to e.g. '5773' for Galaxy Zoo
        pbar (tqdm.tqdm): progress bar to update. If None, no bar will display.

    Returns:
        The saved subject.

    Raises:
        FileNotFoundError: if manifest_item['png_loc'] does not exist.
    """
    png_loc = manifest_item['png_loc']
    # Explicit check instead of `assert`: assertions are stripped when Python
    # runs with -O, which would let a missing file through to the upload.
    if not os.path.exists(png_loc):
        raise FileNotFoundError(
            'Missing subject image: {}'.format(png_loc))

    subject = Subject()
    subject.links.project = project
    subject.add_location(png_loc)
    subject.metadata.update(manifest_item['key_data'])

    subject.save()

    # Compare with None rather than testing truthiness: a progress bar object
    # may evaluate as falsy (or refuse bool()) even though it should be updated.
    if pbar is not None:
        pbar.update()

    return subject
def make_tutorial_images(imagePaths, ellipseData, projectData):
    """Upload every image as a subject and add them to the configured subject set.

    Args:
        imagePaths: image paths to upload; the position of a path in this
            sequence is used as the group key passed to ellipseData.get_group.
        ellipseData: object exposing get_group(index); presumably a pandas
            GroupBy keyed by image index -- TODO confirm against the caller.
        projectData: mapping providing "user_name", "password" and
            "subject_set" (the id of the target subject set).

    Returns:
        None.  If the subject set cannot be found the API error is printed
        and nothing is uploaded.
    """
    # Connect to Panoptes
    Panoptes.connect(
        username=projectData["user_name"], password=projectData["password"]
    )

    # Look the subject set up once: it does not depend on the image, and doing
    # it inside the loop left `subjectSet` undefined for an empty imagePaths.
    try:
        subjectSet = SubjectSet.find(projectData["subject_set"])
    except PanoptesAPIException as e:
        print(e)
        return

    newSubjects = []
    for imageId, imagePath in enumerate(imagePaths):
        print(f"Adding {imagePath}...")
        newSubject = Subject()
        newSubject.add_location(imagePath)
        # New subjects belong to the same project as the subject set
        newSubject.links.project = subjectSet.links.project
        newSubject.metadata.update(
            make_metadata(
                ellipseData.get_group(imageId).reset_index(drop=True), imagePath
            )
        )
        newSubject.save()
        newSubjects.append(newSubject)

    # Nothing to link when no image was given
    if newSubjects:
        subjectSet.add(newSubjects)
Exemplo n.º 6
0
def create_subject(project, metadata, media_files):
    """Build, save and return a new subject linked to ``project``.

    Every entry of ``media_files`` is added as a location and ``metadata``
    is merged into the subject's metadata before the subject is saved.
    """
    new_subject = Subject()
    new_subject.links.project = project

    for location in media_files:
        new_subject.add_location(location)

    new_subject.metadata.update(metadata)
    new_subject.save()

    return new_subject
Exemplo n.º 7
0
    def _create_subject(self, project_id, filename, metadata=None):
        """Save and return a subject for ``filename`` in the given project.

        The project is looked up by ``project_id``.  ``metadata`` is merged
        into the subject's metadata only when it is truthy (so None and an
        empty mapping are both skipped).
        """
        new_subject = Subject()
        new_subject.links.project = Project.find(project_id)
        new_subject.add_location(filename)

        if metadata:
            new_subject.metadata.update(metadata)

        new_subject.save()
        return new_subject
def create_subject(project, media_files, metadata):
    """ Create, save and return a subject
        Args:
        - project: a Project() object defining the Zooniverse project
        - media_files: a list of media files to link to the subject
        - metadata: a dictionary with metadata to attach
        Returns:
        - the saved subject
    """
    new_subject = Subject()
    new_subject.links.project = project

    # Each media file becomes one location of the subject
    for media_file in media_files:
        new_subject.add_location(media_file)

    new_subject.metadata.update(metadata)
    new_subject.save()

    return new_subject
Exemplo n.º 9
0
def upload_subject(locations: List, project: Project, subject_set_name: str,
                   metadata: Dict):
    """Create a subject from local files and add it to a named subject set.

    Args:
        locations: paths of the files to attach; each must be an existing file.
        project: project the subject is linked to; ``project.id`` is also
            used to look up (or create) the subject set.
        subject_set_name: name handed to get_or_create_subject_set.
        metadata: mapping merged into the subject's metadata.

    Returns:
        The id of the saved subject.

    Raises:
        FileNotFoundError: if any entry of ``locations`` is not a file.
    """
    subject = Subject()
    # add files
    subject.links.project = project
    for location in locations:
        if not os.path.isfile(location):
            raise FileNotFoundError(
                'Missing subject location: {}'.format(location))
        subject.add_location(location)

    subject.metadata.update(metadata)

    subject_set = get_or_create_subject_set(project.id, subject_set_name)

    # Nothing is uploaded until save(); linking needs the saved subject
    subject.save()
    subject_set.add(subject)
    return subject.id
Exemplo n.º 10
0
def upload_images(id, use_database=True):
    """Create a subject set named ``id`` and upload the images of that field.

    The function changes into the directory ``target + id``, reads the first
    line of every ``*-manifest.txt`` file there and uploads one subject per
    manifest.  The line is comma-separated: field 0 is the subject id,
    fields 1-3 are the media locations, field 4 the source name and
    fields 5-7 the ra, dec and size.  The new subject set is finally linked
    to workflow 11973.

    Args:
        id: name of the data set; used as directory suffix, subject set
            display name and key for update_status.
        use_database: when true, the status is recorded through
            update_status before and after the upload.

    The working directory in effect at call time is restored on exit.
    """
    print('Create subject set and upload images for', id)
    if use_database:
        update_status(id, gz_status='Uploading')
    wd = os.getcwd()
    Panoptes.connect(username='******',
                     password=os.environ['PANOPTES_PASSWORD'])
    os.chdir(target + id)
    try:
        project = Project.find(slug='chrismrp/radio-galaxy-zoo-lofar')
        subject_set = SubjectSet()

        subject_set.display_name = id
        subject_set.links.project = project
        subject_set.save()
        print('Made subject set')
        new_subjects = []
        g = glob.glob('*-manifest.txt')
        # Count from 1 so the last manifest is reported as N/N
        for i, f in enumerate(g, start=1):
            # Close each manifest as soon as its first line has been read
            with open(f) as manifest:
                bits = manifest.readlines()[0].split(',')
            metadata = {
                'subject_id': int(bits[0]),
                'ra': float(bits[5]),
                'dec': float(bits[6]),
                '#size': float(bits[7]),
                'source_name': bits[4]
            }
            print('Upload doing', bits[4], '%i/%i' % (i, len(g)))
            subject = Subject()
            subject.links.project = project
            subject.metadata.update(metadata)
            for location in bits[1:4]:
                subject.add_location(location)
            subject.save()
            new_subjects.append(subject)

        subject_set.add(new_subjects)

        workflow = Workflow(11973)
        workflow.links.subject_sets.add(subject_set)
        if use_database:
            update_status(id, gz_status='In progress')
    finally:
        # Restore the caller's working directory, even when the upload fails
        os.chdir(wd)
    print('Done!')
Exemplo n.º 11
0
 def _create_subjects_from_epicollect5(self, project, subjects_metadata):
     subjects = list()
     for metadata in subjects_metadata:
         subject = Subject()
         subject.metadata['id'] = metadata['id']
         subject.metadata['project'] = metadata['project']
         subject.metadata['obs_type'] = metadata['obs_type']
         subject.metadata['source'] = metadata['source']
         subject.metadata['url'] = metadata['url']
         subject.metadata['created_at'] = metadata['created_at']
         subject.metadata['observer'] = metadata['observer']
         subject.metadata['longitude'] = metadata['location']['longitude']
         subject.metadata['latitude'] = metadata['location']['latitude']
         subject.metadata['comment'] = metadata['comment']
         subject.metadata['spectrum_type'] = metadata.get(
             'spectrum_type', "?")
         subject.add_location({'image/jpg': metadata['url']})
         subject.links.project = project
         subject.save()
         subjects.append(subject)
     return subjects
Exemplo n.º 12
0
    def create_subjects_and_link_to_project(self, proto_subjects, project_id,
                                            workflow_id, subject_set_id):
        """Upload subjects, add them to a subject set and link it to a workflow.

        Credentials are read from the PANOPTES_USERNAME and PANOPTES_PASSWORD
        environment variables and the connection uses ``self.ENDPOINT``.

        :param proto_subjects: iterable of mappings with the keys
            'location_lc', 'location_ps' (the two media locations) and
            'metadata' (mapping merged into the subject metadata)
        :param project_id: id of the project the subjects belong to
        :param workflow_id: id of the workflow the subject set is added to
        :param subject_set_id: id of an existing subject set, or None to
            create a new one named after the current UTC time
        :return: None; any exception is logged through ``self.log`` and
            not propagated
        """

        try:
            username = os.getenv('PANOPTES_USERNAME')
            password = os.getenv('PANOPTES_PASSWORD')
            Panoptes.connect(username=username,
                             password=password,
                             endpoint=self.ENDPOINT)

            project = Project.find(project_id)
            workflow = Workflow.find(workflow_id)

            if subject_set_id is None:
                # No target given: create a subject set named by timestamp
                subject_set = SubjectSet()
                ts = time.gmtime()
                subject_set.display_name = time.strftime(
                    "%m-%d-%Y %H:%M:%S", ts)
                subject_set.links.project = project

                subject_set.save()
            else:
                subject_set = SubjectSet.find(subject_set_id)

            subjects = []
            for proto_subject in proto_subjects:
                subject = Subject()
                subject.links.project = project
                subject.add_location(proto_subject['location_lc'])
                subject.add_location(proto_subject['location_ps'])
                subject.metadata.update(proto_subject['metadata'])
                subject.save()
                subjects.append(subject)

            subject_set.add(subjects)
            workflow.add_subject_sets(subject_set)
        except Exception:
            # Best effort: record the failure with its traceback and return
            self.log.exception("Error in create_subjects_and_link_to_project ")
Exemplo n.º 13
0
def main():
    """Bin the images listed in a file into subject sets of at most n subjects.

    Command-line arguments:
        -f/--filename: file with one image id per line (required).
        -n/--size: maximum number of images per subject set, between 1 and
            10000 inclusive (default 1000).

    For every group of up to n ids a subject set is created.  Each id is
    looked up in the MongoDB collection (as ``_id``) to obtain the file path
    to upload, a subject is saved and added to the set, and the record is
    marked as sent.  All new subject sets are finally added to the first
    workflow of the project and their ids are printed.

    Raises:
        ValueError: if n is outside 1..10000.
    """
    ap = argparse.ArgumentParser(
        description=
        'Given a list of images, bins them into subject sets of size n')

    # require file path to read in images
    ap.add_argument('-f',
                    '--filename',
                    required=True,
                    dest='filename',
                    type=str,
                    help='The name of the file from which to read the images')

    # optionally require subject set size; defaults to 1000
    ap.add_argument(
        '-n',
        '--size',
        required=False,
        dest='n',
        type=int,
        default=1000,
        help='The maximum number of images a subject set should contain. \
                          The value should be between 1 and 10000, inclusive')

    # parse args into variables and check values
    args = vars(ap.parse_args())

    filename = args['filename']
    n = args['n']

    # Validate n as given: coercing 0 to None made the range check itself
    # fail with a TypeError instead of reporting the invalid value.
    if n is None or not 1 <= n <= 10000:
        raise ValueError('n must be between 1 and 10000, inclusive')

    # connect to zooniverse
    Panoptes.connect(username=zooniverse_config.Zooniverse_USERNAME,
                     password=zooniverse_config.Zooniverse_PASS)
    project = Project.find(zooniverse_config.Project_ID)

    # connection to mongodb
    mongoConn = MongoClient(csh_db_config.DB_HOST + ":" +
                            str(csh_db_config.DB_PORT))
    cshTransDB = mongoConn[csh_db_config.TRANSCRIPTION_DB_NAME]
    cshTransDB.authenticate(csh_db_config.TRANSCRIPTION_DB_USER,
                            csh_db_config.TRANSCRIPTION_DB_PASS)
    cshCollection = cshTransDB[csh_db_config.TRANS_DB_MeetingMinColl]

    # track subject sets being created
    subjectSets = []

    # get the image ids in a Python list, without the trailing newline;
    # blank lines cannot match a record and are dropped
    with open(filename) as handle:
        image_ids = [line.rstrip() for line in handle]
    image_ids = [image_id for image_id in image_ids if image_id]

    # divide ids into consecutive groups of at most n
    filegroups = [image_ids[start:start + n]
                  for start in range(0, len(image_ids), n)]

    for group in filegroups:
        displayName = '{:%Y-%b-%d %H:%M:%S}'.format(datetime.datetime.now())

        # create a new subject set
        subjectSet = SubjectSet()
        subjectSet.links.project = project
        subjectSet.display_name = displayName
        subjectSet.save()

        subjectSetId = subjectSet.id
        subjectSets.append(subjectSetId)

        # the same update applies to every record of this subject set
        updateQuery = {
            '$set': {
                'canCrowdsource': True,
                'transcription': {
                    'numClassifications': 5,
                    'subjectSetId': subjectSetId,
                    'status': 'sent'
                }
            }
        }

        # create a new subject for each id and add to the subject set
        for image_id in group:
            # create a new subject
            subject = Subject()
            subject.links.project = project

            filepath = cshCollection.find_one({'_id':
                                               image_id})['file']['anonPath']
            subject.add_location(filepath)
            subject.metadata['ID'] = image_id
            subject.save()

            # add to subject set
            subjectSet.add(subject)

            # mark the record in mongodb as sent for crowdsourcing
            cshCollection.find_one_and_update({'_id': image_id}, updateQuery)

    # add subject sets to the workflow
    workflow = project.links.workflows[0]
    workflow.add_subject_sets(subjectSets)

    # print helpful information to the console
    print('{} subject sets created with the following IDs: {}'.format(
        len(subjectSets), subjectSets))
Exemplo n.º 14
0
# Upload every manifest entry that has not been uploaded before, keeping
# running totals and printing one status line per entry.
new_subjects = 0
old_subjects = 0
failed_subjects = 0
# Identifies the entry currently being processed; used in the status messages
working_on = []
#  loop over the preloaded manifest file
for metadata in manifest_list:
    working_on = [metadata['subject'], metadata['image1']]
    #  test for previously uploaded
    if metadata['image1'] not in previous_subjects:
        try:
            subject = Subject()
            subject.links.project = project
            #  find the files in the metadata listing and add their locations
            #  (every value after the first is inspected, so this relies on
            #  the order of the values in the metadata dict)
            for file in list(metadata.values())[1:]:
                # find() > 0: '.jpg' has to occur after the first character
                if file.find('.jpg') > 0:
                    subject.add_location(directory + os.sep + file)
            # update subject metadata
            subject.metadata.update(metadata)
            # again nothing happens until these two lines below, comment them out for testing
            subject.save()
            subject_set.add(subject.id)
            new_subjects += 1
            build_part = '{} successfully uploaded at {}'.format(working_on, str(datetime.now())[0:19]) + '\n'
        except panoptes_client.panoptes.PanoptesAPIException:
            # An API failure is counted and reported; the loop carries on
            failed_subjects += 1
            build_part = 'An error occurred during the upload of {}'.format(working_on) + '\n'
    else:
        old_subjects += 1
        build_part = '{} previously uploaded'.format(working_on) + '\n'
    # build_part already ends with a newline
    print(build_part, end='')
    if save:
Exemplo n.º 15
0
# Build the per-file metadata: the file name plus its position in `files`
subject_metadata = {}
for f, file in enumerate(files):
    subject_metadata[file] = {'file': file, 'subject_reference': f}

Panoptes.connect(username=username, password=password)
# The commented-out lines below would create a new project instead of
# loading the existing one by id.
# tutorial_project = Project()
tutorial_project = Project.find(7699)
# tutorial_project.display_name = display_name
# tutorial_project.description = description
# tutorial_project.primary_language = 'en'
# tutorial_project.private =True
# tutorial_project.save()

# Create a new subject set linked to the project
subject_set = SubjectSet()
subject_set.links.project = tutorial_project
subject_set.display_name = subject_name
subject_set.save()

# Reload the project and print its subject sets
tutorial_project.reload()
print(tutorial_project.links.subject_sets)

# Save one subject per file, showing a progress bar while uploading
new_subjects = []
for filename, metadata in tqdm.tqdm(subject_metadata.items()):
    subject = Subject()
    subject.links.project = tutorial_project
    subject.add_location(filename)
    subject.metadata.update(metadata)
    subject.save()
    new_subjects.append(subject)

# Add all new subjects to the subject set in one call
subject_set.add(new_subjects)
        quit()
    # create a new subject set for the new data and link it to the project above
    subject_set = SubjectSet()
    subject_set.links.project = project
    subject_set.display_name = set_name
    subject_set.save()

# Upload every file that is not listed in previous_subjects and count how
# many subjects were created versus skipped.
print('Uploading subjects, this could take a while!')
new_subjects = 0
old_subjects = 0
for filename, metadata in subject_metadata.items():
    try:
        if filename not in previous_subjects:
            subject = Subject()
            subject.links.project = project
            subject.add_location(location + os.sep + filename)
            subject.metadata.update(metadata)
            # Nothing is uploaded until save(); the saved subject is then
            # linked to the subject set by id
            subject.save()
            subject_set.add(subject.id)
            print(filename)
            new_subjects += 1
        else:
            old_subjects += 1
    except panoptes_client.panoptes.PanoptesAPIException:
        # A failed upload is reported and the loop continues with the next file
        print('An error occurred during the upload of ', filename)
print(new_subjects, 'new subjects created and uploaded', old_subjects,
      'already uploaded')

uploaded = 0
with open(location + os.sep + 'Uploaded subjects.csv', 'wt') as file:
    subject_set = SubjectSet.where(project_id=project.id,
except StopIteration:
    # Crea un nuevo subject set para los nuevos datos y lo asocia al proyecto.
    subject_set = SubjectSet()
    subject_set.links.project = project
    subject_set.display_name = image_set_name
    subject_set.save()

# Add the samples to the subject set: one subject per manifest row.
with open(manifest_images_file, 'r') as mani_file:
    print('Uploading image_set')
    # Rows are read as dicts keyed by the manifest's header line
    r = csv.DictReader(mani_file)
    for line in r:
        subject = Subject()
        subject.links.project = project

        # Each subject gets two locations, taken from the 'lc' and 'sp'
        # columns, and the row's 'id' column as its subject_id metadata
        subject.add_location(line['lc'])
        subject.add_location(line['sp'])
        subject.metadata['subject_id'] = line['id']
        subject.save()
        subject_set.add(subject.id)

# ------- Subject set de sonidos -------
# Conexión con el subject set correspondiente o creación de uno nuevo en caso
# de que este no exista.
try:
    # Comprueba si existe el subject set.
    subject_set = SubjectSet.where(project_id=project.id,
                                   display_name=audio_set_name).next()
except StopIteration:
    # Crea un nuevo subject set para los nuevos datos y lo asocia al proyecto.
    subject_set = SubjectSet()
# Collect the source URL of every <img> tag whose src matches gstatic.com
images = [a['src'] for a in soup.find_all("img", {"src": re.compile("gstatic.com")})]
#print images
# Download each image and store it as images/<image_type>_<counter>.jpg
for img in images:
  raw_img = urllib2.urlopen(img).read()
  #add the directory for your image here
  DIR="images/"
  # Next free number: files already present for this image type, plus one
  cntr = len([i for i in os.listdir(DIR) if image_type in i]) + 1
  # The with-block closes the file even when the write fails
  with open(DIR + image_type + "_"+ str(cntr)+".jpg", 'wb') as f:
    f.write(raw_img)

# NOTE: this snippet uses Python 2 print statements.
print 'Creating image set...'

# create the subject set.
subject_set = SubjectSet()
subject_set.links.project = p
subject_set.display_name = "Images of " + thing + '\'s'
subject_set.save()

print 'Uploading images to Zooniverse...'

# add all images to subject set
# Exactly the files images/<thing>_1.jpg .. images/<thing>_20.jpg are uploaded.
# NOTE(review): the download loop names files with image_type while this loop
# uses thing -- confirm both hold the same value.
for i in range(1,21):
    subject = Subject()
    subject.links.project = p
    subject.add_location('images/' + str(thing) + '_' + str(i)+'.jpg')
    subject.save()
    subject_set.add(subject)

print 'Complete.'
Exemplo n.º 19
0
        # get data-time from original video file
        try:
            video_data = FFProbe(location + os.sep + original_file)
            datetime = video_data.metadata['creation_time']
        except (IOError, KeyError, TypeError):
            print('Acquiring exif data for ', original_file, ' failed')
            datetime = ''

        # finally we are ready for the actual upload of the modified file:
        try:
            subject = Subject()
            subject.links.project = project
            compress(location + os.sep + original_file)
            print('Compressed ', original_file, 'to',
                  os.path.getsize('temp.mp4'), 'bytes, uploading....')
            subject.add_location('temp.mp4')
            videos_uploaded += 1
            # update the subject metadata (add '#' to the beginning of the field name to hide that field)
            subject.metadata['Site_Date'] = set_name
            subject.metadata['Filename'] = original_file
            subject.metadata['Date_time'] = datetime
            # nothing is actually uploaded to panoptes until the save is executed.
            # for testing without actually uploading anything comment out the following two lines
            subject.save()
            subject_set.add(subject.id)
        except panoptes_client.panoptes.PanoptesAPIException:
            print('An error occurred during the upload of ', original_file)
print(videos_uploaded, 'videos uploaded')
# cleanup the temporary file at the end
if os.path.isfile('temp.mp4'):
    os.remove('temp.mp4')
Exemplo n.º 20
0
        previous_subjects.append(subject.metadata['Filename'])
except StopIteration:
    # create a new subject set for the new data and link it to the project above
    subject_set = SubjectSet()
    subject_set.links.project = project
    subject_set.display_name = set_name
    subject_set.save()

# Upload every file that is not listed in previous_subjects and count the
# subjects that were created.
print('Uploading subjects, this could take a while!')
new_subjects = 0
for filename, metadata in subject_metadata.items():
    try:
        if filename not in previous_subjects:
            subject = Subject()
            subject.links.project = project
            # The value returned by compress() is what gets uploaded
            subject.add_location(compress(args.image_dir, filename, 960))
            subject.metadata.update(metadata)
            # Nothing is uploaded until save(); the saved subject is then
            # linked to the subject set by id
            subject.save()
            subject_set.add(subject.id)
            new_subjects += 1
    except panoptes_client.panoptes.PanoptesAPIException:
        # A failed upload is reported and the loop continues with the next file
        print('An error occurred during the upload of ', filename)
print(new_subjects, 'new subjects created and uploaded')
print('Uploading complete, Please wait while the full subject listing is prepared and saved in')

# Name of the listing file; it is announced to the user below
output_file = "uploaded_subjects.csv"

print('"%s" in the drive with the original images' % output_file)

uploaded = 0
with open(os.path.join(args.image_dir, output_file), 'wt') as file_up:
Exemplo n.º 21
0
def upload_subjects(
    subject_set_id,
    manifest_files,
    allow_missing,
    remote_location,
    mime_type,
    file_column,
):
    """
    Uploads subjects from each of the given MANIFEST_FILES.

    Example with only local files:

    $ panoptes subject-set upload-subjects 4667 manifest.csv

    Local filenames will be automatically detected in the manifest and
    uploaded, or filename columns can be specified with --file-column.

    If you are hosting your media yourself, you can put the URLs in the
    manifest and specify the column number(s):

    $ panoptes subject-set upload-subjects -r 1 4667 manifest.csv

    $ panoptes subject-set upload-subjects -r 1 -r 2 4667 manifest.csv

    Any local files will still be detected and uploaded.
    """
    if (
        len(manifest_files) > 1
        and any(map(lambda m: m.endswith('.yaml'), manifest_files))
    ):
        click.echo(
            'Error: YAML manifests must be processed one at a time.',
            err=True,
        )
        return -1
    elif manifest_files[0].endswith('.yaml'):
        with open(manifest_files[0], 'r') as yaml_manifest:
            upload_state = yaml.load(yaml_manifest, Loader=yaml.FullLoader)
        if upload_state['state_version'] > CURRENT_STATE_VERSION:
            click.echo(
                'Error: {} was generated by a newer version of the Panoptes '
                'CLI and is not compatible with this version.'.format(
                    manifest_files[0],
                ),
                err=True,
            )
            return -1
        if upload_state['subject_set_id'] != subject_set_id:
            click.echo(
                'Warning: You specified subject set {} but this YAML '
                'manifest is for subject set {}.'.format(
                    subject_set_id,
                    upload_state['subject_set_id'],
                ),
                err=True,
            )
            click.confirm(
                'Upload {} to subject set {} ({})?'.format(
                    manifest_files[0],
                    subject_set_id,
                    SubjectSet.find(subject_set_id).display_name,
                ),
                abort=True
            )
            upload_state['subject_set_id'] = subject_set_id
        resumed_upload = True
    else:
        upload_state = {
            'state_version': CURRENT_STATE_VERSION,
            'subject_set_id': subject_set_id,
            'manifest_files': manifest_files,
            'allow_missing': allow_missing,
            'remote_location': remote_location,
            'mime_type': mime_type,
            'file_column': file_column,
            'waiting_to_upload': [],
            'waiting_to_link': {},
        }
        resumed_upload = False

    remote_location_count = len(upload_state['remote_location'])
    mime_type_count = len(upload_state['mime_type'])
    if remote_location_count > 1 and mime_type_count == 1:
        upload_state['mime_type'] = (
            upload_state['mime_type'] * remote_location_count
        )
    elif remote_location_count > 0 and mime_type_count != remote_location_count:
        click.echo(
            'Error: The number of MIME types given must be either 1 or equal '
            'to the number of remote locations.',
            err=True,
        )
        return -1

    def validate_file(file_path):
        """Check that ``file_path`` can be uploaded.

        Returns True for an existing, non-empty file no larger than
        MAX_UPLOAD_FILE_SIZE.  Otherwise an error message is written to
        stderr and False is returned.
        """
        def reject(message):
            # Report the problem on stderr and signal failure to the caller
            click.echo(message, err=True)
            return False

        if not os.path.isfile(file_path):
            return reject(
                'Error: File "{}" could not be found.'.format(file_path)
            )

        size_in_bytes = os.path.getsize(file_path)
        if size_in_bytes == 0:
            return reject('Error: File "{}" is empty.'.format(file_path))

        if size_in_bytes > MAX_UPLOAD_FILE_SIZE:
            return reject(
                'Error: File "{}" is {}, larger than the maximum {}.'.format(
                    file_path,
                    humanize.naturalsize(size_in_bytes),
                    humanize.naturalsize(MAX_UPLOAD_FILE_SIZE),
                )
            )

        return True

    subject_set = SubjectSet.find(upload_state['subject_set_id'])
    if not resumed_upload:
        subject_rows = []
        for manifest_file in upload_state['manifest_files']:
            with open(manifest_file, 'U') as manifest_f:
                file_root = os.path.dirname(manifest_file)
                r = csv.reader(manifest_f, skipinitialspace=True)
                headers = next(r)
                for row in r:
                    metadata = dict(zip(headers, row))
                    files = []
                    if not upload_state['file_column']:
                        upload_state['file_column'] = []
                        for field_number, col in enumerate(row, start=1):
                            file_path = os.path.join(file_root, col)
                            if os.path.exists(file_path):
                                upload_state['file_column'].append(
                                    field_number,
                                )
                                if not validate_file(file_path):
                                    return -1
                                files.append(file_path)
                    else:
                        for field_number in upload_state['file_column']:
                            file_path = os.path.join(
                                file_root,
                                row[field_number - 1]
                            )
                            if not validate_file(file_path):
                                return -1
                            files.append(file_path)

                    for field_number, _mime_type in zip(
                        upload_state['remote_location'],
                        upload_state['mime_type'],
                    ):
                        files.append({_mime_type: row[field_number - 1]})

                    if len(files) == 0:
                        click.echo(
                            'Could not find any files in row:',
                            err=True,
                        )
                        click.echo(','.join(row), err=True)
                        if not upload_state['allow_missing']:
                            return -1
                        else:
                            continue
                    subject_rows.append((files, metadata))

                if not subject_rows:
                    click.echo(
                        'File {} did not contain any rows.'.format(
                            manifest_file,
                        ),
                        err=True,
                    )
                    return -1

        subject_rows = list(enumerate(subject_rows))
        upload_state['waiting_to_upload'] = copy.deepcopy(subject_rows)
    else:
        for subject_id, subject_row in upload_state['waiting_to_link'].items():
            try:
                subject = Subject.find(subject_id)
            except PanoptesAPIException:
                upload_state['waiting_to_upload'].append(subject_row)
                del upload_state['waiting_to_link'][subject_id]
        subject_rows = copy.deepcopy(upload_state['waiting_to_upload'])

    pending_subjects = []

    def move_created(limit):
        """Wait until at most ``limit`` subjects are still pending.

        Every pending subject whose ``async_save_result`` is truthy is moved
        from the 'waiting_to_upload' state to 'waiting_to_link', keyed by
        its subject id.  The pending list is polled every half second.
        """
        while len(pending_subjects) > limit:
            # Walk a snapshot: removing entries from the list being iterated
            # would skip the element that follows each removal.
            for subject, subject_row in list(pending_subjects):
                if subject.async_save_result:
                    pending_subjects.remove((subject, subject_row))
                    upload_state['waiting_to_upload'].remove(subject_row)
                    upload_state['waiting_to_link'][subject.id] = subject_row
            time.sleep(0.5)

    def link_subjects(limit):
        """Link the waiting subjects once more than ``limit`` have piled up.

        All subject ids in the 'waiting_to_link' state are added to the
        subject set in one call and the waiting list is then emptied.
        """
        waiting = upload_state['waiting_to_link']
        if len(waiting) <= limit:
            return
        subject_set.add(list(waiting.keys()))
        waiting.clear()

    with click.progressbar(
        subject_rows,
        length=len(subject_rows),
        label='Uploading subjects',
    ) as _subject_rows:
        try:
            with Subject.async_saves():
                for subject_row in _subject_rows:
                    count, (files, metadata) = subject_row
                    subject = Subject()
                    subject.links.project = subject_set.links.project
                    for media_file in files:
                        subject.add_location(media_file)
                    subject.metadata.update(metadata)
                    subject.save()

                    pending_subjects.append((subject, subject_row))

                    move_created(MAX_PENDING_SUBJECTS)
                    link_subjects(LINK_BATCH_SIZE)

            move_created(0)
            link_subjects(0)
        finally:
            if (
                len(pending_subjects) > 0
                or len(upload_state['waiting_to_link']) > 0
            ):
                click.echo('Error: Upload failed.', err=True)
                if click.confirm(
                    'Would you like to save the upload state to resume the '
                    'upload later?',
                    default=True,
                ):
                    while True:
                        state_file_name = 'panoptes-upload-{}.yaml'.format(
                            subject_set_id,
                        )
                        state_file_name = click.prompt(
                            'Enter filename to save to',
                            default=state_file_name,
                        )

                        if not state_file_name.endswith('.yaml'):
                            click.echo(
                                'Error: File name must end in ".yaml".',
                                err=True,
                            )
                            if click.confirm(
                                'Save to {}.yaml?'.format(state_file_name),
                                default=True,
                            ):
                                state_file_name += '.yaml'
                            else:
                                continue
                        if not is_valid_filename(state_file_name):
                            click.echo(
                                'Error: {} is not a valid file name'.format(
                                    state_file_name,
                                ),
                                err=True,
                            )
                            sanitized_filename = sanitize_filename(
                                state_file_name,
                            )
                            if click.confirm(
                                'Save to {}?'.format(
                                    sanitized_filename,
                                ),
                                default=True,
                            ):
                                state_file_name = sanitized_filename
                            else:
                                continue
                        if os.path.exists(state_file_name):
                            if not click.confirm(
                                'File {} already exists. Overwrite?'.format(
                                    state_file_name,
                                ),
                                default=False,
                            ):
                                continue
                        break

                    with open(state_file_name, 'w') as state_file:
                        yaml.dump(upload_state, state_file)
Exemplo n.º 22
0
    def upload_chunks(self,
                      destination,
                      project_slug,
                      set_prefix,
                      zooniverse_login,
                      zooniverse_pwd,
                      batches=0,
                      **kwargs):
        """
        Upload the not-yet-uploaded chunk batches to a Zooniverse project.

        Reads ``chunks.csv`` from ``destination``. For every batch that is not
        entirely flagged as uploaded, a subject set named
        ``<set_prefix>_batch_<batch>`` is created, one subject is saved per
        chunk, and the updated chunk table is written back to ``chunks.csv``.

        :param destination: directory holding ``chunks.csv`` and ``chunks/``
        :param project_slug: slug of the Zooniverse project
        :param set_prefix: prefix of the created subject sets' display names
        :param zooniverse_login: Zooniverse user name
        :param zooniverse_pwd: Zooniverse password
        :param batches: maximum number of batches to upload; 0 means no limit
        :param kwargs: ignored
        :raises Exception: if ``chunks.csv`` cannot be read
        :return:
        """
        self.destination = destination

        metadata_location = os.path.join(self.destination, 'chunks.csv')
        try:
            self.chunks = pd.read_csv(metadata_location, index_col='index')
        except Exception as exc:
            # A bare ``except`` would also swallow KeyboardInterrupt and
            # SystemExit; chaining keeps the underlying cause visible.
            raise Exception(
                "cannot read chunk metadata in {}. Check the --destination parameter, and make sure you have extracted chunks before."
                .format(metadata_location)) from exc

        Panoptes.connect(username=zooniverse_login, password=zooniverse_pwd)
        zooniverse_project = Project.find(slug=project_slug)

        uploaded = 0
        for batch, chunks in self.chunks.groupby('batch'):
            # Batches whose chunks are all flagged as uploaded are skipped.
            if chunks['uploaded'].all():
                continue

            subject_set = SubjectSet()
            subject_set.links.project = zooniverse_project
            subject_set.display_name = "{}_batch_{}".format(set_prefix, batch)
            subject_set.save()

            subjects = []
            subjects_metadata = []

            for chunk_index, chunk in chunks.to_dict(orient='index').items():
                print("uploading chunk {} ({},{}) in batch {}".format(
                    chunk['recording'], chunk['onset'], chunk['offset'],
                    batch))

                subject = Subject()
                subject.links.project = zooniverse_project
                subject.add_location(
                    os.path.join(self.destination, 'chunks', chunk['mp3']))
                subject.metadata['date_extracted'] = chunk['date_extracted']
                subject.save()
                subjects.append(subject)

                # Record where the chunk went so the CSV reflects the upload.
                chunk['index'] = chunk_index
                chunk['zooniverse_id'] = subject.id
                chunk['project_slug'] = project_slug
                chunk['subject_set'] = str(subject_set.display_name)
                chunk['uploaded'] = True
                subjects_metadata.append(chunk)

            subject_set.add(subjects)

            self.chunks.update(
                pd.DataFrame(subjects_metadata).set_index('index'))

            # Progress is written to disk after every batch.
            self.chunks.to_csv(metadata_location)
            uploaded += 1

            if batches > 0 and uploaded >= batches:
                return
if len(files) == 0:
  raise Exception('Error finding PNG files. Did you specify correct station? ('+BASEDIR+'ZOO/'+station+'/*.png)')
# The station's .zoo file must hold exactly four lines (fft, overlap,
# color_min, color_max); the unpacking below fails otherwise.
# The handle is closed as soon as the lines are read.
# NOTE: readlines() keeps line endings, so each value still carries its
# trailing newline when stored in the subject metadata.
with open(BASEDIR+station+'.zoo','r') as metadata:
    (fft,overlap,color_min,color_max) = metadata.readlines()

#Create uploaded directory if necessary
dest = BASEDIR+'ZOO/'+station+'/uploaded/'
if not(os.path.isdir(dest)):
    os.mkdir(dest)

# Create one subject per file, then move the file out of the way so it is
# not picked up again.
for file in files:
    # Single-argument print(...) produces the same output on Python 2 and 3.
    print("Uploading file %s" % file)
    sys.stdout.flush()
    subject = Subject()
    subject.links.project = project
    subject.add_location(file)
    # You can set whatever metadata you want, or none at all
    subject.metadata['filename'] = os.path.basename(file)
    #TODO subject.metadata['file_start'] =
    #TODO subject.metadata['sample_rate'] = 5512
    subject.metadata['fft'] = fft
    subject.metadata['overlap'] = overlap
    subject.metadata['color_min'] = color_min
    subject.metadata['color_max'] = color_max
    #TODO subject.metadata['width'] =
    #TODO subject.metadata['height'] =
    subject.save()
    subjects.append(subject)
    os.rename(file,dest+os.path.basename(file)) #move file to uploaded directory
#Create a new subject set or append the subjects to an existing one
for subject_set in project.links.subject_sets:
Exemplo n.º 24
0
# Look up the target project by its slug.
project = Project.find(slug='pmason/fossiltrainer')

# Modify the subject set name as needed:
set_name = 'test_url'

# This section sets up a subject set: the first set returned by the query on
# project id and display name is reused; if the query yields nothing, a new
# set is created.
try:
    # check if the subject set already exists
    subject_set = SubjectSet.where(project_id=project.id,
                                   display_name=set_name).next()
except StopIteration:
    # no match: create a new subject set for the new data and link it to the
    # project above
    subject_set = SubjectSet()
    subject_set.links.project = project
    subject_set.display_name = set_name
    subject_set.save()

# This section adds subjects from a manifest to the above subject set
with open(manifest_file, 'r') as mani_file:
    r = csv.DictReader(mani_file)
    for line in r:
        # one new subject per manifest row
        subject = Subject()
        subject.links.project = project

        # Modify the next three lines with the appropriate column headers
        # from the manifest file
        subject.add_location({'image/jpeg': line['link']})
        subject.metadata['subject_id'] = line['subject_id']
        subject.metadata['image_name'] = line['image_name']
        subject.save()
        # each subject is linked to the set right after it has been saved
        subject_set.add(subject.id)
Exemplo n.º 25
0
except StopIteration:
    # create a new subject set for the new data and link it to the project above
    subject_set_new = SubjectSet()
    subject_set_new.links.project = proj
    subject_set_new.display_name = new_set_name
    subject_set_new.save()

# Iterate through the subjects, duplicating each one into the new subject set.
# k counts the subjects that were saved and linked without an API error.
k = 0
for old_sub in add_subjects:
    old_subject = Subject(old_sub)
    try:
        # Build a fresh subject in the target project that carries over the
        # locations and metadata of the original.
        new_subject = Subject()
        new_subject.links.project = proj
        for loc in old_subject.locations:
            new_subject.add_location(loc)
        new_subject.metadata = old_subject.metadata
        new_subject.save()
        subject_set_new.add(new_subject)
        print(new_subject.id, 'duplicated in new set to new set')
        k += 1
    except panoptes_client.panoptes.PanoptesAPIException:
        # Report the failure (with the exception currently being handled) and
        # carry on with the next subject.
        print(old_sub,  'did not duplicate correctly', str(sys.exc_info()[1]))
print(k, ' subjects linked to subject set ', new_set_name, ' in project ', proj_id)

linked = 0
with open(os.getcwd() + os.sep + 'duplicated_subjects.csv', 'wt', newline='', encoding='utf-8') as file:
    fieldnames = ['subject_id', 'Metadata', 'Locations']
    writer = csv.DictWriter(file, fieldnames=fieldnames)
    writer.writeheader()
    subject_set = SubjectSet.where(project_id=proj_id, display_name=new_set_name).next()
Exemplo n.º 26
0
# Subjects that were saved successfully and still have to be linked to a set.
new_subjects = []

for img in images:
    try:
        s = Subject()
        s.links.project = project
        # manifest file
        if os.path.splitext(img)[1] == ".csv":   # upload manifest info.... not sure how this will be set up after second step
            # move csv to complete images folder
            # NOTE(review): `f` is not the csv path anywhere in the visible
            # code (it is only bound to the log file handle below); this
            # presumably should be `img` -- confirm before changing.
            shutil.copy(f, completed_images)
            # make dict out of csv file for upload; the handle is closed as
            # soon as the rows have been consumed
            with open(img) as manifest_file:
                manifest = csv.DictReader(manifest_file)
                s.metadata.update(manifest)
        else:
            # upload image to subject
            s.add_location(img)
            s.save()
            new_subjects.append(s)
            image_count+=1
    except Exception as e:
        # Best effort: any failure is logged and the file is set aside so the
        # remaining images are still processed. The `with` block guarantees
        # the log handle is closed even if the rename itself fails.
        with open(logfile, "a") as f:
            t = time.localtime()
            # move error files into separate folder
            os.rename(img, errorfiles + os.path.basename(os.path.normpath(img)))
            f.write('Unable to upload ' + img + ': ' + str(e) + ' '+time.strftime("%D:%H:%M:%S", t)+'\n\n')

try:
    # add subjects to subject set
    subject_set.save()
    subject_set.add(new_subjects)
    if retry.lower() == 'n':
        quit()
    # create a new subject set for the new data and link it to the project above
    subject_set = SubjectSet()
    subject_set.links.project = project
    subject_set.display_name = set_name
    subject_set.save()

# Upload every file that is not already listed in previous_subjects, linking
# each new subject to subject_set right after it is saved.
print('Uploading subjects, this could take a while!')
new_subjects = 0  # number of subjects created and linked in this run
for filename, metadata in subject_metadata.items():
    try:
        if filename not in previous_subjects:
            subject = Subject()
            subject.links.project = project
            # the location is whatever compress() returns for this file
            # (presumably the path of a resized copy -- confirm in compress)
            subject.add_location(compress(location, filename, 960))
            subject.metadata.update(metadata)
            subject.save()
            subject_set.add(subject.id)
            new_subjects += 1
    except panoptes_client.panoptes.PanoptesAPIException:
        # an API error only skips this file; the loop carries on
        print('An error occurred during the upload of ', filename)
print(new_subjects, 'new subjects created and uploaded')
print(
    'Uploading complete, Please wait while the full subject listing is prepared and saved in'
)
print('"Uploaded subjects.csv" in the drive with the original images')

uploaded = 0
with open(location + os.sep + 'Uploaded subjects.csv', 'wt') as file_up:
    file_up.write('subject.id' + ',' + 'Filename' + '\n')
            }

            segments.append(segment)
    print('Item segments transformation complete.')
    return segments

# Build the list of segments for the configured Library of Congress item.
segments = transform_item_segments('https://www.loc.gov/item/' + LIBRARY_OF_CONGRESS_ITEM_ID)

Panoptes.connect(username=USERNAME, password=PASSWORD, endpoint=ENDPOINT)

project = Project.find(PROJECT)

# Create the subject set that will receive every segment of the item.
# NOTE: segments[0] raises IndexError when no segment was produced.
subject_set = SubjectSet()
subject_set.links.project = project
subject_set.display_name = segments[0]['metadata']['Title'] # uses item Title as default subject set name, or feel free to hardcode
subject_set.save()

print('Begin Zooniverse subject upload...')
# One subject per segment, linked to the set right after it is saved.
for segment in segments:
    subject = Subject()

    subject.links.project = project
    subject.add_location(segment['location'])

    subject.metadata.update(segment['metadata'])

    subject.save()
    subject_set.add(subject)

print("Zooniverse subject upload complete.")
Exemplo n.º 29
0
    def upload_chunks(self,
                      chunks: str,
                      project_id: int,
                      set_name: str,
                      zooniverse_login="",
                      zooniverse_pwd="",
                      amount: int = 1000,
                      ignore_errors: bool = False,
                      **kwargs):
        """Uploads ``amount`` audio chunks from the CSV dataframe `chunks` to a zooniverse project.

        Only rows whose ``uploaded`` column is ``False`` are considered.
        Subjects that were saved successfully are linked to the subject set
        named ``set_name`` (created when the project has none with that name)
        and their rows are flagged as uploaded in the CSV file.

        :param chunks: path to the chunk CSV dataframe
        :type chunks: str
        :param project_id: zooniverse project id
        :type project_id: int
        :param set_name: name of the subject set
        :type set_name: str
        :param zooniverse_login: zooniverse login. If not specified, the program attempts to get it from the environment variable ``ZOONIVERSE_LOGIN`` instead, defaults to ''
        :type zooniverse_login: str, optional
        :param zooniverse_pwd: zooniverse password. If not specified, the program attempts to get it from the environment variable ``ZOONIVERSE_PWD`` instead, defaults to ''
        :type zooniverse_pwd: str, optional
        :param amount: amount of chunks to upload, defaults to 1000
        :type amount: int, optional
        :param ignore_errors: if True, keep uploading the remaining chunks after a subject fails to save; otherwise stop at the first failure, defaults to False
        :type ignore_errors: bool, optional
        :raises Exception: if the chunk CSV file cannot be read
        """

        self.chunks_file = chunks
        self.get_credentials(zooniverse_login, zooniverse_pwd)

        metadata_location = self.chunks_file
        try:
            self.chunks = pd.read_csv(metadata_location, index_col="index")
        except Exception as exc:
            # A bare ``except`` would also swallow KeyboardInterrupt and
            # SystemExit; chaining keeps the underlying cause visible.
            raise Exception("cannot read chunk metadata from {}.".format(
                metadata_location)) from exc

        assert_dataframe("chunks", self.chunks)
        assert_columns_presence(
            "chunks",
            self.chunks,
            {"recording_filename", "onset", "offset", "uploaded", "mp3"},
        )

        from panoptes_client import Panoptes, Project, Subject, SubjectSet

        Panoptes.connect(username=self.zooniverse_login,
                         password=self.zooniverse_pwd)
        zooniverse_project = Project(project_id)

        subjects_metadata = []

        # Reuse the project's subject set with this display name if there is
        # one (the last match wins), otherwise create it.
        subject_set = None

        for ss in zooniverse_project.links.subject_sets:
            if ss.display_name == set_name:
                subject_set = ss

        if subject_set is None:
            subject_set = SubjectSet()
            subject_set.links.project = zooniverse_project
            subject_set.display_name = set_name
            subject_set.save()

        subjects = []

        # Element-wise comparison on the column: ``== False`` is intentional.
        chunks_to_upload = self.chunks[self.chunks["uploaded"] == False].head(
            amount)
        chunks_to_upload = chunks_to_upload.to_dict(orient="index")

        if len(chunks_to_upload) == 0:
            print("nothing left to upload.")
            return

        for chunk_index, chunk in chunks_to_upload.items():
            print("uploading chunk {} ({},{})".format(
                chunk["recording_filename"], chunk["onset"], chunk["offset"]))

            subject = Subject()
            subject.links.project = zooniverse_project
            subject.add_location(
                os.path.join(os.path.dirname(self.chunks_file), "chunks",
                             chunk["mp3"]))
            subject.metadata["date_extracted"] = chunk["date_extracted"]

            try:
                subject.save()
            except Exception as e:
                print("failed to save chunk {}. an exception has occured:\n{}".
                      format(chunk_index, str(e)))
                print(traceback.format_exc())

                # Use the ``ignore_errors`` parameter: the previous reference
                # to an undefined ``args`` raised NameError right here.
                if ignore_errors:
                    continue
                else:
                    print("subject upload halting here.")
                    break

            subjects.append(subject)

            # Record where the chunk went so the CSV reflects the upload.
            chunk["index"] = chunk_index
            chunk["zooniverse_id"] = str(subject.id)
            chunk["project_id"] = str(project_id)
            chunk["subject_set"] = str(subject_set.display_name)
            chunk["uploaded"] = True
            subjects_metadata.append(chunk)

        if len(subjects) == 0:
            return

        # Subjects saved before a failure are still linked and recorded.
        subject_set.add(subjects)

        self.chunks.update(pd.DataFrame(subjects_metadata).set_index("index"))

        self.chunks.to_csv(self.chunks_file)
Exemplo n.º 30
0
            for i in range(eights, groups):
                group_list.append(7)

    k = 0
    for index in range(0, len(group_list)):
        date_times = ''
        files = ''
        try:
            subject = Subject()
            new_subjects += 1
            subject.links.project = project
            for j in range(0, group_list[index]):
                compressed_file = compress(
                    location + os.sep + seq[k][0], 900000,
                    r'C:\py\image_manipulation\temp_file.jpg')
                subject.add_location(compressed_file)
                files += seq[k][0] + ', '
                date_times += seq[k][1] + ', '
                images_uploaded += 1
                k += 1
            subject.metadata['File_group'] = files[:-2]
            subject.metadata['Site_Date'] = set_name
            subject.metadata['Date_times'] = date_times[:-2]
            print('Uploading group, this could take a while!')
            subject.save()
            subject_set.add(subject.id)
            print(new_subjects, subject.metadata['File_group'],
                  subject.metadata['Date_times'])
        except panoptes_client.panoptes.PanoptesAPIException:
            print('An error occurred during the upload of ', files)
print(images_uploaded, 'images uploaded into', new_subjects, 'subjects')
Exemplo n.º 31
0
#(img) C:/Users/Rdebbout/Downloads/vids_DUL>ffmpeg -i test_out4.mp4 -b 1397520 bit_down2.mp4
# resolution
#(img) C:/Users/Rdebbout/Downloads/vids_DUL>ffmpeg -i test_out4.mp4 -vf scale=960:540 bit_down_scale.mp4

# ffprobe -v quiet -print_format json -show_format -show_streams test_out4.mp4 > op.json

################################################################################

from panoptes_client import SubjectSet, Subject, Project, Panoptes

# Upload a single test video as one subject and link it to an existing
# subject set (project and subject set ids are hard-coded).
Panoptes.connect(username='******', password='******')
project = Project.find(id = 5483)
subject_set = SubjectSet.find(17639)
subject = Subject()
subject.links.project = project
# NOTE(review): the dict form of add_location is, as far as I know, meant for
# externally hosted URLs keyed by MIME type -- confirm it accepts a local path.
subject.add_location({'video/mp4': ('C:/Users/Rdebbout/Downloads/vids_DUL/'
                        'test_frame_rate/duo_DVR150925_1432_001clip.mp4')})
subject.metadata['site_id'] = 'NCCAGL10-1047'
subject.save()
subject_set.add(subject)

################################################################################

here = 'C:/Users/Rdebbout/Downloads/vids_DUL/test_frame_rate/prepare_ye'
tbl_list = pd.read_csv('CitSci_VideoList_beta.csv')

# Rescale every file listed in `here` to 960x540 with ffmpeg, writing
# "<name before the first dot>_test.mpeg".
# NOTE(review): ffmpeg receives the bare file name, so this only finds the
# inputs when the working directory is `here` -- confirm.
for f in os.listdir(here):
    # Single-argument print(...) produces the same output on Python 2 and 3.
    print(f)
    # An argument list (instead of one command string) keeps file names with
    # spaces intact and also works on platforms where a string command would
    # need a shell.
    subprocess.call(['ffmpeg', '-i', f, '-vf', 'scale=960:540',
                     '{0}_test.mpeg'.format(f.split('.')[0])])


################################################################################
Exemplo n.º 32
0
def create_subjects_and_link_to_project(proto_subjects,
                                        project_id,
                                        subject_set_id,
                                        subject_set_name=None):
    ''' Find the project and the relevant subject set, compare the metadata of
    the existing subjects with the new proto_subjects, and upload every
    proto subject whose metadata is not already present in the set.

    Keyword Arguments:
    proto_subjects -- dictionary mapping subject filepath+filename to the associated metadata
    project_id -- identifier to find and link with the project
    subject_set_id -- identifier for the subject set of interest; None creates a new subject set
    subject_set_name -- display name of the subject set. For a new set, a
        name is generated when this is None; for an existing set, it must
        match the set's current name when given.

    Returns:
    -1 when subject_set_name does not match the name of the existing subject
    set (nothing is uploaded in that case), otherwise None.
    '''

    # get the project object
    project = Project.find(project_id)

    # set up subject_set
    if subject_set_id is None:
        subject_set = SubjectSet()  # create empty subject_set
        subject_set.links.project = project

        if subject_set_name is None:  # if not defined generate a random subject set name to avoid error when a set already exists
            subject_set_name = 'subject_set_{:02d}_{:02d}_{:04d}_{}'.format(
                date.day, date.month, date.year,
                ''.join(generate_random_str()))
        print("will create a subject set called: {}".format(subject_set_name))
        subject_set.display_name = subject_set_name  # set the name of the subject set
        subject_set.save()
        project.reload()
    else:
        # find the existing subject_set; `find` is called on the class, no
        # throwaway instance is needed
        subject_set = SubjectSet.find(subject_set_id)
        existing_subject_set_name = subject_set.display_name  # get its name

        # if you have tried to set the subject set name, check that it matches the name for the chosen subject set id
        if (subject_set_name is not None) and (existing_subject_set_name !=
                                               subject_set_name):
            print(
                "your chosen subject set name does not match the existing name: {}, {}"
                .format(subject_set_name, existing_subject_set_name))
            return -1
        else:
            subject_set_name = existing_subject_set_name

        print("add to existing subject set: {}".format(subject_set_name))

    # Create a list of the existing subject metadata
    meta_list = []
    print("existing subjects:")
    for subject in subject_set.subjects:
        print(subject.id, subject.metadata)
        meta_list.append(subject.metadata)

    # When making list of subjects to add, check to see if the metadata of the subject you want to add is already in the set
    print("new subjects:")
    new_subjects = []
    for filename, metadata in proto_subjects.items():

        # check if this subject is already in the subject set (plain equality
        # against each existing metadata dict)
        if metadata in meta_list:
            print("{}, subject already in set".format(metadata))
            # In this case we skip over the subject that already exists.
            # N.B. you may want to remove an existing subject and update it with the new one
            continue

        # Otherwise we can add the subject to the new subject list
        subject = Subject()

        subject.links.project = project
        subject.add_location(filename)

        subject.metadata.update(metadata)

        subject.save()
        new_subjects.append(subject)
        print("{}, new subject add to list".format(metadata))

    print("new subjects to add: {}".format(new_subjects))

    # add the new subject list (data and metadata) to the already defined project subject set
    subject_set.add(new_subjects)

    return
Exemplo n.º 33
0
 if np.isnan(row['t01_smooth_or_features_a02_features_or_disk_weighted_fraction']):
     pbar = 'NaN'
     pspiral = 'NaN'
     dr8id = 'NaN'
     dr7id = 'NaN'
     specid = 'NaN'
 else:
     pbar = row['t01_smooth_or_features_a02_features_or_disk_weighted_fraction']*row['t02_edgeon_a05_no_weighted_fraction']*row['t03_bar_a06_bar_weighted_fraction']
     pspiral = row['t01_smooth_or_features_a02_features_or_disk_weighted_fraction']*row['t02_edgeon_a05_no_weighted_fraction']*row['t04_spiral_a08_spiral_weighted_fraction']
     dr8id = row['dr8objid']
     dr7id = row['dr7objid']
     specid = row['specobjid']
 summer += 1
 subject = Subject()
 subject.links.project = project
 subject.add_location('./manga_mpl4_cutouts/cutouts/{0}.jpg'.format(row['MANGAID'].decode('utf-8')))
 subject.metadata['RA'] = row['RA']
 subject.metadata['DEC'] = row['DEC']
 subject.metadata['MANGAID'] = row['MANGAID'].decode('utf-8')
 subject.metadata['Z'] = row['Z']
 subject.metadata['PETROTH50'] = row['PETROTH50']
 subject.metadata['#MANGA_TILEID'] = row['MANGA_TILEID']
 subject.metadata['#NSAID'] = row['NSAID']
 subject.metadata['#SERSIC_TH50'] = row['SERSIC_TH50']
 subject.metadata['#P(Bar)'] = pbar
 subject.metadata['#P(Spiral)'] = pspiral
 subject.metadata['#specobjid'] = specid
 subject.metadata['#dr8objid'] = dr8id
 subject.metadata['#dr7objid'] = dr7id
 try:
     subject.save()