def save(self):
    """
    Create an inactive user with a generated password, then send the
    activation email that includes that password.

    The username and email come from this form's cleaned_data; the
    password is whatever rand_string(10) produces.

    Returns: the user object returned by create_inactive_user().
    """
    form_data = self.cleaned_data
    username = form_data['username']
    email = form_data['email']

    # Randomly generate a password.
    password = rand_string(10)

    # send_email=False: the activation email is sent below instead,
    # so that it can include the generated password.
    new_user = UserenaSignup.objects.create_inactive_user(
        username,
        email,
        password,
        send_email=False,
    )

    # Send the activation email. Include the generated password.
    signup_record = UserenaSignup.objects.get(user__username=username)
    send_activation_email_with_password(signup_record, password)

    return new_user
def annotations_file_to_python(annoFile, source, expecting_labels):
    """
    Parse an uploaded annotations file into a shelved (on-disk) dict.

    Takes:
    annoFile - an iterable of lines with a close() method. Each non-empty
        line is a ';'-separated list of tokens:
        <value1>;...;<valueN>;<date>;<row>;<col>[;<label>]
        where N is source.num_of_keys().
    source - the source the annotations belong to.
    expecting_labels - if true, every line must include the label token,
        and each label code must exist in the database and be in the
        source's labelset. If false, the label token is optional and
        is not stored.

    Returns: a tuple (annotation_dict, annotation_dict_id).
    annotation_dict is a shelve-backed dict like this:
    {'Shore1;Reef3;...;2008': [{'row': 695, 'col': 802, 'label': 'POR'},
                               {'row': 284, 'col': 1002, 'label': 'ALG'},
                               ...],
     'Shore2;Reef5;...;2009': [...]
     ... }
    (the 'label' key is present only when expecting_labels is true).
    On success the shelf is returned still open. annotation_dict_id is
    the random string used in the shelf's filename.

    Raises:
    DirectoryAccessError - settings.SHELVED_ANNOTATIONS_DIR is missing,
        unreadable, or unwritable.
    FileContentError - a line has the wrong number of tokens, a
        non-positive-integer row or col, an unknown label, or an
        invalid year.

    On any error raised while reading the lines, the shelf and annoFile
    are both closed before the exception propagates.
    """

    # We'll assume annoFile is an InMemoryUploadedFile, as opposed to a
    # filename of a temp-disk-storage file. If we ever encounter a case
    # where we have a filename, it would have to be opened first.

    # Format args: line number, line contents, error message
    file_error_format_str = str_consts.ANNOTATION_FILE_FULL_ERROR_MESSAGE_FMTSTR

    numOfKeys = source.num_of_keys()

    # The order of the words/tokens is encoded here. If the order ever
    # changes, we should only have to change this part.
    value_field_names = ['value' + str(i) for i in range(1, numOfKeys + 1)]
    words_format_without_label = value_field_names + ['date', 'row', 'col']
    words_format_with_label = words_format_without_label + ['label']

    num_words_with_label = len(words_format_with_label)
    num_words_without_label = len(words_format_without_label)
    if expecting_labels:
        num_words_expected = num_words_with_label
    else:
        num_words_expected = num_words_without_label

    # The annotation dict needs to be kept on disk temporarily until all the
    # Ajax upload requests are done. Thus, we'll use Python's shelve module
    # to make a persistent dict.
    if not os.access(settings.SHELVED_ANNOTATIONS_DIR, os.R_OK | os.W_OK):
        # Don't catch this error and display it to the user.
        # Just let it become a server error to be e-mailed to the admins.
        raise DirectoryAccessError(
            "The SHELVED_ANNOTATIONS_DIR either does not exist, is not readable, or is not writable. Please rectify this."
        )

    annotation_dict_id = rand_string(10)
    annotation_dict = shelve.open(os.path.join(
        settings.SHELVED_ANNOTATIONS_DIR,
        'source{source_id}_{dict_id}'.format(
            source_id=source.id,
            dict_id=annotation_dict_id,
        ),
    ))

    def content_error(line_num, line, error):
        # Build the exception for a bad line, using the standard
        # "line number / line contents / error" message format.
        return FileContentError(file_error_format_str.format(
            line_num=line_num,
            line=line,
            error=error,
        ))

    def parse_positive_int(token):
        # Return the token as an int if it is a positive integer,
        # otherwise None.
        try:
            number = int(token)
        except ValueError:
            return None
        if number <= 0:
            return None
        return number

    bom = codecs.BOM_UTF8
    # Label codes that have already passed validation.
    seen_label_codes = set()

    succeeded = False
    try:
        for line_num, line in enumerate(annoFile, 1):

            # Strip any leading UTF-8 BOM, then strip any
            # leading/trailing whitespace.
            # The BOM is removed as a prefix; lstrip() would treat it as a
            # set of characters and could eat the first byte of other
            # legitimate text.
            while line.startswith(bom):
                line = line[len(bom):]
            stripped_line = line.strip()

            # Ignore empty lines.
            if stripped_line == '':
                continue

            # Split the line into words/tokens, and strip leading and
            # trailing whitespace from each token.
            words = [w.strip() for w in stripped_line.split(';')]

            # Check that all expected words/tokens are there.
            has_label_token = (len(words) == num_words_with_label)
            lacks_label_token = (len(words) == num_words_without_label)
            if expecting_labels:
                words_format_is_valid = has_label_token
            else:
                words_format_is_valid = has_label_token or lacks_label_token

            if not words_format_is_valid:
                raise content_error(
                    line_num, stripped_line,
                    str_consts.ANNOTATION_FILE_TOKEN_COUNT_ERROR_FMTSTR.format(
                        num_words_expected=num_words_expected,
                        num_words_found=len(words),
                    ),
                )

            # Encode the line data into a dictionary:
            # {'value1':'Shore2', 'row':'575', ...}
            if has_label_token:
                lineData = dict(zip(words_format_with_label, words))
            else:
                lineData = dict(zip(words_format_without_label, words))

            row = parse_positive_int(lineData['row'])
            if row is None:
                raise content_error(
                    line_num, stripped_line,
                    str_consts.ANNOTATION_FILE_ROW_NOT_POSITIVE_INT_ERROR_FMTSTR.format(row=lineData['row']),
                )

            col = parse_positive_int(lineData['col'])
            if col is None:
                raise content_error(
                    line_num, stripped_line,
                    str_consts.ANNOTATION_FILE_COL_NOT_POSITIVE_INT_ERROR_FMTSTR.format(column=lineData['col']),
                )

            if expecting_labels:
                # Check that the label code corresponds to a label in the
                # database and in the source's labelset.
                # Only check this if the label code hasn't been seen before
                # in the annotations file.
                label_code = lineData['label']
                if label_code not in seen_label_codes:

                    labelObjs = Label.objects.filter(code=label_code)
                    if len(labelObjs) == 0:
                        raise content_error(
                            line_num, stripped_line,
                            str_consts.ANNOTATION_FILE_LABEL_NOT_IN_DATABASE_ERROR_FMTSTR.format(label_code=label_code),
                        )

                    labelObj = labelObjs[0]
                    if labelObj not in source.labelset.labels.all():
                        raise content_error(
                            line_num, stripped_line,
                            str_consts.ANNOTATION_FILE_LABEL_NOT_IN_LABELSET_ERROR_FMTSTR.format(label_code=label_code),
                        )

                    seen_label_codes.add(label_code)

            # Get and check the photo year to make sure it's valid.
            # We'll assume the year is the first 4 characters of the date.
            year = lineData['date'][:4]
            try:
                datetime.date(int(year), 1, 1)
            # Year is non-coercable to int, or year is out of range
            # (e.g. 0 or negative)
            except ValueError:
                raise content_error(
                    line_num, stripped_line,
                    str_consts.ANNOTATION_FILE_YEAR_ERROR_FMTSTR.format(year=year),
                )

            # TODO: Check if the row and col in this line are a valid row
            # and col for the image. Need the image to do that, though...

            # Use the location values and the year to build a string
            # identifier for the image, such as:
            # Shore1;Reef5;...;2008
            valueList = [lineData[name] for name in value_field_names]
            imageIdentifier = get_image_identifier(valueList, year)

            # Add/update a dictionary entry for the image with this
            # identifier. The entry's value is a list of annotations, each
            # a dict containing row, col, and label (or just row and col,
            # if no labels).
            #
            # Can't append directly to annotation_dict[imageIdentifier], due
            # to how shelved dicts work: the shelf hands back a copy, so the
            # modified list has to be assigned back to the key.
            # See http://docs.python.org/library/shelve.html?highlight=shelve#example
            image_annotations = annotation_dict.get(imageIdentifier, [])
            if expecting_labels:
                image_annotations.append(
                    dict(row=row, col=col, label=lineData['label'])
                )
            else:
                image_annotations.append(
                    dict(row=row, col=col)
                )
            annotation_dict[imageIdentifier] = image_annotations

        succeeded = True
    finally:
        # The shelf is only handed back to the caller on success; on any
        # failure (expected or not) release it here so it isn't leaked.
        if not succeeded:
            annotation_dict.close()
        annoFile.close()

    return (annotation_dict, annotation_dict_id)
def store_csv_file(csv_file, source):
    """
    Parse an uploaded metadata CSV file and store the result temporarily
    using python's shelve module.

    Each non-blank CSV row is expected to be:
    <filename>,<photo_date>,<value1>,...,<valueN>,<height_in_cm>,
    <latitude>,<longitude>,<depth>,<camera>,<photographer>,
    <water_quality>,<strobes>,<framing>,<balance>
    where N is source.num_of_keys(). Blank rows are skipped.

    Returns: a tuple (csv_dict, csv_dict_id).
    csv_dict maps each filename to a dict of {field name: value}.
    csv_dict_id is the random string used in the shelf's filename.

    Raises:
    DirectoryAccessError - settings.SHELVED_ANNOTATIONS_DIR is missing,
        unreadable, or unwritable.
    FileContentError - a filename appears twice, or a row has too many
        or too few metadata values.
    """
    # TODO: If we return the whole CSV dict to the Javascript side anyway,
    # then we don't really need to keep a shelved version of the dict
    # on the server side.  That's redundant.
    if not os.access(settings.SHELVED_ANNOTATIONS_DIR, os.R_OK | os.W_OK):
        # Don't catch this error and display it to the user.
        # Just let it become a server error to be e-mailed to the admins.
        raise DirectoryAccessError(
            "The SHELVED_ANNOTATIONS_DIR either does not exist, is not readable, or is not writable. Please rectify this."
        )

    csv_dict_id = rand_string(10)
    csv_dict = dict()

    # splitlines() is to do system-agnostic handling of newline characters.
    # The csv module can't do that by default (fails on CR only).
    reader = csv.reader(csv_file.read().splitlines(), dialect='excel')

    num_keys = source.num_of_keys()
    fields = (['photo_date'] +
              ['value1', 'value2', 'value3', 'value4', 'value5'][:num_keys] +
              ['height_in_cm', 'latitude', 'longitude', 'depth',
               'camera', 'photographer', 'water_quality', 'strobes',
               'framing', 'balance'])
    bom = codecs.BOM_UTF8

    for row in reader:
        # csv.reader yields an empty list for a blank line; there is no
        # filename to read from it, so skip it.
        if not row:
            continue

        # Gets filename, strips any UTF-8 BOM from the start of the CSV line.
        # The BOM is removed as a prefix; lstrip() would treat it as a set
        # of characters and could eat the first byte of other legitimate
        # text.
        filename = row[0]
        while filename.startswith(bom):
            filename = filename[len(bom):]
        values = row[1:]

        # Checks if we already found data for this filename.
        if filename in csv_dict:
            raise FileContentError('metadata for file "{file}" found twice in CSV file.'.format(
                file=filename,
            ))

        if len(values) > len(fields):
            raise FileContentError("{file}: Too many metadata values.".format(file=filename))
        if len(values) < len(fields):
            raise FileContentError("{file}: Too few metadata values.".format(file=filename))

        # Num of comma-separated values equals num of expected fields.
        # Get the metadata from the CSV row.
        csv_dict[filename] = dict(zip(fields, values))

    csv_shelf_dict = shelve.open(os.path.join(
        settings.SHELVED_ANNOTATIONS_DIR,
        'csv_source{source_id}_{dict_id}.db'.format(
            source_id=source.id,
            dict_id=csv_dict_id,
        ),
    ))
    try:
        for k, v in csv_dict.items():
            csv_shelf_dict[k] = v
    finally:
        # Close the shelf even if a write fails.
        csv_shelf_dict.close()

    return (csv_dict, csv_dict_id)