def _clean_valid_inputformat(self, name):
    """Ensure field with given name has a valid format.

    Two kinds of fields are handled, told apart by the type of the
    cleaned value:

    - a list (the data split fields train_idx / val_idx / test_idx):
      every item is validated and expanded on its own and checked against
      num_instances of the cleaned 'data' field;
    - anything else (input_variables / output_variables): the single
      value is validated, expanded and checked against num_attributes
      of the cleaned 'data' field.

    In both cases the result is additionally checked for intersection
    with the sibling fields submitted in the raw form data.

    @param name: name of field to clean
    @type name: string
    @return: None if the field is empty, a list of lists of integers for
        split fields, a list of integers for variable fields
    @rtype: list of integers, list of lists of integers or None
    @raise forms.ValidationError: if the field is missing from
        cleaned_data, malformed, out of bounds or intersecting with a
        sibling field
    """
    if name not in self.cleaned_data:
        # Same exception class as every other failure in this method.
        raise forms.ValidationError(
            _('Invalid format (example: 0,1,2:5,5 = 0,1,2,3,4,5)'))

    value = self.cleaned_data[name]
    if not value:
        return None

    # list ? datasplit
    if isinstance(value, list):
        out = []
        for split in value:
            if not check_split_str(split):
                raise forms.ValidationError('invalid format')
            # convert python-like string into list of integers
            split = [int(x) for x in expand_split_str(split)]
            # min/max instead of first/last: do not rely on the expanded
            # indices being sorted.
            if split and (
                    max(split) >= self.cleaned_data['data'].num_instances
                    or min(split) < 0):
                raise forms.ValidationError('index out of bounds')
            out.append(split)

        # check if any of the splits intersect with the other split fields
        for d in ['train_idx', 'val_idx', 'test_idx']:
            if d in self.data and d != name:
                intersec = check_split_intersec([
                    [expand_split_str(i) for i in self.data.getlist(d)],
                    out,
                ])
                if intersec:
                    raise forms.ValidationError(
                        'index intersection in row ' + str(intersec))
        return out

    # else input or output variables
    if not check_split_str(value):
        raise forms.ValidationError('invalid format')

    split = [int(x) for x in expand_split_str(value)]
    if split:
        if (max(split) >= self.cleaned_data['data'].num_attributes
                or min(split) < 0):
            raise forms.ValidationError('index out of bounds')
        for d in ['input_variables', 'output_variables']:
            if d in self.data and d != name:
                # NOTE(review): the whole getlist() result is handed to
                # expand_split_str here, while the split branch above
                # expands item by item -- confirm the helper accepts a
                # list.
                intersec = check_split_intersec([
                    [int(i)
                     for i in expand_split_str(self.data.getlist(d))],
                    split,
                ])
                if intersec:
                    raise forms.ValidationError('index intersection')
    return split
def _clean_valid_inputformat(self, name):
    """Ensure field with given name has a valid format.

    Two kinds of fields are handled, told apart by the type of the
    cleaned value:

    - a list (the data split fields train_idx / val_idx / test_idx):
      every item is either a percentage such as '30%', which is turned
      into a random sample of instance indices, or a python-like index
      string, which is validated, expanded and checked against
      num_instances of the cleaned 'data' field;
    - anything else (input_variables / output_variables): the single
      value is validated, expanded and checked against num_attributes
      of the cleaned 'data' field.

    In both cases the result is additionally checked for intersection
    with the sibling fields submitted in the raw form data.

    @param name: name of field to clean
    @type name: string
    @return: None if the field is empty, a list of lists of integers for
        split fields, a list of integers for variable fields
    @rtype: list of integers, list of lists of integers or None
    @raise forms.ValidationError: if the field is missing from
        cleaned_data, malformed, out of bounds, asks for more instances
        than are left, or intersects with a sibling field
    """
    if name not in self.cleaned_data:
        # Same exception class as every other failure in this method.
        raise forms.ValidationError(
            _('Invalid format (example: 0,1,2:5,5 = 0,1,2,3,4,5)'))

    value = self.cleaned_data[name]
    if not value:
        return None

    # list ? datasplit
    if isinstance(value, list):
        dset = ['train_idx', 'val_idx', 'test_idx']
        out = []
        # loop through all split parts of the same kind (name)
        for row, split in enumerate(value):
            # handle percent
            if len(split) > 0 and split[-1] == '%':
                try:
                    frac = float(split[:-1]) / 100.0
                except ValueError:
                    # e.g. 'abc%' -- report it instead of crashing
                    raise forms.ValidationError('invalid format')
                if math.isnan(frac) or frac < 0:
                    raise forms.ValidationError('invalid format')

                total = self.cleaned_data['data'].num_instances
                remaining = list(range(total))
                # filter instances already selected in the same row of
                # the sibling fields that have been cleaned so far
                for d in dset:
                    if d == name:
                        continue
                    rows = self.cleaned_data.get(d)
                    # "> row", not ">= row": the row must actually exist.
                    # A sibling cleaned to None has no rows at all.
                    if rows and len(rows) > row:
                        taken = set(rows[row])
                        remaining = [x for x in remaining
                                     if x not in taken]
                if not remaining:
                    raise forms.ValidationError('sum of percents > 1')

                # The percentage refers to the whole data set, but can
                # only be drawn from what is left.  Computing the count
                # straight from frac * total avoids a divide/multiply
                # round trip that can floor one instance short.
                wanted = frac * float(total)
                if wanted > len(remaining):
                    raise forms.ValidationError('sum of percents > 1')
                split = random.sample(remaining, int(math.floor(wanted)))
            elif not check_split_str(split):
                raise forms.ValidationError('invalid format')
            else:
                # convert python-like string into list of integers
                split = [int(x) for x in expand_split_str(split)]
                # min/max instead of first/last: do not rely on the
                # expanded indices being sorted.
                if split and (
                        max(split) >=
                        self.cleaned_data['data'].num_instances
                        or min(split) < 0):
                    raise forms.ValidationError('index out of bounds')
            # save the split
            out.append(split)

        # check if any of splits intersect with each other
        for d in dset:
            if d in self.data and d != name:
                intersec = check_split_intersec([
                    [expand_split_str(i) for i in self.data.getlist(d)],
                    out,
                ])
                if intersec:
                    raise forms.ValidationError(
                        'index intersection in row ' + str(intersec))
        return out

    # else input or output variables
    if not check_split_str(value):
        raise forms.ValidationError('invalid format')

    split = [int(x) for x in expand_split_str(value)]
    if split:
        if (max(split) >= self.cleaned_data['data'].num_attributes
                or min(split) < 0):
            raise forms.ValidationError('index out of bounds')
        for d in ['input_variables', 'output_variables']:
            if d in self.data and d != name:
                # NOTE(review): the whole getlist() result is handed to
                # expand_split_str here, while the split branch above
                # expands item by item -- confirm the helper accepts a
                # list.
                intersec = check_split_intersec([
                    [int(i)
                     for i in expand_split_str(self.data.getlist(d))],
                    split,
                ])
                if intersec:
                    raise forms.ValidationError('index intersection')
    return split
def _clean_valid_inputformat(self, name):
    """Ensure field with given name has a valid format.

    Two kinds of fields are handled, told apart by the type of the
    cleaned value:

    - a list (the data split fields train_idx / val_idx / test_idx):
      every item is either a percentage such as '30%', which is turned
      into a random sample of instance indices, or a python-like index
      string, which is validated, expanded and checked against
      num_instances of the cleaned 'data' field;
    - anything else (input_variables / output_variables): the single
      value is validated, expanded and checked against num_attributes
      of the cleaned 'data' field.

    In both cases the result is additionally checked for intersection
    with the sibling fields submitted in the raw form data.

    @param name: name of field to clean
    @type name: string
    @return: None if the field is empty, a list of lists of integers for
        split fields, a list of integers for variable fields
    @rtype: list of integers, list of lists of integers or None
    @raise forms.ValidationError: if the field is missing from
        cleaned_data, malformed, out of bounds, asks for more instances
        than are left, or intersects with a sibling field
    """
    if name not in self.cleaned_data:
        # Same exception class as every other failure in this method.
        raise forms.ValidationError(
            _('Invalid format (example: 0,1,2:5,5 = 0,1,2,3,4,5)'))

    value = self.cleaned_data[name]
    if not value:
        return None

    # list ? datasplit
    if isinstance(value, list):
        dset = ['train_idx', 'val_idx', 'test_idx']
        out = []
        # loop through all split parts of the same kind (name)
        for row, split in enumerate(value):
            # handle percent
            if len(split) > 0 and split[-1] == '%':
                try:
                    frac = float(split[:-1]) / 100.0
                except ValueError:
                    # e.g. 'abc%' -- report it instead of crashing
                    raise forms.ValidationError('invalid format')
                if math.isnan(frac) or frac < 0:
                    raise forms.ValidationError('invalid format')

                total = self.cleaned_data['data'].num_instances
                remaining = list(range(total))
                # filter instances already selected in the same row of
                # the sibling fields that have been cleaned so far
                for d in dset:
                    if d == name:
                        continue
                    rows = self.cleaned_data.get(d)
                    # "> row", not ">= row": the row must actually exist.
                    # A sibling cleaned to None has no rows at all.
                    if rows and len(rows) > row:
                        taken = set(rows[row])
                        remaining = [x for x in remaining
                                     if x not in taken]
                if not remaining:
                    raise forms.ValidationError('sum of percents > 1')

                # The percentage refers to the whole data set, but can
                # only be drawn from what is left.  Computing the count
                # straight from frac * total avoids a divide/multiply
                # round trip that can floor one instance short.
                wanted = frac * float(total)
                if wanted > len(remaining):
                    raise forms.ValidationError('sum of percents > 1')
                split = random.sample(remaining, int(math.floor(wanted)))
            elif not check_split_str(split):
                raise forms.ValidationError('invalid format')
            else:
                # convert python-like string into list of integers
                split = [int(x) for x in expand_split_str(split)]
                # min/max instead of first/last: do not rely on the
                # expanded indices being sorted.
                if split and (
                        max(split) >=
                        self.cleaned_data['data'].num_instances
                        or min(split) < 0):
                    raise forms.ValidationError('index out of bounds')
            # save the split
            out.append(split)

        # check if any of splits intersect with each other
        for d in dset:
            if d in self.data and d != name:
                intersec = check_split_intersec([
                    [expand_split_str(i) for i in self.data.getlist(d)],
                    out,
                ])
                if intersec:
                    raise forms.ValidationError(
                        'index intersection in row ' + str(intersec))
        return out

    # else input or output variables
    if not check_split_str(value):
        raise forms.ValidationError('invalid format')

    split = [int(x) for x in expand_split_str(value)]
    if split:
        if (max(split) >= self.cleaned_data['data'].num_attributes
                or min(split) < 0):
            raise forms.ValidationError('index out of bounds')
        for d in ['input_variables', 'output_variables']:
            if d in self.data and d != name:
                # NOTE(review): the whole getlist() result is handed to
                # expand_split_str here, while the split branch above
                # expands item by item -- confirm the helper accepts a
                # list.
                intersec = check_split_intersec([
                    [int(i)
                     for i in expand_split_str(self.data.getlist(d))],
                    split,
                ])
                if intersec:
                    raise forms.ValidationError('index intersection')
    return split