Exemplo n.º 1
0
    def _clean_valid_inputformat(self, name):
        """Validate the index specification stored in cleaned_data[name].

        Two kinds of field are handled:

          - list-valued fields (the data splits 'train_idx', 'val_idx',
            'test_idx'): every entry is expanded to a list of integers and
            checked against cleaned_data['data'].num_instances;
          - any other field ('input_variables' / 'output_variables'): the
            value is expanded to a single list of integers and checked
            against cleaned_data['data'].num_attributes.

        In both cases the result is passed to check_split_intersec together
        with the sibling fields submitted in self.data.

        @param name: name of field to clean
        @type name: string
        @return: None if the field is empty, a list of integer lists for a
            list-valued field, otherwise a list of integers
        @rtype: None, list of lists of integers, or list of integers
        @raise ValidationError: if the field is missing from cleaned_data,
            malformed, out of bounds, or intersects with a sibling field
        """
        if name not in self.cleaned_data:
            raise ValidationError(_('Invalid format (example: 0,1,2:5,5 = 0,1,2,3,4,5)'))

        value = self.cleaned_data[name]
        if not value:
            return None

        # list-valued field: data split
        if isinstance(value, list):
            out = []
            for raw in value:
                if not check_split_str(raw):
                    raise forms.ValidationError('invalid format')
                split = [int(x) for x in expand_split_str(raw)]
                # min()/max() rather than first/last element, so the check
                # does not depend on expand_split_str returning sorted
                # indices.
                if split and (max(split) >= self.cleaned_data['data'].num_instances
                              or min(split) < 0):
                    raise forms.ValidationError('index out of bounds')
                out.append(split)

            # Separate outer list for the intersection check, as in the
            # original implementation; the returned list stays untouched.
            splits = list(out)
            for d in ('train_idx', 'val_idx', 'test_idx'):
                # `in` instead of has_key(): works on Python 2 and 3.
                if d != name and d in self.data:
                    others = [expand_split_str(entry)
                              for entry in self.data.getlist(d)]
                    intersec = check_split_intersec([others, splits])
                    if intersec:
                        raise forms.ValidationError('index intersection in row ' + str(intersec))
            return out

        # single-valued field: input or output variables
        if not check_split_str(value):
            raise forms.ValidationError('invalid format')

        split = [int(x) for x in expand_split_str(value)]
        if split and (max(split) >= self.cleaned_data['data'].num_attributes
                      or min(split) < 0):
            raise forms.ValidationError('index out of bounds')

        for d in ('input_variables', 'output_variables'):
            if d != name and d in self.data:
                # NOTE(review): the whole getlist() result is handed to
                # expand_split_str here, whereas the data-split branch
                # expands each entry separately -- confirm the helper
                # accepts a list.
                other = [int(x) for x in expand_split_str(self.data.getlist(d))]
                intersec = check_split_intersec([other, split])
                if intersec:
                    raise forms.ValidationError('index intersection')
        return split
Exemplo n.º 2
0
    def _clean_valid_inputformat(self, name):
        """Validate the index specification stored in cleaned_data[name].

        Two kinds of field are handled:

          - list-valued fields (the data splits 'train_idx', 'val_idx',
            'test_idx'): every entry is either a percentage ('30%') or an
            index string.  A percentage is replaced by a random sample of
            that share of cleaned_data['data'].num_instances, drawn from
            the instances not already present in the same row of the other
            split fields in cleaned_data.  An index string is expanded to
            a list of integers and checked against num_instances.
          - any other field ('input_variables' / 'output_variables'): the
            value is expanded to a single list of integers and checked
            against cleaned_data['data'].num_attributes.

        In both cases the result is passed to check_split_intersec together
        with the sibling fields submitted in self.data.

        @param name: name of field to clean
        @type name: string
        @return: None if the field is empty, a list of integer lists for a
            list-valued field, otherwise a list of integers
        @rtype: None, list of lists of integers, or list of integers
        @raise ValidationError: if the field is missing from cleaned_data,
            malformed, out of bounds, asks for more instances than are
            left, or intersects with a sibling field
        """
        if name not in self.cleaned_data:
            raise ValidationError(
                _('Invalid format (example: 0,1,2:5,5 = 0,1,2,3,4,5)'))

        value = self.cleaned_data[name]
        if not value:
            return None

        # list-valued field: data split
        if isinstance(value, list):
            dset = ('train_idx', 'val_idx', 'test_idx')
            out = []

            # loop through all split parts of the same kind (name)
            for row, raw in enumerate(value):
                if len(raw) > 0 and raw[-1] == '%':
                    # percentage entry
                    try:
                        frac = float(raw[:-1]) / 100.0
                    except ValueError:
                        raise forms.ValidationError('invalid format')
                    # Reject negative percentages and NaN; they would make
                    # the sampling below fail with an unhandled ValueError.
                    if not frac >= 0.0:
                        raise forms.ValidationError('invalid format')
                    numat = self.cleaned_data['data'].num_instances

                    # Instances already used by the same row of the other,
                    # previously cleaned, split fields.
                    taken = set()
                    for d in dset:
                        if d == name:
                            continue
                        other = self.cleaned_data.get(d)
                        # `> row` (not `>=`): the row must actually exist.
                        # A field cleaned to None is skipped.
                        if other and len(other) > row:
                            taken.update(other[row])
                    ints = [x for x in range(numat) if x not in taken]

                    numrest = len(ints)
                    if numrest == 0:
                        raise forms.ValidationError('sum of percents > 1')

                    # adjust the sampling ratio to the instances left
                    frac = frac * float(numat) / float(numrest)
                    if frac > 1:
                        raise forms.ValidationError('sum of percents > 1')

                    # sample with given ratio from remaining instances
                    split = random.sample(
                        ints, int(math.floor(frac * numrest)))
                elif not check_split_str(raw):
                    raise forms.ValidationError('invalid format')
                else:
                    # convert python-like string into list of integers
                    split = [int(x) for x in expand_split_str(raw)]
                    # min()/max() rather than first/last element, so the
                    # check does not depend on expand_split_str returning
                    # sorted indices.
                    if split and (
                            max(split) >=
                            self.cleaned_data['data'].num_instances
                            or min(split) < 0):
                        raise forms.ValidationError('index out of bounds')

                # save the split
                out.append(split)

            # Separate outer list for the intersection check, as in the
            # original implementation; the returned list stays untouched.
            splits = list(out)

            # check if any of splits intersect with each other
            for d in dset:
                # `in` instead of has_key(): works on Python 2 and 3.
                if d != name and d in self.data:
                    others = [
                        expand_split_str(entry)
                        for entry in self.data.getlist(d)
                    ]
                    intersec = check_split_intersec([others, splits])
                    if intersec:
                        raise forms.ValidationError(
                            'index intersection in row ' + str(intersec))
            return out

        # single-valued field: input or output variables
        if not check_split_str(value):
            raise forms.ValidationError('invalid format')

        split = [int(x) for x in expand_split_str(value)]
        if split and (
                max(split) >= self.cleaned_data['data'].num_attributes
                or min(split) < 0):
            raise forms.ValidationError('index out of bounds')

        for d in ('input_variables', 'output_variables'):
            if d != name and d in self.data:
                # NOTE(review): the whole getlist() result is handed to
                # expand_split_str here, whereas the data-split branch
                # expands each entry separately -- confirm the helper
                # accepts a list.
                other = [
                    int(x) for x in expand_split_str(self.data.getlist(d))
                ]
                intersec = check_split_intersec([other, split])
                if intersec:
                    raise forms.ValidationError('index intersection')
        return split
Exemplo n.º 3
0
    def _clean_valid_inputformat(self, name):
        """Validate the index specification stored in cleaned_data[name].

        Two kinds of field are handled:

          - list-valued fields (the data splits 'train_idx', 'val_idx',
            'test_idx'): every entry is either a percentage ('30%') or an
            index string.  A percentage is replaced by a random sample of
            that share of cleaned_data['data'].num_instances, drawn from
            the instances not already present in the same row of the other
            split fields in cleaned_data.  An index string is expanded to
            a list of integers and checked against num_instances.
          - any other field ('input_variables' / 'output_variables'): the
            value is expanded to a single list of integers and checked
            against cleaned_data['data'].num_attributes.

        In both cases the result is passed to check_split_intersec together
        with the sibling fields submitted in self.data.

        @param name: name of field to clean
        @type name: string
        @return: None if the field is empty, a list of integer lists for a
            list-valued field, otherwise a list of integers
        @rtype: None, list of lists of integers, or list of integers
        @raise ValidationError: if the field is missing from cleaned_data,
            malformed, out of bounds, asks for more instances than are
            left, or intersects with a sibling field
        """
        if name not in self.cleaned_data:
            raise ValidationError(_('Invalid format (example: 0,1,2:5,5 = 0,1,2,3,4,5)'))

        value = self.cleaned_data[name]
        if not value:
            return None

        # list-valued field: data split
        if isinstance(value, list):
            dset = ('train_idx', 'val_idx', 'test_idx')
            out = []

            # loop through all split parts of the same kind (name)
            for row, raw in enumerate(value):
                if len(raw) > 0 and raw[-1] == '%':
                    # percentage entry
                    try:
                        frac = float(raw[:-1]) / 100.0
                    except ValueError:
                        raise forms.ValidationError('invalid format')
                    # Reject negative percentages and NaN; they would make
                    # the sampling below fail with an unhandled ValueError.
                    if not frac >= 0.0:
                        raise forms.ValidationError('invalid format')
                    numat = self.cleaned_data['data'].num_instances

                    # Instances already used by the same row of the other,
                    # previously cleaned, split fields.
                    taken = set()
                    for d in dset:
                        if d == name:
                            continue
                        other = self.cleaned_data.get(d)
                        # `> row` (not `>=`): the row must actually exist.
                        # A field cleaned to None is skipped.
                        if other and len(other) > row:
                            taken.update(other[row])
                    ints = [x for x in range(numat) if x not in taken]

                    numrest = len(ints)
                    if numrest == 0:
                        raise forms.ValidationError('sum of percents > 1')

                    # adjust the sampling ratio to the instances left
                    frac = frac * float(numat) / float(numrest)
                    if frac > 1:
                        raise forms.ValidationError('sum of percents > 1')

                    # sample with given ratio from remaining instances
                    split = random.sample(ints, int(math.floor(frac * numrest)))
                elif not check_split_str(raw):
                    raise forms.ValidationError('invalid format')
                else:
                    # convert python-like string into list of integers
                    split = [int(x) for x in expand_split_str(raw)]
                    # min()/max() rather than first/last element, so the
                    # check does not depend on expand_split_str returning
                    # sorted indices.
                    if split and (max(split) >= self.cleaned_data['data'].num_instances
                                  or min(split) < 0):
                        raise forms.ValidationError('index out of bounds')

                # save the split
                out.append(split)

            # Separate outer list for the intersection check, as in the
            # original implementation; the returned list stays untouched.
            splits = list(out)

            # check if any of splits intersect with each other
            for d in dset:
                # `in` instead of has_key(): works on Python 2 and 3.
                if d != name and d in self.data:
                    others = [expand_split_str(entry) for entry in self.data.getlist(d)]
                    intersec = check_split_intersec([others, splits])
                    if intersec:
                        raise forms.ValidationError('index intersection in row ' + str(intersec))
            return out

        # single-valued field: input or output variables
        if not check_split_str(value):
            raise forms.ValidationError('invalid format')

        split = [int(x) for x in expand_split_str(value)]
        if split and (max(split) >= self.cleaned_data['data'].num_attributes
                      or min(split) < 0):
            raise forms.ValidationError('index out of bounds')

        for d in ('input_variables', 'output_variables'):
            if d != name and d in self.data:
                # NOTE(review): the whole getlist() result is handed to
                # expand_split_str here, whereas the data-split branch
                # expands each entry separately -- confirm the helper
                # accepts a list.
                other = [int(x) for x in expand_split_str(self.data.getlist(d))]
                intersec = check_split_intersec([other, split])
                if intersec:
                    raise forms.ValidationError('index intersection')
        return split