Пример #1
0
    def run(self, args):
        a = feature_matrix.FeatureMatrix().load(args.infile)
        if args.labels is True:
            print "Stored ", len(a.metadata.filename), " files"
            for f in a.metadata.filename:
                print f
            exit()

        #classe = a.metadata.filename.split("/")[-1].split(".")[0]

        # verification
        #print a.metadata.feature
        #print a.data, a.data.shape
        feature_names = a.metadata.feature.split()
        feature_names.sort()
        if args.features is True:
            if len(feature_names) != a.data.shape[1]:
                print "Feature names are inconsistent with data!"
                print feature_names
                print len(feature_names)
                print a.data.shape
                exit()

            for f in feature_names:
                sys.stdout.write(f)
                if args.csv is True:
                    sys.stdout.write(",")
                sys.stdout.write(" ")
            print "label"
        else:
            for i in xrange(len(a.metadata.filename)):
                for j in xrange(a.data.shape[1] - 1):
                    sys.stdout.write(str(a.data[i, j]))
                    if args.csv is True:
                        sys.stdout.write(",")
                    sys.stdout.write(" ")
                sys.stdout.write(str(a.data[i, j + 1]))
                sys.stdout.write(", ")
                #A linha seguinte esta porquinha.
                #masss (requer que o nome da classe seja o primeiro no arquivo
                sys.stdout.write(
                    a.metadata.filename[i].split("/")[-1].split(".")[0])
                print ""
Пример #2
0
    def run(self, args):
        a = feature_matrix.FeatureMatrix().load(args.database)
        file_label_dict = self.label_list(open(args.labels, 'rb'))

        # Start join process
        output = ""
        # Title row
        output += "filename,"
        for i in xrange(a.data.shape[1]):
            output += "F" + str(i) + ","
        output += "Class\n"

        for i in xrange(len(a.metadata.filename)):
            fname = a.metadata.filename[i].split('/')[-1]
            output += fname + ","
            for j in xrange(a.data.shape[1]):
                output += str(a.data[i][j]) + ","
            output += str(file_label_dict[fname]) + "\n"

        print output
Пример #3
0
    def run(self):

        print "training model from train filelist: %s" % (
            self.params['general']['train_filelist'])

        m = feature_matrix.FeatureMatrix()
        mf = open(self.params['general']['scratch_directory'] + "/" +
                  self.params['feature_aggregation']['aggregated_output'])
        m = m.load(mf)
        mf.close()

        with open(self.params['general']['train_filelist']) as f:
            linhas = f.readlines()

        files = dict()

        for i in xrange(len(m.metadata.filename)):
            files[m.metadata.filename[i]] = i

        labels = []
        features = []

        for i in xrange(len(linhas)):
            filename = linhas[i].split("\t")[0].strip()
            label = linhas[i].split("\t")[1].strip()
            labels.append(label)

            feat = m.data[files[filename]]
            features.append(feat)

        features = numpy.array(features)

        files = None

        input = ModelTrainerInput(features, labels)

        gc.collect()

        self.train(input)
Пример #4
0
    def run(self, args):
        """Aggregate per-frame features over labeled time segments.

        Loads a FeatureTrack, reads a label file with one
        "onset offset label" triple per line, computes per-segment
        statistics (mean / variance / slope, as selected in args) and
        saves the result as a FeatureMatrix to args.outfile. With
        args.csv the rows are also printed to stdout as CSV.
        """
        o = track.FeatureTrack()
        o = o.load(args.trackfile)
        #print o.metadata
        #print o.data

        # Read label file: one "onset offset label" triple per line.
        onsets = []
        offsets = []
        labels = []
        with open(args.labelfile) as f:
            content = f.readlines()
            for line in content:
                L = line.split()
                #print L
                onsets.append(float(L[0]))
                offsets.append(float(L[1]))
                # Normalize '_' and '-' to '+', then keep only the first
                # token (e.g. "speech_male" -> "speech").
                L[2] = L[2].replace('_', '-')
                L[2] = L[2].replace('-', '+')
                labels.append(str(L[2].split('+')[0]))
                #print onsets[-1], offsets[-1], labels[-1]
        #exit()

        final_output = None
        final_filenames = []
        final_filenames.append(o.metadata.filename)
        # Force 2-D data so the column indexing below also works for
        # single-feature tracks.
        if o.data.ndim == 1:
            o.data.shape = (o.data.size, 1)
        feats = o.metadata.feature.split()
        a = numpy.array(feats)
        # Permutation that sorts columns by feature name.
        i = a.argsort()
        # Output sampling frequency: converts label times (seconds) into
        # frame indexes.
        ofs = o.metadata.sampling_configuration.ofs
        #print offsets[-1], o.data.shape, o.data.shape[0] / float(ofs)
        #print i

        # Build the new feature-name string in the same sorted order used
        # for the data columns.
        my_features = o.metadata.feature.split()
        my_features.sort()
        new_features = ""
        if args.mean is True:
            for feat in my_features:
                new_features = new_features + " " + "mean_" + feat
        if args.variance is True:
            for feat in my_features:
                new_features = new_features + " " + "var_" + feat
        if args.slope is True:
            for feat in my_features:
                new_features = new_features + " " + "slope_" + feat
        if args.limits is True:
            for feat in my_features:
                new_features = new_features + " " + "max_" + feat
            for feat in my_features:
                new_features = new_features + " " + "argmax_" + feat
            for feat in my_features:
                new_features = new_features + " " + "min_" + feat
            for feat in my_features:
                new_features = new_features + " " + "argmin_" + feat

        new_features = new_features.strip()

        # CSV header row.
        if args.csv is True:
            sys.stdout.write(new_features.replace(' ', ','))
            sys.stdout.write(',LABEL')
            sys.stdout.write('\n')

        #exit()

        for d in xrange(len(onsets)):
            out = numpy.array([])
            # Re-derive the sort index every segment: the slope branch
            # below clobbers `i`.
            i = a.argsort()

            minN = int(onsets[d] * float(ofs))
            maxN = int(offsets[d] * float(ofs))
            # Guarantee at least two frames so var/linregress are defined.
            if maxN <= (minN + 1):
                maxN = minN + 2
            #print minN, maxN, onsets[d], offsets[d], o.data.shape[0], ofs

            if args.mean is True:
                out = numpy.hstack((out, o.data[minN:maxN, :].mean(axis=0)[i]))
                #print out.shape, o.data[minN:maxN,:].mean(axis=0).shape

            if args.variance is True:
                out = numpy.hstack((out, o.data[minN:maxN, :].var(axis=0)[i]))

            if args.slope is True:
                # Per-column slope of a linear regression of the feature
                # against the frame index.
                # NOTE(review): the loop variable shadows the sort index
                # `i` (the range expression is evaluated once while `i` is
                # still the argsort array), so lindata is emitted in
                # UNSORTED column order -- unlike mean/variance. Confirm
                # whether this ordering is intended.
                variance = o.data[minN:maxN, :].var(axis=0)[i]
                lindata = numpy.zeros(variance.shape)
                for i in xrange(o.data[minN:maxN, i].shape[1]):
                    lindata[i] = scipy.stats.linregress(o.data[minN:maxN,i],\
                                     range(o.data[minN:maxN,:].shape[0]))[0]

                out = numpy.hstack((out, lindata))

            if args.csv is True:
                for i in xrange(len(out)):
                    sys.stdout.write(str(out[i]))
                    sys.stdout.write(",")
                sys.stdout.write(labels[d])
                sys.stdout.write('\n')

            if final_output is None:
                final_output = out
            else:
                final_output = numpy.vstack((final_output, out))

        p = feature_matrix.FeatureMatrix()
        p.data = final_output.copy()

        if args.normalize:
            # Z-score per column; std floored at 1e-6 to avoid
            # division by zero on constant columns.
            std_p = p.data.std(axis=0)
            p.data = (p.data - p.data.mean(axis=0))/\
                    numpy.maximum(10**(-6), std_p)

        p.metadata.sampling_configuration = o.metadata.sampling_configuration
        p.metadata.feature = new_features
        p.metadata.filename = final_filenames
        p.save(args.outfile)
Пример #5
0
    def stats(self,
              feature_tracks,
              mean=False,
              variance=False,
              delta=False,
              acceleration=False,
              slope=False,
              limits=False,
              csv=False,
              normalize=False):
        """Compute per-track statistics for a list of feature tracks.

        For each track, concatenates the selected statistics (delta,
        acceleration, mean, variance, slope, limits) of every feature
        column -- columns reordered so features appear in sorted-name
        order -- into one row vector. Returns a FeatureMatrix whose rows
        are those vectors, with feature/filename/sampling metadata filled
        in. With csv=True, also prints each row to stdout as CSV.
        """

        final_output = None
        final_filenames = []

        for o in feature_tracks:

            #print o.metadata.feature

            # Permutation that sorts columns by feature name; statistics
            # below are emitted in this sorted order.
            a = numpy.array(o.metadata.feature.split())
            i = a.argsort()

            final_filenames.append(o.metadata.filename)
            # Force 2-D so column operations work for single-feature tracks.
            if o.data.ndim == 1:
                o.data.shape = (o.data.size, 1)

            out = numpy.array([])

            if delta is True:
                #print "o.data shape", o.data.shape
                #print "out shape ", out.shape
                # Mean of first-order deltas per column, sorted order.
                d = numpy.mean(delta_stat(o.data, order=1, axis=0), axis=0)[i]
                #print "d shape ", d.shape
                out = numpy.hstack((out, d))

            if acceleration is True:
                #print "o.data shape", o.data.shape
                #print "out shape ", out.shape
                # Mean of second-order deltas per column, sorted order.
                d = numpy.mean(delta_stat(o.data, order=2, axis=0), axis=0)[i]
                #print "d shape ", d.shape
                out = numpy.hstack((out, d))

            if mean is True:
                out = numpy.hstack((out, o.data.mean(axis=0)[i]))

#            print out.shape

            if variance is True:
                out = numpy.hstack((out, o.data.var(axis=0)[i]))

#            print out.shape
            if slope is True:
                # Per-column slope of a linear regression of the feature
                # against the frame index.
                # NOTE(review): the loop variable below shadows the sort
                # index `i`, so lindata is emitted in UNSORTED column order
                # (unlike the other statistics), and every later use of `i`
                # in this iteration sees a leftover scalar -- confirm this
                # is intended.
                variance = o.data.var(axis=0)[i]
                lindata = numpy.zeros(variance.shape)
                for i in xrange(o.data.shape[1]):
                    lindata[i] = scipy.stats.linregress(o.data[:,i],\
                                            range(o.data.shape[0]))[0]

                out = numpy.hstack((out, lindata))

            if limits is True:
                # NOTE(review): if slope is also True, `i` is a scalar here
                # (clobbered above), so these [i] lookups select a single
                # column instead of reordering -- likely a bug.
                out = numpy.hstack((out, o.data.max(axis=0)[i]))
                # argmax/argmin positions are normalized by track length.
                out = numpy.hstack(
                    (out, o.data.argmax(axis=0)[i] / float(o.data.shape[0])))
                out = numpy.hstack((out, o.data.min(axis=0)[i]))
                out = numpy.hstack(
                    (out, o.data.argmin(axis=0)[i] / float(o.data.shape[0])))

                # NOTE(review): out is reshaped to 2-D only in this branch;
                # other statistic combinations rely on vstack below.
                out.shape = (1, out.size)

            if csv is True:
                for i in xrange(len(out) - 1):
                    sys.stdout.write(str(out[i]))
                    sys.stdout.write(",")
                sys.stdout.write(str(out[-1]))
                sys.stdout.write('\n')

            if final_output is None:
                final_output = out
            else:
                final_output = numpy.vstack((final_output, out))

        # Dealing with feature metadata:
        # NOTE(review): `o` is the loop variable leaking out of the loop,
        # i.e. the LAST track's feature names and sampling configuration
        # stand in for all tracks -- assumes homogeneous tracks; confirm.
        my_features = o.metadata.feature.split()
        my_features.sort()
        new_features = ""

        if delta is True:
            for feat in my_features:
                new_features = new_features + " " + "delta_" + feat
        if acceleration is True:
            for feat in my_features:
                new_features = new_features + " " + "accel_" + feat
        if mean is True:
            for feat in my_features:
                new_features = new_features + " " + "mean_" + feat
        if variance is True:
            for feat in my_features:
                new_features = new_features + " " + "var_" + feat
        if slope is True:
            for feat in my_features:
                new_features = new_features + " " + "slope_" + feat
        if limits is True:
            for feat in my_features:
                new_features = new_features + " " + "max_" + feat
            for feat in my_features:
                new_features = new_features + " " + "argmax_" + feat
            for feat in my_features:
                new_features = new_features + " " + "min_" + feat
            for feat in my_features:
                new_features = new_features + " " + "argmin_" + feat

        #print new_features

        p = feature_matrix.FeatureMatrix()
        p.data = final_output.copy()

        if normalize:
            # Z-score per column; std floored at 1e-6 to avoid
            # division by zero on constant columns.
            std_p = p.data.std(axis=0)
            p.data = (p.data - p.data.mean(axis=0))/\
                    numpy.maximum(10**(-6), std_p)

        p.metadata.sampling_configuration = o.metadata.sampling_configuration
        p.metadata.feature = new_features
        p.metadata.filename = final_filenames

        return p