Example #1
def cal_del(y_max, y_min):
    #find the difference
    dif_ent_max = None
    dif_ent_min = None
    dif_men = None
    nalda = [0,0,0,0]
    for item in y_max:
        if y_max[item].get_name() != y_min[item].get_name():
            dif_men = item
            dif_ent_max = y_max[item]
            dif_ent_min = y_min[item]
    #print dif_ent_max.get_name() + '\t' + dif_ent_min.get_name()
    if dif_men is not None and dif_ent_max is not None and dif_ent_min is not None:
        f_rtf_max = dif_ent_max.get_rtf()
        f_rtf_min = dif_ent_min.get_rtf()
        nalda[0] = f_rtf_max - f_rtf_min
        f_es_max = Feature.es(dif_ent_max.get_name(), mention2ID[dif_men].get_name())
        f_es_min = Feature.es(dif_ent_min.get_name(), mention2ID[dif_men].get_name())
        nalda[1] = f_es_max - f_es_min
        f_rpm_max = dif_ent_max.get_rpm()
        f_rpm_min = dif_ent_min.get_rpm()
        nalda[2] = f_rpm_max - f_rpm_min
        f_tspr_max = 0
        f_tspr_min = 0
        for m in y_max:
            if m != dif_men:
                f_tspr_max += Feature.tspr(dif_ent_max.get_name(), y_max[m].get_name(),eeDic)
                f_tspr_min += Feature.tspr(dif_ent_min.get_name(), y_min[m].get_name(),eeDic)
        nalda[3] = f_tspr_max - f_tspr_min
    return nalda
Example #2
def NeuralNetwork(data_type, train_data, train_label, validation_data,
                  validation_label, test_data, test_label):
    nn = NeuralNetworkClassifier(data_type)
    train_data = Feature.flatten_feature(train_data, 0)
    validation_data = Feature.flatten_feature(validation_data, 0)
    test_data = Feature.flatten_feature(test_data, 0)
    nn.train(train_data, train_label, validation_data, validation_label)
    prediction = nn.prediction(test_data)
    evaluate(prediction, test_label)
Example #3
def getAll4RectangleFeatures(integralInmage, allfeature, initialWidth,
                             initialHeight):
    maxHeight = max(initialHeight, 2)
    maxWidth = max(initialWidth, 2)
    for x in range((integralInmage.shape)[0] - maxHeight):
        for y in range((integralInmage.shape)[1] - maxWidth):
            allfeature.append(
                f.Features('4Rectangle', (x, y), maxWidth, maxHeight, 1, 1))
            allfeature.append(
                f.Features('4Rectangle', (x, y), maxWidth, maxHeight, -1, 1))
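A minimal usage sketch for the generator above, assuming the project's feature module is importable as f (as in the snippet); the integral image is just NumPy's 2D cumulative sum of a hypothetical window.

import numpy as np

window = np.random.rand(24, 24)  # hypothetical grayscale detection window
integral_image = window.cumsum(axis=0).cumsum(axis=1)

all_features = []
getAll4RectangleFeatures(integral_image, all_features, 2, 2)
print(len(all_features))  # two polarities per (x, y) position
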
Example #4
 def generate_feature(self, type):
     feature = Feature(self, type)
     if (type == "rock"):
         feature.move_pos(x = 960-30, y = 300)
         initial_y = randint(0, 240)
         feature.move_pos(y = initial_y)
     if (type == "parallax_cloud_back"):
         initial_y = randint(0, 50)
         feature.move_pos(x = 960-30, y = 0)
         feature.move_pos(y = initial_y)
     self.feature_list.append(feature)
Example #5
def getAll3RectangleHorizontalFeatures(integralInmage, allfeature,
                                       initialWidth, initialHeight):
    maxHeight = max(initialHeight, 3)
    maxWidth = max(initialWidth, 1)
    for x in range((integralInmage.shape)[0] - maxHeight):
        for y in range((integralInmage.shape)[1] - maxWidth):
            allfeature.append(
                f.Features('3RectangleHorizontal', (x, y), maxWidth, maxHeight,
                           1, 1))
            allfeature.append(
                f.Features('3RectangleHorizontal', (x, y), maxWidth, maxHeight,
                           -1, 1))
Example #6
def genJournalIdFeature(instances, paperList, maxJournalId):
    sys.stderr.write("genJournalIdFeature\n")
    d = {}
    for line in paperList:
        paperId = int(line[0])
        journalId = int(line[4])
        d[paperId] = journalId
    feature = Feature(maxJournalId)
    for instance in instances:
        authorId, paperId = instance[0], instance[1]
        journalId = d[paperId] + 1 # -1
        feature.addLine([[journalId, 1.0]])
    feature.fix()
    return feature
Example #7
def genConferenceIdFeature(instances, paperList, maxConferenceId):
    sys.stderr.write("genConferenceIdFeature\n")
    d = {}
    for line in paperList:
        paperId = int(line[0])
        conferenceId = int(line[3])
        d[paperId] = conferenceId
    feature = Feature(maxConferenceId)
    for instance in instances:
        authorId, paperId = instance[0], instance[1]
        conferenceId = d[paperId] + 1 # -1
        feature.addLine([[conferenceId, 1.0]])
    feature.fix()
    return feature
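A hypothetical call, for orientation only: the column layout of paperList (paper id at index 0, conference id at index 3) is taken from the loop above, and Feature is the project's own class; all values here are made up.

paperList = [["12", "a", "2009", "7", "0"],  # [paperId, ..., conferenceId, journalId]
             ["13", "b", "2010", "3", "0"]]
instances = [[101, 12], [102, 13]]           # [authorId, paperId] pairs
feature = genConferenceIdFeature(instances, paperList, maxConferenceId=10)
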
Example #8
def preprocess(config_path, contest):
    config_path = os.path.join(config_path, "*.json")
    config_list = glob.glob(config_path)
    config_list = sorted(config_list)
    for config in config_list:
        is_train = True
        if "Test" in config:
            is_train = False
        name = config.split(os.path.sep)[-1][:-5]
        setting = Setting(path=config, name=name)
        path = setting.rawDataPath
        path = os.path.join(path, name)
        data = Data()
        feature = Feature(setting)
        for sequence in range(setting.splitNum):
            X_train, y_train = data.processSubject(
                path,
                is_train=is_train,
                contest=contest,
                split_number=setting.splitNum,
                sequence=sequence)
            X_pca_train, y_pca_train = feature.pca(X_train,
                                                   y_train,
                                                   window_length=30)
            feature.saveToDisk(feature_name="pca",
                               name=str(sequence),
                               is_train=is_train)
            X_fft_train, y_fft_train = feature.fft(X_train,
                                                   y_train,
                                                   window_length=30)
            feature.saveToDisk(feature_name="fft",
                               name=str(sequence),
                               is_train=is_train)
Example #9
def add_factors(mapping, theta):
    numerator = 1
    intAs = []  # the entity assignments only
    for k in mapping:
        f_rtf = mapping[k].get_rtf()
        f_es = Feature.es(mention2ID[k].get_name(), mapping[k].get_name())
        numerator = numerator * math.exp(f_rtf * theta[0])
        numerator = numerator * math.exp(f_es * theta[1])
        f_rpm = mapping[k].get_rpm()
        numerator = numerator * math.exp(f_rpm * theta[2])
        intAs.append(mapping[k])  # intAs <- mappings

    for com in itertools.combinations(intAs, 2):
        f_tspr = Feature.tspr(com[0].get_name(), com[1].get_name(),eeDic)
        numerator = numerator * math.exp(f_tspr * theta[3])
    return intAs, numerator
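Since a product of exponentials is the exponential of a sum, the numerator above is one weighted feature sum in closed form; a small self-contained sketch of that identity (names here are illustrative, not from the source):

import math

def potential(features, theta):
    # exp(sum_i theta_i * f_i) -- the quantity add_factors accumulates factor by factor
    return math.exp(sum(t * f for t, f in zip(theta, features)))

print(potential([0.5, 1.0, 0.0, 2.0], [0.1, 0.2, 0.3, 0.4]))
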
Example #10
    def classify(self, testing_data):
        prediction_result = []
        for instance in range(len(testing_data)):
            feature = Feature.basicFeaturesExtract(testing_data[instance])
            prediction_result.append(self.prediction(feature))

        return prediction_result
Example #11
File: matrix.py Project: byee4/rbp-maps
def retained_intron(annotation, density,
                    exon_offset, intron_offset,
                    annotation_type="rmats"):
    """
    Creates an r x c pandas dataframe of r events for a
    Retained Intron (RI) feature.

    A RI matrix will contain two distinct regions:

    |_]----||----[_|

    Parameters
    ----------
    annotation : str
        path of file containing the annotation
    density : density.ReadDensity
        object containing the positive and negative BigWig files
    exon_offset : int
        how far into the exon boundary to plot
    intron_offset : int
        how far from the exon boundary to plot
    annotation_type : str
        may be rmats format or any additional defined format in Feature

    Returns
    -------
    pandas.DataFrame : dataframe of r events for an RI feature.
    """

    three_upstream = {}
    five_downstream = {}
    with open(annotation) as f:
        for line in f:
            if not line.startswith('event_name') and not line.startswith('ID') and not line.startswith('annotation'):
                event = line.rstrip()  # .split('\t')[0]
                upstream_interval, downstream_interval = Feature.Retained_intron(
                    event,
                    annotation_type
                ).get_bedtools()

                """three prime upstream region"""
                wiggle = intervals.three_prime_site(
                    density, downstream_interval, upstream_interval,
                    exon_offset, intron_offset
                )
                three_upstream[event] = wiggle

                """five prime site of downstream region"""
                wiggle = intervals.five_prime_site(
                    density, upstream_interval, downstream_interval,
                    exon_offset, intron_offset
                )
                five_downstream[event] = wiggle

        three_upstream = pd.DataFrame(three_upstream).T
        five_downstream = pd.DataFrame(five_downstream).T

    ra = pd.concat([three_upstream, five_downstream], axis=1)
    ra.columns = range(0, ra.shape[1])
    return ra
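A hypothetical call for orientation: density must already be a density.ReadDensity wrapping the positive and negative BigWig files (per the docstring); its construction is project-specific and omitted here, and the rMATS file name is illustrative.

ri = retained_intron('RI.MATS.JunctionCountOnly.txt', density,
                     exon_offset=50, intron_offset=300,
                     annotation_type='rmats')
print(ri.shape)  # r events x concatenated upstream/downstream window columns
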
Example #12
    def __init__(self, id, nr_feat, init_pos_distr, init_feat_distr, var_init):
        """
        :param id: integer describing vehicle number
        :param pos_init: Initial position for example from a measurement.
        :param pos_cov_init: Initial covariance of the position estimate
        :param var_init: Process noise!!!
        :return:

        """
        self.id = id
        self.pos_belief = init_pos_distr
        self.var = var_init
        self.t_s = 1.0
        self.A = np.matrix([[1, 0, self.t_s, 0], [0, 1, 0, self.t_s],
                            [0, 0, 1, 0], [0, 0, 0, 1]])
        self.B = np.matrix('1 0 0 0; ' '0 1 0 0; ' '0 0 1 0; ' '0 0 0 1')
        # self.Q = np.matrix([[self.var[0], 0, 0, 0],
        #                     [0, self.var[1], 0, 0],
        #                     [0, 0, self.var[2], 0],
        #                     [0, 0, 0, self.var[3]]])
        self.Q = self.var * np.matrix(
            [[(self.t_s**3) / 3, 0, (self.t_s**2) / 2, 0],
             [0, (self.t_s**3) / 3, 0, (self.t_s**2) / 2],
             [(self.t_s**2) / 2, 0, self.t_s, 0],
             [0, (self.t_s**2) / 2, 0, self.t_s]])
        self.updt_pos_belief = None
        self.m_xg = [None for i in range(nr_feat)]
        self.m_gx_covinv = [None for i in range(nr_feat)]
        self.m_gx_covinvmu = [None for i in range(nr_feat)]
        self.visible_feat = []
        self.feat = [
            Feature.Feature(i, init_feat_distr[i]) for i in range(nr_feat)
        ]
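For reference, the Q above is the standard piecewise white-noise acceleration covariance, the per-axis block [[T^3/3, T^2/2], [T^2/2, T]] scaled by the noise variance; a quick NumPy check of that structure for the [x, y, vx, vy] state ordering used above:

import numpy as np

t_s, var = 1.0, 0.5
q = np.array([[t_s**3 / 3, t_s**2 / 2],
              [t_s**2 / 2, t_s]])
# interleave the 2x2 block over the [x, y, vx, vy] ordering
Q = var * np.block([[q[0, 0] * np.eye(2), q[0, 1] * np.eye(2)],
                    [q[1, 0] * np.eye(2), q[1, 1] * np.eye(2)]])
print(Q)
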
Example #13
File: matrix.py Project: byee4/rbp-maps
def scaled_region(
        annotation, density, annotation_type,
        upstream_offset, downstream_offset, normalize
):
    densities = {}
    # TODO: pd.DataFrame.from_dict(dic, orient="index")
    with open(annotation) as f:
        for line in f:
            if not line.startswith('event_name') and not line.startswith('ID') and not line.startswith('annotation'):
                event = line.rstrip()
                interval = Feature.Feature(
                    event, annotation_type
                ).get_bedtool()

                wiggle = intervals.generic_site(
                    density,
                    interval,
                    upstream_offset,
                    downstream_offset
                )
                if normalize:
                    wiggle = intervals.get_scale(wiggle)
                densities[intervals.rename_index(interval)] = wiggle
    try:
        return pd.DataFrame(densities).T
    except Exception as e:
        print(e)
        print("found different length features")
        for key, value in densities.items():
            densities[key] = intervals.get_scale(value)
        return pd.DataFrame(densities).T
Example #14
 def __init__(self, config):
     self._config = config
     self._imglst = config.ImageList()
     self._graph = Feature.Graph(config)
     self._graph.ConstructGraph()
     self._feature_loc = {}
     for img in self._imglst:
         key = Config.ShortName(img)
         self._feature_loc[key] = config.LoadFeature(img)
Example #15
 def __init__(self):
     self.display = display()
     self.updater = updater(self.display)
     self.connected = 0
     self.prev = -2
     self.connect_status = Feature.Connected(self.display, "single", 2, 31, 1)
     self.leftEye = Feature.Eye(self.display, "null", 6, 11, [4, 2])
     self.rightEye = Feature.Eye(self.display, "null", 6, 19, [4, 2])
     self.mouth = Feature.Mouth(self.display, "null", 13, 15, [4, 2])
     self.brow1 = Feature.Eyebrow(self.display, "null", 3, 11, [3])
     self.brow2 = Feature.Eyebrow(self.display, "null", 3, 19, [3])
     self.cheek1 = Feature.Cheek(self.display, "null", 10, 6, [1])
     self.cheek2 = Feature.Cheek(self.display, "null", 10, 24, [1])
     self.drawing = Feature.Drawing(self.display, "null")
Example #16
 def OnAddFeature(self, event):
     if self.FeatName.GetLineText(0) == '':  # if no name is present in the text box, do not add a template
         print 'Error - Enter Object Description'
     else:
         Name = self.FeatName.GetLineText(0)
         Template = self.OpenCV.GetROIGray()
         self.FeatureDict[Name] = Feature.Feature('Template', Template)
         self.FeatName.Clear()  # clear the Name text box
         #self.FeatureCtrl.Append(Decription)
         self.FeatureCtrl.Append((Name, str(self.FeatureDict[Name].Type)))
Example #17
def genCoauthorFeature(instances, pathFname, maxAuthorId):
    '''
    return Feature(sparse, [features])
    '''
    sys.stderr.write("genCoauthorFeature\n")
    paperAuthorDict = {}
    csvReader = csv.reader(file(pathFname))
    csvReader.next()
    counter = Counter("paperAuthorDict")
    for line in csvReader:
        counter.inc()
        authorId, paperId = int(line[0]), int(line[1])
        paperAuthorDict.setdefault(authorId, set())
        paperAuthorDict[authorId].add(paperId)
    feature = Feature(maxAuthorId)
    counter = Counter("instance", 1000)
    for line in instances:
        counter.inc()
        authorId, paperId = line[0], line[1]
        feature.addLine(map(lambda x: [int(x), 1.0], paperAuthorDict[paperId]))
    feature.fix()
    return feature
Example #18
 def __read_feature_file(self, filename):
     cost = 20
     file = open(filename)
     for line in file:
         data = string.split(line.replace('\n', ''))
         labels = string.split(data[3], ',')
         feature_location = None
         if len(data) > 4:
             feature_location = data[4]
         self.__features.append(
             Feature(data[0], data[2], int(data[1]), cost, labels,
                     feature_location))
     file.close()
Example #19
File: GffIO.py Project: yuroubaba/PAV
 def nextFeat(self):
     while True:
         line = self.handle.readline().rstrip()
         if not line:
             break
         f = line.split("\t")
         info = self.__ParseInfo(f[8])
         keys = [
             'seqid', 'source', 'type', 'start', 'end', 'score', 'strand',
             'phase'
         ]
         feat = Feature.Feature({**dict(zip(keys, f[0:8])), **info})
         yield feat
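For orientation, the eight fixed GFF columns that zip(keys, f[0:8]) consumes, shown on a made-up line; the ninth field, f[8], holds the attributes parsed by __ParseInfo.

line = "chr1\tsrc\tgene\t1300\t9000\t.\t+\t.\tID=gene0001;Name=demo"
f = line.split("\t")
keys = ['seqid', 'source', 'type', 'start', 'end', 'score', 'strand', 'phase']
print(dict(zip(keys, f[0:8])))
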
Example #20
def thread_job(path, setting, sequence, is_train, feature_name):
    X_train, y_train = data.processSubject(path,
                                           is_train=is_train,
                                           contest=contest,
                                           split_number=setting.splitNum,
                                           sequence=sequence)
    feature = Feature(setting)
    X_pca_train, y_pca_train = feature.pca(X_train, y_train, window_length=30)
    feature.saveToDisk(feature_name="pca",
                       name=str(sequence),
                       is_train=is_train)
    X_fft_train, y_fft_train = feature.fft(X_train, y_train, window_length=30)
    feature.saveToDisk(feature_name="fft",
                       name=str(sequence),
                       is_train=is_train)
Example #21
	def _NextFeature(self):
		# update the row
		procChangedFunctions = {
			OpenMode.ReadOnly: self._NextWithSaveNothing,
			OpenMode.ReadWrite: self._NextWithOverwritten,
			OpenMode.WriteOnly: self._NextWithCreated
			}
		if self.isCurrentFeatureFresh_:
			return
		row = procChangedFunctions[self.openMode_]()
		self.isCurrentFeatureFresh_ = True
		if not row:
			self.currentFeature_ = None
		else:
			self.currentFeature_ = Feature(row, self)
Example #22
 def add_features_and_weights(self, feature_names, weights):
     problem_features = []
     for feature_number, name in enumerate(feature_names):
         weight = 0
         if feature_number < len(weights):
             weight = weights[feature_number]
         feature = Feature(name, weight)
         if feature.element is not None:
             self.features.append(feature)
         else:
             problem_features.append(name)
     if len(problem_features) > 0:
         print("Problem features:")
         for feature in problem_features:
             print(feature)
Example #23
 def calculate_conditional_probabilities(self, image_data, labels):
     y = list(set(labels))
     c_fi_y = {}
     for i in range(len(labels)):
         label = labels[i]
         image_pixel = Feature.basicFeaturesExtract(image_data[i])
         if label not in c_fi_y:
             c_fi_y[label] = np.array(image_pixel)
         else:
             c_fi_y[label] += np.array(image_pixel)
     self.conditional_probabilities = {}
     c_FI_y = self.y_Distribution
     for label in c_fi_y:
         self.conditional_probabilities[label] = np.divide(
             c_fi_y[label] + self.k, float(c_FI_y[label] + 2 * self.k))
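The division above is Laplace (add-k) smoothing for binary features, P(F_i = 1 | y) = (c_i + k) / (N_y + 2k); a tiny numeric check with made-up counts:

import numpy as np

counts = np.array([3, 0, 7])  # times each feature fired within class y
n_y, k = 10, 1                # class frequency and smoothing constant
print((counts + k) / float(n_y + 2 * k))  # [0.3333..., 0.0833..., 0.6666...]
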
Example #24
File: matrix.py Project: xjyx/rbp-maps
def same_length_region(annotation, density, annotation_type, upstream_offset,
                       downstream_offset, scale):
    """
    Produces a matrix corresponding to a region that is either scaled or
    anchored at one central point. This means all BED file intervals are,
    or are intended to be, the same length.

    Parameters
    ----------
    annotation
    density
    annotation_type
    upstream_offset
    downstream_offset
    scale

    Returns
    -------

    """
    densities = {}
    # TODO: pd.DataFrame.from_dict(dic, orient="index")
    with open(annotation) as f:
        for line in f:
            if not line.startswith('event_name') and not \
                    line.startswith('ID') and not \
                    line.startswith('annotation'):  # assume there is a header
                event = line.rstrip()
                interval = Feature.Feature(event,
                                           annotation_type).get_bedtool()

                wiggle = intervals.generic_site(density, interval,
                                                upstream_offset,
                                                downstream_offset)

                if scale:
                    wiggle = intervals.get_scale(wiggle)
                densities[intervals.rename_index(interval)] = wiggle
                # if len(wiggle) != 601: # i saw some cds start sites that are more than 1 nt long.
                #     print(len(wiggle), event)
    try:
        return pd.DataFrame(densities).T
    except Exception as e:
        print(e)
        print("found different length features")
        for key, value in densities.items():
            densities[key] = intervals.get_scale(value)
        return pd.DataFrame(densities).T
Example #25
def examineWithPower(weakClassifiers, image, powerW, powerH):
    totalError = 0
    counter = 1
    for feature in weakClassifiers:
        newLeftUp = (feature.leftUp[0] * powerH, feature.leftUp[1] * powerW)
        newFeature = f.Features(feature.type, newLeftUp,
                                feature.width * powerW,
                                feature.height * powerH, feature.polarity, 2)
        totalError = totalError + newFeature.getLable(image)
        if counter == 1 and totalError < 0:
            return 0
        counter += 1

    if totalError >= 0:
        return 1
    else:
        return 0
Example #26
File: matrix.py Project: xjyx/rbp-maps
def meta(annotation,
         density,
         upstream_offset,
         downstream_offset,
         annotation_type="bed",
         scale_to=100):
    # TODO: implement upstream and downstream CDS features.
    densities = {}
    # TODO: we dont need this? No need to collapse transcripts
    # df = intervals.merge(annotation)
    # df = intervals.explode(df)
    df = pd.read_table(
        annotation, names=['chrom', 'start', 'end', 'name', 'score', 'strand'])
    genes = df.groupby('name').apply(intervals.make_linelist_from_dataframe)
    progress = trange(len(genes))
    for name, gene in genes.iteritems():
        feature = Feature.MetaFeature(gene, annotation_type).get_bedtools()
        wiggle = np.array([])  # create wiggle with all CDS values for each gene
        # check positive strand based on first element encountered
        if feature[0].strand == '+':
            # if positive, go from lower to higher
            for interval in feature:
                wig_segment = intervals.generic_site(density, interval, 0, 0)
                wiggle = np.append(wiggle, wig_segment)
        elif feature[0].strand == '-':
            # if negative, go from higher to lower
            for interval in reversed(feature):
                wig_segment = intervals.generic_site(density, interval, 0, 0)
                wiggle = np.append(wiggle, wig_segment)
        # if len(wiggle) != 0 and 'HepG2_EIF4B_626_intersecting_CDS.bed' in annotation and name == 'ENST00000379389.4':
        #     print("WRITING INTERMEDIATE WIGGLE TO FILE")
        #     with open('/home/bay001/projects/eric_clip_paper_20180120/permanent_data/conservation/intermediates/EIF4B.CDS.ENST00000379389.before_scaling.txt', 'w') as f:
        #         for w in wiggle:
        #             f.write("{}\n".format(w))
        #    sys.exit(1)
        wiggle = intervals.get_scale(wiggle, scale_to=scale_to)
        densities[name] = wiggle
        progress.update(1)
    try:
        return pd.DataFrame(densities).T
    except Exception as e:
        print(e)
        print("found different length features")
    def __read_feature_file(self, filename):
        cost = 20
        x_min, x_max, y_min, y_max, t_min, t_max = 0, 0, 0, 0, 0, 0
        #-1, -1, -1, -1, -1, -1;
        labels = []
        feature_location = ''

        file = open(filename)
        for line in file:
            data = string.split(line.replace('\n', ''))
            for i in range(3, len(data)):
                content = data[i].split(':')
                if content[0] == 'labels':
                    labels = content[1].split(',')
                elif content[0] == 'xmin':
                    x_min = int(content[1])
                elif content[0] == 'xmax':
                    x_max = int(content[1])
                elif content[0] == 'ymin':
                    y_min = int(content[1])
                elif content[0] == 'ymax':
                    y_max = int(content[1])
                elif content[0] == 'tmin':
                    t_min = int(content[1])
                elif content[0] == 'tmax':
                    t_max = int(content[1])
                elif content[0] == 'location':
                    feature_location = content[1]

            # labels = string.split(data[3],',') if len(data) > 3 else []
            # #name,path,time,cost,label,location
            # feature_location = data[4] if len(data) > 4 else ''
            # x_min = int(data[5]) if len(data) > 5 else -1
            # x_max = int(data[6]) if len(data) > 6 else -1
            # y_min = int(data[7]) if len(data) > 7 else -1
            # y_max = int(data[8]) if len(data) > 8 else -1
            # t_min = int(data[9]) if len(data) > 9 else -1
            # t_max = int(data[10]) if len(data) > 10 else -1

            self.__features.append(
                Feature(data[0], data[2], int(data[1]), cost, labels,
                        feature_location,
                        [x_min, x_max, y_min, y_max, t_min, t_max]))
        file.close()
Example #28
    def train(self, training_data, training_labels, iterations):
        weight = np.zeros((training_data[0].height, training_data[0].width))

        for i in training_labels:
            if i not in self.weights:
                self.weights[i] = weight

        for it in range(iterations):
            for instance_number in range(len(training_labels)):
                feature = Feature.basicFeaturesExtract(
                    training_data[instance_number])
                true_label = training_labels[instance_number]
                prediction_label = self.prediction(feature)
                # ---- Update weights for each class ----
                if prediction_label != true_label:
                    self.weights[true_label] = self.weights[true_label] + feature
                    self.weights[prediction_label] = self.weights[prediction_label] - feature
Example #29
 def __read_feature_file(self, filename):
     cost = 20
     file = open(filename)
     for line in file:
         data = string.split(line.replace('\n', ''))
         labels = string.split(data[3], ',') if len(data) > 3 else []
         #name,path,time,cost,label,location
         feature_location = data[4] if len(data) > 4 else ''
         x_min = int(data[5]) if len(data) > 5 else -1
         x_max = int(data[6]) if len(data) > 6 else -1
         y_min = int(data[7]) if len(data) > 7 else -1
         y_max = int(data[8]) if len(data) > 8 else -1
         t_min = int(data[9]) if len(data) > 9 else -1
         t_max = int(data[10]) if len(data) > 10 else -1
         self.__features.append(
             Feature(data[0], data[2], int(data[1]), cost, labels,
                     feature_location,
                     [x_min, x_max, y_min, y_max, t_min, t_max]))
     file.close()
Example #30
def run(j):
    #for j in xrange(7):
    for i in xrange(10):
        featureName = None
        channels = None
        if j in range(5):
            featureName = "Dog_" + str(j + 1)
            if j == 4:
                channels = 15
            else:
                channels = 16
        else:
            if j == 6:
                channels = 24
            else:
                channels = 15
            featureName = "Patient_" + str(j - 4)
        feature = Feature(featureName)
        processor = Processor()
        basePath = "/home/xiaobin/raw_data/" + feature.subjectName
        print basePath
        X_train, y_train = processor.processDataPerSubject(basePath,
                                                           trainOrTest="train",
                                                           splitNum=10,
                                                           sequence=i)
        X_train, y_train = feature.pca(X_train, y_train)
        #X_train, y_train = feature.fft(X_train, y_train)
        print "X_train shape" + str(X_train.shape)
        feature.saveToDisk(trainOrTest="train", name=str(i))

        X_test, y_test = processor.processDataPerSubject(basePath,
                                                         trainOrTest="test",
                                                         splitNum=10,
                                                         sequence=i)
        #X_test, y_test = feature.fft(X_test, y_test )
        X_test, y_test = feature.pca(X_test, y_test)

        feature.saveToDisk(trainOrTest="test", name=str(i))
Example #31
def parseGTFToGetGenes(gtf, tools):
    print "Parsing %s..."%gtf
    geneID2gene={}
    with open(gtf) as gtfFile:
        for line in gtfFile:
            if line[0]!="#":
                lineSplit = line.rstrip().split()
                feature = lineSplit[2]
                chromosome = lineSplit[0]
                begin = int(lineSplit[3])
                end = int(lineSplit[4])
                strand = lineSplit[6]
                geneId = lineSplit[lineSplit.index("gene_id") + 1][1:-2]

                if feature=="gene":
                    geneID2gene[geneId] = Gene(geneId, chromosome, begin, end, strand, tools)
                elif feature=="transcript":
                    transcriptId = lineSplit[lineSplit.index("transcript_id") + 1][1:-2]
                    geneID2gene[geneId].transcriptId2Transcript[transcriptId]=Feature(transcriptId, chromosome, begin, end, strand, tools, geneID2gene[geneId])
    print "Parsing %s - Done!" % gtf
    return geneID2gene
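The [1:-2] slice above strips the surrounding quotes and trailing semicolon from the whitespace-split attribute value; a quick check on a made-up GTF line:

line = 'chr1\tsrc\tgene\t11869\t14409\t.\t+\t.\tgene_id "G1"; gene_name "N1";'
lineSplit = line.rstrip().split()
geneId = lineSplit[lineSplit.index("gene_id") + 1][1:-2]
print(geneId)  # G1
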
Example #32
    def calculate_conditional_probabilities(self, image_data, labels):
        # Function to calculate Conditional Probabilities-- P( F=fi\Y = y)
        y = list(set(labels))
        c_fi_y = {}
        for i in range(len(labels)):
            label = labels[i]
            # cycle_finder is far slower than the basic feature extractor,
            # so this Naive Bayes takes much longer to run
            image_pixel = Feature.cycle_finder(image_data[i])

            if label not in c_fi_y:
                c_fi_y[label] = np.array(image_pixel)
            else:
                c_fi_y[label] += np.array(image_pixel)
        self.conditional_probabilities = {}
        c_FI_y = self.y_Distribution
        for label in c_fi_y:
            self.conditional_probabilities[label] = np.divide(
                c_fi_y[label] + self.k, float(c_FI_y[label] + 2 * self.k))

        self.conditional_probabilities[label][-1] *= 10
Example #33
    def calculate_log_joint_probabilities(self, datum):
        """
        Returns the log-joint distribution over legal labels and the datum.
        """
        log_joint_probabilities = {}
        image_pixels = Feature.basicFeaturesExtract(datum)
        for label in self.y_Distribution:
            log_prior_distribution = math.log(self.y_prior[label])
            log_conditional_probabilities_1 = np.log(
                self.conditional_probabilities[label])
            log_conditional_probabilities_0 = np.log(
                1 - self.conditional_probabilities[label])

            log_joint_probabilities[label] = np.sum(
                np.array(image_pixels) * log_conditional_probabilities_1,
                dtype=float)
            log_joint_probabilities[label] += np.sum(
                (1 - np.array(image_pixels)) * log_conditional_probabilities_0,
                dtype=float)
            log_joint_probabilities[label] += log_prior_distribution

        return log_joint_probabilities
Example #34
 def generate_features(self, word, history):
     history  = self.pad_history(history)
     features = Feature.eval(word, history)
     return features
Example #35
import numpy as np
from sklearn.naive_bayes import GaussianNB

# data can be weights or features but they are a list
def load(data, fname="BOWmodel_train"):
    assert type(data) == list and type(fname) == str


_examples = load(Train=True, num_docs=2000)
dev_set = load(num_docs=500)

# last is tags in examples
permute(_examples)

examples, target = [], []
for each in _examples:
    examples.append(each[:-1])
    target.append(each[-1])

bow = Feature.feature("bow", examples, dev_set)

example_features = bow.get_incremental_features(examples)

classes = set(target)
classifyers = []

for each in classes:
    Y = np.array([1 if x == each else 0 for x in target])
    clf = GaussianNB()
    clf.fit(X, Y)
    classifyers.append(clf)

pred = []
for i, keyword in enumerate(classes):
    pred.append(classifyers[i].predict(Dev))
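The fragment above trains one GaussianNB per class (one-vs-rest); a self-contained sketch of that scheme on synthetic data, with the final loop collecting one prediction vector per class:

import numpy as np
from sklearn.naive_bayes import GaussianNB

X = np.random.rand(20, 5)              # synthetic training features
target = np.tile([0, 1, 2], 7)[:20]    # synthetic labels, all classes present
Dev = np.random.rand(4, 5)             # synthetic dev set

classifiers = []
for label in sorted(set(target)):
    y = np.array([1 if t == label else 0 for t in target])
    clf = GaussianNB()
    clf.fit(X, y)
    classifiers.append(clf)

preds = [clf.predict(Dev) for clf in classifiers]  # one 0/1 vector per class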