def checkArgs(self):
    if not CmdOptions.checkArgs(self):
        return False
    # Multiple positive bags only make sense when the positive examples are
    # sampled at random; any deterministic selection yields identical bags.
    if self.getString('select_pos') != 'random' and self.getInt('nr_pos_bags') > 1:
        printError(self.__class__.__name__,
                   "nr_pos_bags shall be 1 unless select_pos is random")
        return False
    return True
def precompute_annotator(self, concept):
    INFO = 'dataengine.%s.precompute_annotator' % self.__class__.__name__
    topn = 100
    NegativeEngine.precompute_annotator(self, concept)

    # Expand each sub-concept (a concept such as 'polar-bear' splits on '-')
    # with its WordNet neighbours plus the top-ranked tags from the NGD
    # similarity index, and merge the result into the virtual annotator.
    for subconcept in concept.split('-'):
        expandedTagSet = set([subconcept] + wn_expand(subconcept))
        try:
            datafile = os.path.join(ROOT_PATH, self.collection,
                                    'SimilarityIndex', 'ngd',
                                    '%s.txt' % subconcept)
            rankedtags = readRankingResults(datafile)
            expandedTagSet = expandedTagSet.union(x[0] for x in rankedtags[:topn])
        except Exception:
            printError(INFO, 'failed to load ranktag file for %s' % subconcept)
        self.annotator = self.annotator.union(expandedTagSet)

    printStatus(INFO, 'precomputed the virtual annotator for %s: %d tags' %
                (concept, len(self.annotator)))
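# A minimal stand-in for readRankingResults, written here only to document the
# assumed layout of the SimilarityIndex/ngd/%s.txt files: one "tag score" pair
# per line, best match first. This is a sketch of the assumed format, not the
# repo's actual implementation.
def read_ranking_results_sketch(filename):
    with open(filename) as f:
        return [(tag, float(score)) for tag, score in
                (line.split() for line in f if line.strip())]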
import os
import sys

import numpy as np

# readConcepts, checkToSkip, makedirsforfile and printError are helpers from
# this repo's basic.* modules; their exact import paths are assumed here.

INFO = __file__


def process(options, testCollection, trainCollection, annotationName, tagrelMethod, tagfeature):
    rootpath = options.rootpath
    overwrite = options.overwrite

    concepts = readConcepts(trainCollection, annotationName, rootpath)
    nr_of_concepts = len(concepts)
    mapping = dict(zip(concepts, range(nr_of_concepts)))

    feat_dir = os.path.join(rootpath, testCollection, 'FeatureData', tagfeature)
    binary_file = os.path.join(feat_dir, 'feature.bin')
    id_file = os.path.join(feat_dir, 'id.txt')
    shape_file = os.path.join(feat_dir, 'shape.txt')

    if checkToSkip(binary_file, overwrite):
        sys.exit(0)

    inputfile = os.path.join(rootpath, testCollection, 'autotagging', testCollection,
                             trainCollection, tagrelMethod, 'id.tagvotes.txt')
    if not os.path.exists(inputfile):
        printError(INFO, '%s does not exist' % inputfile)
        sys.exit(0)

    makedirsforfile(binary_file)
    processed = set()
    imset = []
    count_line = 0

    with open(binary_file, 'wb') as fw:
        for line in open(inputfile):
            count_line += 1
            elems = line.strip().split()
            name = elems[0]
            if name in processed:  # keep only the first occurrence of an id
                continue
            processed.add(name)
            del elems[0]
            assert len(elems) == 2 * nr_of_concepts

            # Each line holds (tag, score) pairs; scatter the scores into a
            # dense vector indexed by concept, then l_1-normalize it.
            vec = [0] * nr_of_concepts
            for i in range(0, len(elems), 2):
                tag = elems[i]
                idx = mapping[tag]
                score = float(elems[i + 1])
                vec[idx] = score
            s = float(sum(vec))
            vec = np.array([x / s for x in vec], dtype=np.float32)
            vec.tofile(fw)
            imset.append(name)

    with open(id_file, 'w') as fw:
        fw.write(' '.join(imset))
    with open(shape_file, 'w') as fw:
        fw.write('%d %d' % (len(imset), nr_of_concepts))

    print('%d lines parsed, %d ids -> %d unique ids' %
          (count_line, len(processed), len(imset)))
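# For reference, a minimal sketch of loading the three files process() writes
# back into a (num_images, nr_of_concepts) matrix. The file names match the
# code above; the function name and everything else is illustrative.
def load_tag_feature(feat_dir):
    with open(os.path.join(feat_dir, 'shape.txt')) as f:
        nr_images, nr_dims = map(int, f.read().split())
    with open(os.path.join(feat_dir, 'id.txt')) as f:
        ids = f.read().split()
    mat = np.fromfile(os.path.join(feat_dir, 'feature.bin'),
                      dtype=np.float32).reshape(nr_images, nr_dims)
    return ids, mat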