Example No. 1
def __init__(self, featureSet):
     FeatureBuilder.__init__(self, featureSet)
     self.bacsu = readBacsu(os.path.expanduser("~/data/BioNLP11SharedTask/supporting-tasks/bacsu-modified.txt"))
     self.subti = readSubtiwiki(os.path.expanduser("~/data/BioNLP11SharedTask/supporting-tasks/Subtiwiki-Synonyms.csv"))
     #self.subti = readSubtiwiki(os.path.expanduser("~/cvs_checkout/JariSandbox/Wiki/subtiwiki/Subtiwiki-Synonyms.csv"))
     # OR the dictionaries
     self.any = {}
     for key in sorted(list(set(self.bacsu.keys() + self.subti.keys()))):
         self.any[key] = set()
         if key in self.bacsu:
             for value in self.bacsu[key]:
                 self.any[key].add(value)
         if key in self.subti:
             for value in self.subti[key]:
                 self.any[key].add(value)
         self.any[key] = list(self.any[key])
         self.any[key].sort()
     # AND the dictionaries
     self.all = {}
     for key in sorted(list(set(self.bacsu.keys() + self.subti.keys()))):
         self.all[key] = set()  
         allSynonyms = set()
         bacsuSet = set()
         if key in self.bacsu:
             bacsuSet = self.bacsu[key]
             for x in bacsuSet: allSynonyms.add(x)
         subtiSet = set()
         if key in self.subti:
             subtiSet = self.subti[key]
             for x in subtiSet: allSynonyms.add(x)
         for synonym in allSynonyms:
             if synonym in bacsuSet and synonym in subtiSet:
                 self.all[key].add(synonym)
         self.all[key] = list(self.all[key])
         self.all[key].sort()
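The two loops above OR and AND the Bacsu and Subtiwiki synonym dictionaries: any holds the per-key union of synonyms, all the per-key intersection. The same merge in compact form, a minimal sketch with hypothetical sample data standing in for the loaded files:

# Hypothetical stand-ins for the readBacsu/readSubtiwiki results.
bacsu = {'yfiA': ['yfiA', 'sigH'], 'comK': ['comK']}
subti = {'yfiA': ['yfiA'], 'sigW': ['sigW']}

anySyn = {}  # OR: union of both synonym lists per key
allSyn = {}  # AND: intersection per key
for key in sorted(set(bacsu) | set(subti)):
    b = set(bacsu.get(key, []))
    s = set(subti.get(key, []))
    anySyn[key] = sorted(b | s)
    allSyn[key] = sorted(b & s)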
Example No. 2
 def __init__(self, featureSet=None):
     FeatureBuilder.__init__(self, featureSet)
     drugBankFile = "/home/jari/data/DDIExtraction2011/resources/drugbank.xml"
     # Load drug data into memory on first call to constructor
     if DrugFeatureBuilder.data is None:
         DrugFeatureBuilder.data, DrugFeatureBuilder.nameToId = prepareDrugBank(drugBankFile)
         DrugFeatureBuilder.interactionPairs = buildInteractionPairs(DrugFeatureBuilder.data)
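The DrugFeatureBuilder.data is None guard is a class-level lazy cache: the expensive DrugBank parse runs once, on the first construction, and every later instance reuses the parsed data. A minimal sketch of the pattern, with a hypothetical loader standing in for prepareDrugBank:

def expensive_load():
    # Hypothetical stand-in for prepareDrugBank().
    return {'aspirin': 'DB00945'}

class CachedBuilder(object):
    data = None  # class attribute shared by all instances

    def __init__(self):
        # Only the first instantiation pays the loading cost.
        if CachedBuilder.data is None:
            CachedBuilder.data = expensive_load()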
Example No. 3
 def __init__(self, featureSet=None):
     FeatureBuilder.__init__(self, featureSet)
     if not hasattr(Settings, "DRUG_BANK_XML"):
         print >> sys.stderr, "Drug Bank XML not installed, installing now"
         installDrugBank(updateLocalSettings=True)
     drugBankFile = Settings.DRUG_BANK_XML
     #drugBankFile = "/home/jari/data/DDIExtraction2011/resources/drugbank.xml"
     # Load drug data into memory on first call to constructor
     if DrugFeatureBuilder.data is None:
         DrugFeatureBuilder.data, DrugFeatureBuilder.nameToId = prepareDrugBank(drugBankFile)

         DrugFeatureBuilder.tokenToId = {}
         for name in DrugFeatureBuilder.nameToId:
             splits = name.split()
             if len(splits) < 2:
                 continue
             for split in splits:
                 if split not in DrugFeatureBuilder.tokenToId:
                     DrugFeatureBuilder.tokenToId[split] = []
                 DrugFeatureBuilder.tokenToId[split].extend(DrugFeatureBuilder.nameToId[name])
         for token in DrugFeatureBuilder.tokenToId:
             DrugFeatureBuilder.tokenToId[token] = sorted(set(DrugFeatureBuilder.tokenToId[token]))

         DrugFeatureBuilder.interactionPairs = buildInteractionPairs(DrugFeatureBuilder.data)
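tokenToId maps each token of a multi-word drug name back to the ids of every name containing it, so partial mentions can still be matched. A small self-contained sketch of the same index, with made-up names and ids:

# Hypothetical name-to-id data; only multi-word names are indexed by token.
nameToId = {'acetylsalicylic acid': ['DB00945'], 'folic acid': ['DB00158']}
tokenToId = {}
for name, ids in nameToId.items():
    tokens = name.split()
    if len(tokens) < 2:
        continue
    for token in tokens:
        tokenToId.setdefault(token, []).extend(ids)
for token in tokenToId:
    tokenToId[token] = sorted(set(tokenToId[token]))
# tokenToId['acid'] == ['DB00158', 'DB00945']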
Example No. 4
 def __init__(self, featureSet, style=None):
     FeatureBuilder.__init__(self, featureSet, style)
     if "wordvector" in style and isinstance(style["wordvector"], basestring):
         wordVectorPath = style["wordvector"]
     else:
         wordVectorPath = Settings.W2VFILE
     print >> sys.stderr, "Loading word vectors from", wordVectorPath
     self.model = WV.load(wordVectorPath, 100000, 10000000)  # alternative limits: 10000, 500000
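The style check lets an experiment's style dictionary carry an explicit vector path, falling back to the Settings default when the entry is absent or just a boolean flag. The same fallback in isolation, a sketch with placeholder names:

DEFAULT_PATH = '/path/to/vectors.bin'  # placeholder for Settings.W2VFILE
style = {'wordvector': True}           # flag set, but no explicit path given
value = style.get('wordvector')
wordVectorPath = value if isinstance(value, str) else DEFAULT_PATH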
Example No. 5
 def __init__(self, featureSet):
     FeatureBuilder.__init__(self, featureSet)
     self.terms = {}
     self.byName = {}
     self.byKeyword = {}
     self.loadOBO(
         os.path.join(os.path.dirname(os.path.abspath(__file__)),
                      "OntoBiotope_BioNLP-ST-2016.obo"))
Example No. 6
 def __init__(self, featureSet):
     """
     This is called, when the ExampleBuilder object is created.
     
     @type featureSet: Core.IdSet
     @param featureSet: The feature ids
     """
     FeatureBuilder.__init__(self, featureSet)
Example No. 8
 def __init__(self, featureSet, style=None):
     """
     @type featureSet: IdSet
     @param featureSet: feature ids
     """
     FeatureBuilder.__init__(self, featureSet, style=style)
     #self.edgeFeatureBuilder = EdgeFeatureBuilder(featureSet)
     self.ontologyFeatureBuilder = None
     self.noAnnType = False
     self.predictedRange = None
Example No. 9
 def __init__(self, featureSet=None):
     FeatureBuilder.__init__(self, featureSet)
     if not hasattr(Settings, "DRUG_BANK_XML"):
         print >> sys.stderr, "Drug Bank XML not installed, installing now"
         installDrugBank(updateLocalSettings=True)
     drugBankFile = Settings.DRUG_BANK_XML
     #drugBankFile = "/home/jari/data/DDIExtraction2011/resources/drugbank.xml"
     # Load drug data into memory on first call to constructor
     if DrugFeatureBuilder.data is None:
         DrugFeatureBuilder.data, DrugFeatureBuilder.nameToId = prepareDrugBank(drugBankFile)
         DrugFeatureBuilder.interactionPairs = buildInteractionPairs(DrugFeatureBuilder.data)
Example No. 10
def predict(nlp, cls, file_path_txt, out_file_path):
    fb = FeatureBuilder(nlp)
    features_matrix_str = fb.get_features_of_file(file_path_txt)

    pred_labels = cls.predict(features_matrix_str)

    out_file = open(out_file_path, 'w')
    for (idxs, features_list), label in zip(features_matrix_str, pred_labels):
        if label == 1:
            sent_num, obj1, obj2 = idxs
            sent_num = 'sent' + str(sent_num)
            obj1, obj2 = str(obj1), str(obj2)
            out_file.write(sent_num + '\t' + obj1 + '\t' + 'Live_In' + '\t' +
                           obj2 + '\t\n')

    out_file.close()
Example No. 11
def train_classifier(nlp, train_txt_file, train_annotation_file):
    fb = FeatureBuilder(nlp)
    features_matrix = fb.get_features_of_file(train_txt_file)

    annotation_dict, r2i = annotation_to_dict(train_annotation_file)
    lc = LabelChecker(annotation_dict, r2i)

    cls = MyClassifier(fb.features_to_index)
    gold_labels = lc.get_labels_of(features_matrix)
    cls.train_on(features_matrix, gold_labels)

    pred_labels = cls.predict(features_matrix)
    acc_all = accuracy_score(gold_labels, pred_labels)
    acc_filtered = accuracy_of(gold_labels, pred_labels)
    print 'train - accuracy all %0.2f%%' % (acc_all * 100.0)
    print 'train - accuracy filtered %0.2f%%' % (acc_filtered * 100.0)

    return cls
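Examples 10 and 11 pair up: train_classifier fits a classifier on an annotated file, and predict writes the detected Live_In relations as tab-separated lines. A hedged usage sketch, assuming nlp is a spaCy pipeline and using placeholder file names:

import spacy

# Placeholder paths; any pipeline object the FeatureBuilder accepts works as nlp.
nlp = spacy.load('en_core_web_sm')
cls = train_classifier(nlp, 'train.txt', 'train.annotations')
predict(nlp, cls, 'dev.txt', 'dev.predictions.tsv')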
Example No. 12
 def __init__(self, featureSet):
     FeatureBuilder.__init__(self, featureSet)
     #self.bacsu = readBacsu(os.path.expanduser("~/data/BioNLP11SharedTask/supporting-tasks/bacsu-modified.txt"))
     #self.subti = readSubtiwiki(os.path.expanduser("~/data/BioNLP11SharedTask/supporting-tasks/Subtiwiki-Synonyms.csv"))
     #self.subti = readSubtiwiki(os.path.expanduser("~/cvs_checkout/JariSandbox/Wiki/subtiwiki/Subtiwiki-Synonyms.csv"))
     if not hasattr(Settings, "TEES_RESOURCES"):
         print >> sys.stderr, "TEES example builder data files not installed, installing now"
         installRENData(updateLocalSettings=True)
     self.bacsu = readBacsu(
         os.path.join(Settings.TEES_RESOURCES, "bacsu-modified.txt"))
     self.subti = readSubtiwiki(
         os.path.join(Settings.TEES_RESOURCES, "Subtiwiki-Synonyms.csv"))
     # OR the dictionaries
     self.any = {}
     for key in sorted(list(set(self.bacsu.keys() + self.subti.keys()))):
         self.any[key] = set()
         if key in self.bacsu:
             for value in self.bacsu[key]:
                 self.any[key].add(value)
         if key in self.subti:
             for value in self.subti[key]:
                 self.any[key].add(value)
         self.any[key] = list(self.any[key])
         self.any[key].sort()
     # AND the dictionaries
     self.all = {}
     for key in sorted(list(set(self.bacsu.keys() + self.subti.keys()))):
         self.all[key] = set()
         allSynonyms = set()
         bacsuSet = set()
         if key in self.bacsu:
             bacsuSet = self.bacsu[key]
             for x in bacsuSet:
                 allSynonyms.add(x)
         subtiSet = set()
         if key in self.subti:
             subtiSet = self.subti[key]
             for x in subtiSet:
                 allSynonyms.add(x)
         for synonym in allSynonyms:
             if synonym in bacsuSet and synonym in subtiSet:
                 self.all[key].add(synonym)
         self.all[key] = list(self.all[key])
         self.all[key].sort()
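The hasattr(Settings, ...) guard is an install-on-demand pattern: if a named resource path is missing from the settings module, fetch it once and record it before use. A generic sketch of the idea; Settings and install_resource are stand-ins, not the project's actual API:

def ensure_resource(settings, attr, install_resource):
    # install_resource is a hypothetical installer that registers
    # the resource path on the settings module and returns it.
    if not hasattr(settings, attr):
        setattr(settings, attr, install_resource())
    return getattr(settings, attr)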
Example No. 16
 def __init__(self, featureSet):
     global g_bioInferFileName
     FeatureBuilder.__init__(self, featureSet)
     self.ontologies = loadOntologies(g_bioInferFileName)
Example No. 17
 def __init__(self, featureSet):
     FeatureBuilder.__init__(self, featureSet)
     self.noAnnType = False
     self.edgeTypesForFeatures = []
     self.useNonNameEntities = False
Example No. 18
 def __init__(self, featureSet=None):
     FeatureBuilder.__init__(self, featureSet)
     from nltk.corpus import wordnet
     self.wordnet = wordnet
     print >> sys.stderr, "Using WordNet via NLTK"
Example No. 19
 def __init__(self, featureSet):
     FeatureBuilder.__init__(self, featureSet)
     self.generator = random.Random(0)
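Seeding a private random.Random(0) makes any randomized features reproducible across runs and keeps the builder independent of the global random module state:

import random

a = random.Random(0)
b = random.Random(0)
# Identically seeded generators yield identical sequences.
assert [a.random() for _ in range(3)] == [b.random() for _ in range(3)]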
Example No. 20
from FeatureBuilder import FeatureBuilder

# fb = FeatureBuilder(csv_file_name='~/sdb1/ais/ais_data.csv')
fb = FeatureBuilder(
    csv_file_name='~/sdb1/ais/data/frequencyOfEdgesInData_ais201710_compact_version.csv')
fb.run()
print fb.get_new_feature_df()
Example No. 23
 def __init__(self, featureSet):
     FeatureBuilder.__init__(self, featureSet)
Example No. 24
from Classifier import Classifier
from FeatureBuilder import FeatureBuilder

features = FeatureBuilder()
features.load_model()

company_classifier_path = './models/one_vs_rest_company'
location_classifier_path = './models/one_vs_rest_location'
goods_classifier_path = './models/one_vs_rest_goods'

company_X_train, company_y_train, company_X_test, company_y_test = features.one_vs_rest_generator(0)
location_X_train, location_y_train, location_X_test, location_y_test = features.one_vs_rest_generator(1)
goods_X_train, goods_y_train, goods_X_test, goods_y_test = features.one_vs_rest_generator(2)

classifier = Classifier(features.company_feature_encoder,
                        features.location_feature_encoder,
                        features.goods_feature_encoder)
classifier.tpot_classifiers(company_X_train, company_y_train, company_X_test,
                            company_y_test, company_classifier_path)
classifier.tpot_classifiers(location_X_train, location_y_train,
                            location_X_test, location_y_test,
                            location_classifier_path)
classifier.tpot_classifiers(goods_X_train, goods_y_train, goods_X_test,
                            goods_y_test, goods_classifier_path)
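The three calls above implement a one-vs-rest scheme by hand: one binary classifier per category (company, location, goods) against all the rest. For illustration, the same idea with scikit-learn's generic wrapper; a sketch on synthetic data, not a substitute for the TPOT pipelines trained here:

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.multiclass import OneVsRestClassifier

# Synthetic 3-class data standing in for the real feature matrices.
X, y = make_classification(n_samples=200, n_classes=3, n_informative=4, random_state=0)
ovr = OneVsRestClassifier(LogisticRegression(max_iter=1000)).fit(X, y)
preds = ovr.predict(X)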
Example No. 25
 def build_feature_builders(self):
     self.feature_list = []
     for feature_opt in self.config["feature"]:
         builder = FeatureBuilder(feature_opt, self.config, self.dataloader)
         self.feature_list.append(builder)
Example No. 27
from Classifier import Classifier
from FeatureBuilder import FeatureBuilder
import json
from RandomStringClassifier import RandomStringClassifier
from DateClassifier import DateClassifier
import operator

features = FeatureBuilder()
features.load_model()

classifier_3types = Classifier(features.company_feature_encoder,
                               features.location_feature_encoder,
                               features.goods_feature_encoder)
classifier_3types.load_classifiers()

dateClassifier = DateClassifier()
randomClassifier = RandomStringClassifier()

fname = 'test5.json'

with open('./data/' + fname) as f:
    data = json.load(f)

output = {}
confidence_thresh = 0.5

for cats in data['recognitionResult']['lines']:
    for word in cats['words']:
        text = word['text'].lower()
        clean_text = ''.join(c for c in text if c.isalnum())
        is_date = dateClassifier.classify(text)
Example No. 28
 def __init__(self, featureSet, style=None):
     FeatureBuilder.__init__(self, featureSet, style)
     self.model = WV.load(Settings.W2VFILE, 100000, 10000000)  # alternative limits: 10000, 500000
Example No. 29
 def initialize(self, dataPath):
     FeatureBuilder.initialize(self, dataPath)
     self.dataset = getYelpDataset(dataPath)