# Running label id; it is bumped once for every label registered below.
idx = 1

# Register each BIO-prefix / entity-class / subtype combination in label2Idx
# (which must already exist at this point) under consecutive integer ids.
for bioTag in ('B-', 'I-'):
    for nerClass in ('PER', 'LOC', 'ORG', 'OTH'):
        for subtype in ('', 'deriv', 'part'):
            label2Idx[''.join((bioTag, nerClass, subtype))] = idx
            idx += 1

# Inverse label mapping: numeric id -> label string
idx2Label = dict((index, label) for label, index in label2Idx.items())

            
     
# Read in data: parse the train / dev / test files with GermEvalReader.
# print(...) with a single parenthesised argument prints the same text on
# Python 2 and is also valid on Python 3, where the bare print statement is a
# SyntaxError.
print("Read in data and create matrices")
train_sentences = GermEvalReader.readFile(trainFile)
dev_sentences = GermEvalReader.readFile(devFile)
test_sentences = GermEvalReader.readFile(testFile)

# Create numpy arrays: one (x, y) pair per split, built from the parsed
# sentences, the window size and the word / label index mappings.
train_x, train_y = GermEvalReader.createNumpyArray(
    train_sentences, windowSize, word2Idx, label2Idx)
dev_x, dev_y = GermEvalReader.createNumpyArray(
    dev_sentences, windowSize, word2Idx, label2Idx)
test_x, test_y = GermEvalReader.createNumpyArray(
    test_sentences, windowSize, word2Idx, label2Idx)


#####################################
#
# Create the Lasagne Network
#
#####################################
    "numeric": 0,
    "allLower": 1,
    "allUpper": 2,
    "initialUpper": 3,
    "other": 4,
    "mainly_numeric": 5,
    "contains_digit": 6,
    "PADDING": 7,
}

# Identity matrix with one row and one column per caseLookup entry, stored in
# Theano's configured float dtype.
caseMatrix = np.identity(len(caseLookup), dtype=theano.config.floatX)


# Read in data: parse the train / dev / test files with GermEvalReader.
# print(...) with a single parenthesised argument prints the same text on
# Python 2 and is also valid on Python 3, where the bare print statement is a
# SyntaxError.
print("Read in data and create matrices")
train_sentences = GermEvalReader.readFile(trainFile)
dev_sentences = GermEvalReader.readFile(devFile)
test_sentences = GermEvalReader.readFile(testFile)

# Create numpy arrays: each call is unpacked into a word array, a casing
# array and a label array for one split.
train_x, train_case_x, train_y = GermEvalReader.createNumpyArrayWithCasing(
    train_sentences, windowSize, word2Idx, label2Idx, caseLookup
)
dev_x, dev_case_x, dev_y = GermEvalReader.createNumpyArrayWithCasing(
    dev_sentences, windowSize, word2Idx, label2Idx, caseLookup
)
test_x, test_case_x, test_y = GermEvalReader.createNumpyArrayWithCasing(
    test_sentences, windowSize, word2Idx, label2Idx, caseLookup
)

            label2Idx[bioTag+nerClass+subtype] = idx 
            idx += 1
            
# Inverse label mapping: numeric id -> label string
idx2Label = dict((index, label) for label, index in label2Idx.items())


# Casing lookup: every casing category name maps to an integer id equal to
# its position in the list below.
caseLookup = dict(
    (name, position)
    for position, name in enumerate([
        'numeric',
        'allLower',
        'allUpper',
        'initialUpper',
        'other',
        'mainly_numeric',
        'contains_digit',
        'PADDING',
    ])
)
                      
# Identity matrix with one row and one column per caseLookup entry, stored in
# Theano's configured float dtype.
caseMatrix = np.identity(len(caseLookup), dtype=theano.config.floatX)


# Read in data: parse the train / dev / test files with GermEvalReader.
# print(...) with a single parenthesised argument prints the same text on
# Python 2 and is also valid on Python 3, where the bare print statement is a
# SyntaxError.
print("Read in data and create matrices")
train_sentences = GermEvalReader.readFile(trainFile)
dev_sentences = GermEvalReader.readFile(devFile)
test_sentences = GermEvalReader.readFile(testFile)

# Create numpy arrays: each call is unpacked into a word array, a casing
# array and a label array for one split.
train_x, train_case_x, train_y = GermEvalReader.createNumpyArrayWithCasing(
    train_sentences, windowSize, word2Idx, label2Idx, caseLookup)
dev_x, dev_case_x, dev_y = GermEvalReader.createNumpyArrayWithCasing(
    dev_sentences, windowSize, word2Idx, label2Idx, caseLookup)
test_x, test_case_x, test_y = GermEvalReader.createNumpyArrayWithCasing(
    test_sentences, windowSize, word2Idx, label2Idx, caseLookup)



#####################################
#
# Create the  Network
#
#####################################
# Example no. 4
# 0
# Create a mapping for our labels: 'O' takes id 0 and every generated label
# receives the next consecutive id, in generation order.
label2Idx = {'O': 0}

for bioTag in ('B-', 'I-'):
    for nerClass in ('PER', 'LOC', 'ORG', 'OTH'):
        for subtype in ('', 'deriv', 'part'):
            # All generated labels are distinct and ids are consecutive from
            # 0, so the current dict size is exactly the next unused id.
            label2Idx['%s%s%s' % (bioTag, nerClass, subtype)] = len(label2Idx)

# Value of the running id counter after the last label has been assigned.
idx = len(label2Idx)

# Inverse label mapping: numeric id -> label string
idx2Label = dict((index, label) for label, index in label2Idx.items())

# Read in data: parse the train / dev / test files with GermEvalReader.
# print(...) with a single parenthesised argument prints the same text on
# Python 2 and is also valid on Python 3, where the bare print statement is a
# SyntaxError.
print("Read in data and create matrices")
train_sentences = GermEvalReader.readFile(trainFile)
dev_sentences = GermEvalReader.readFile(devFile)
test_sentences = GermEvalReader.readFile(testFile)

# Create numpy arrays: one (x, y) pair per split, built from the parsed
# sentences, the window size and the word / label index mappings.
train_x, train_y = GermEvalReader.createNumpyArray(train_sentences, windowSize,
                                                   word2Idx, label2Idx)
dev_x, dev_y = GermEvalReader.createNumpyArray(dev_sentences, windowSize,
                                               word2Idx, label2Idx)
test_x, test_y = GermEvalReader.createNumpyArray(test_sentences, windowSize,
                                                 word2Idx, label2Idx)

#####################################
#
# Create the Network
#
# Casing lookup: every casing category name maps to an integer id equal to
# its position in the list below.
caseLookup = dict(
    (name, position)
    for position, name in enumerate([
        'numeric',
        'allLower',
        'allUpper',
        'initialUpper',
        'other',
        'PADDING',
    ])
)

# Identity matrix with one row and one column per caseLookup entry, stored in
# Theano's configured float dtype.
caseMatrix = np.identity(len(caseLookup), dtype=theano.config.floatX)

# Read in data: parse the train / dev / test files with GermEvalReader.
# print(...) with a single parenthesised argument prints the same text on
# Python 2 and is also valid on Python 3, where the bare print statement is a
# SyntaxError.
print("Read in data and create matrices")
train_sentences = GermEvalReader.readFile(trainFile)
dev_sentences = GermEvalReader.readFile(devFile)
test_sentences = GermEvalReader.readFile(testFile)

# Create numpy arrays: each call is unpacked into a word array, a casing
# array and a label array for one split.
# NOTE(review): the files are read via GermEvalReader but converted via
# GermEvalReader_with_casing -- kept exactly as in the original; confirm that
# both modules are meant to be used together.
train_x, train_case_x, train_y = GermEvalReader_with_casing.createNumpyArrayWithCasing(
    train_sentences, windowSize, word2Idx, label2Idx, caseLookup)
dev_x, dev_case_x, dev_y = GermEvalReader_with_casing.createNumpyArrayWithCasing(
    dev_sentences, windowSize, word2Idx, label2Idx, caseLookup)
test_x, test_case_x, test_y = GermEvalReader_with_casing.createNumpyArrayWithCasing(
    test_sentences, windowSize, word2Idx, label2Idx, caseLookup)

#####################################
#
# Create the Lasagne Network
#
# Casing lookup: every casing category name maps to an integer id equal to
# its position in the list below.
caseLookup = dict(
    (name, position)
    for position, name in enumerate([
        'numeric',
        'allLower',
        'allUpper',
        'initialUpper',
        'other',
        'mainly_numeric',
        'contains_digit',
        'PADDING',
    ])
)
# Identity matrix with one row and one column per caseLookup entry, stored in
# Theano's configured float dtype.
caseMatrix = np.identity(len(caseLookup), dtype=theano.config.floatX)

# Read in data: parse the train / dev / test files with GermEvalReader.
# print(...) with a single parenthesised argument prints the same text on
# Python 2 and is also valid on Python 3, where the bare print statement is a
# SyntaxError.
print("Read in data and create matrices")
train_sentences = GermEvalReader.readFile(trainFile)
dev_sentences = GermEvalReader.readFile(devFile)
test_sentences = GermEvalReader.readFile(testFile)

# Build one dataset object per split from the parsed sentences and the
# word / label / casing lookups.
# NOTE(review): the structure of what createDataset returns is not visible
# here -- check GermEvalReader before relying on it.
train_data = GermEvalReader.createDataset(train_sentences, word2Idx, label2Idx,
                                          caseLookup)
dev_data = GermEvalReader.createDataset(dev_sentences, word2Idx, label2Idx,
                                        caseLookup)
test_data = GermEvalReader.createDataset(test_sentences, word2Idx, label2Idx,
                                         caseLookup)

#####################################
#
# Create the  Network
#
        for subtype in ['', 'deriv', 'part']:
            label2Idx[bioTag+nerClass+subtype] = idx 
            idx += 1
            
# Inverse label mapping: numeric id -> label string
idx2Label = dict((index, label) for label, index in label2Idx.items())


# Casing lookup: every casing category name maps to an integer id equal to
# its position in the list below.
caseLookup = dict(
    (name, position)
    for position, name in enumerate([
        'numeric',
        'allLower',
        'allUpper',
        'initialUpper',
        'other',
        'PADDING',
    ])
)
            
# Identity matrix with one row and one column per caseLookup entry, stored in
# Theano's configured float dtype.
caseMatrix = np.identity(len(caseLookup), dtype=theano.config.floatX)

# Read in data: parse the train / dev / test files with GermEvalReader.
# print(...) with a single parenthesised argument prints the same text on
# Python 2 and is also valid on Python 3, where the bare print statement is a
# SyntaxError.
print("Read in data and create matrices")
train_sentences = GermEvalReader.readFile(trainFile)
dev_sentences = GermEvalReader.readFile(devFile)
test_sentences = GermEvalReader.readFile(testFile)

# Create numpy arrays: each call is unpacked into a word array, a casing
# array and a label array for one split.
# NOTE(review): the files are read via GermEvalReader but converted via
# GermEvalReader_with_casing -- kept exactly as in the original; confirm that
# both modules are meant to be used together.
train_x, train_case_x, train_y = GermEvalReader_with_casing.createNumpyArrayWithCasing(
    train_sentences, windowSize, word2Idx, label2Idx, caseLookup)
dev_x, dev_case_x, dev_y = GermEvalReader_with_casing.createNumpyArrayWithCasing(
    dev_sentences, windowSize, word2Idx, label2Idx, caseLookup)
test_x, test_case_x, test_y = GermEvalReader_with_casing.createNumpyArrayWithCasing(
    test_sentences, windowSize, word2Idx, label2Idx, caseLookup)



#####################################
#
# Create the Lasagne Network
#
#####################################