# vocab size
  V = len(word2idx)
  print("Vocab size:", V)

  # we will also treat beginning of sentence and end of sentence as bigrams
  # START -> first word
  # last word -> END
  start_idx = word2idx['START']
  end_idx = word2idx['END']


  # a matrix where:
  # row = last word
  # col = current word
  # value at [row, col] = p(current word | last word)
  bigram_probs = get_bigram_probs(sentences, V, start_idx, end_idx, smoothing=0.1)
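get_bigram_probs itself is not shown in this snippet. A minimal sketch of what it is assumed to do, counting transitions (including START -> first word and last word -> END), applying add-k smoothing, and normalizing each row, assuming each sentence is a list of word indices:

import numpy as np

def get_bigram_probs(sentences, V, start_idx, end_idx, smoothing=1.0):
    # start every cell at the smoothing value so no transition has zero probability
    counts = np.ones((V, V)) * smoothing
    for sentence in sentences:
        for i, word in enumerate(sentence):
            prev = start_idx if i == 0 else sentence[i - 1]
            counts[prev, word] += 1
        # last word -> END
        counts[sentence[-1], end_idx] += 1
    # normalize each row: row i becomes p(current word | last word = i)
    counts /= counts.sum(axis=1, keepdims=True)
    return counts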


  # train a shallow neural network model
  D = 100
  W1 = np.random.randn(V, D) / np.sqrt(V)
  W2 = np.random.randn(D, V) / np.sqrt(D)

  losses = []
  epochs = 1
  lr = 1e-2
  
  def softmax(a):
    a = a - a.max()  # subtract the max for numerical stability
    exp_a = np.exp(a)
    return exp_a / exp_a.sum(axis=1, keepdims=True)
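
The loop that actually trains W1 and W2 is not shown above. A minimal sketch, assuming each sentence in sentences is a list of word indices without the START/END tokens, using one-hot bigram pairs and plain gradient descent on the cross-entropy loss:

for epoch in range(epochs):
    np.random.shuffle(sentences)
    for sentence in sentences:
        # wrap the sentence so START -> first word and last word -> END are learned too
        seq = [start_idx] + sentence + [end_idx]
        n = len(seq) - 1

        # one-hot previous words (inputs) and next words (targets)
        inputs = np.zeros((n, V))
        targets = np.zeros((n, V))
        inputs[np.arange(n), seq[:-1]] = 1
        targets[np.arange(n), seq[1:]] = 1

        # forward pass: linear hidden layer, softmax output
        hidden = inputs.dot(W1)
        predictions = softmax(hidden.dot(W2))

        # backpropagation: compute both gradients before updating either weight matrix
        d_out = predictions - targets
        d_hidden = d_out.dot(W2.T)
        W2 = W2 - lr * hidden.T.dot(d_out)
        W1 = W1 - lr * inputs.T.dot(d_hidden)

        losses.append(-np.sum(targets * np.log(predictions)) / n)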
Example #2
V = len(word2idx)
print('Vocab size:', V)
## First set of weights: randomly initialized
W = np.random.randn(V, V) / np.sqrt(V)
# the bias term is omitted here


# we will also treat beginning of sentence and end of sentence as bigrams
# START -> first word
# last word -> END
start_idx = word2idx['START']
end_idx = word2idx['END']

## Second set of weights: the bigram language model produced by Markov-style counting
bigram_probs = get_bigram_probs(sentences, V, start_idx, end_idx, smoothing=1)
W_bigram = np.log(bigram_probs)
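
Taking the log turns the bigram probabilities into logits: because each row of bigram_probs sums to 1, pushing those logits through the softmax in the forward pass below gives back the original probabilities, so W_bigram can be evaluated with exactly the same code as the randomly initialized W. A quick numerical check of that identity:

import numpy as np

# one row of a row-normalized bigram matrix
p = np.array([[0.2, 0.3, 0.5]])
logits = np.log(p)
recovered = np.exp(logits) / np.exp(logits).sum(axis=1, keepdims=True)
print(np.allclose(recovered, p))  # True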


def softmax(a):
    a = a - a.max()  # subtract the max for numerical stability
    expA = np.exp(a)
    return expA / expA.sum(axis=1, keepdims=True)

def forward(X, W):
    # one-hot rows of X select rows of W; softmax turns them into p(current word | last word)
    return softmax(X.dot(W))

def predict(P_Y_given_X):
    # most likely next word for each input row
    return np.argmax(P_Y_given_X, axis=1)

def cross_entropy(T, pY):
    # mean of -T * log(pY) over all entries; T is the one-hot target matrix
    return -np.mean(T * np.log(pY))
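
Neither weight matrix is actually used in the snippet above. One plausible way to wire the pieces together, again assuming sentences are lists of word indices, is to score the same one-hot bigram pairs with both matrices and compare the losses:

# one-hot inputs (previous words) and targets (next words) for a single sentence
seq = [start_idx] + sentences[0] + [end_idx]
n = len(seq) - 1
X = np.zeros((n, V))
T = np.zeros((n, V))
X[np.arange(n), seq[:-1]] = 1
T[np.arange(n), seq[1:]] = 1

print('random W loss:', cross_entropy(T, forward(X, W)))
print('bigram W loss:', cross_entropy(T, forward(X, W_bigram)))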
Example #3
    # vocab size
    V = len(word2index)
    print('Vocab size:', V)

    # we will also treat beginning of sentence and end of sentence as bigrams
    # START -> first word
    # last word -> END
    start_index = word2index['START']
    end_index = word2index['END']

    # a matrix where:
    # row = last word
    # col = current word
    # value at [row, col] = p(current word | last word)
    bigram_probs = get_bigram_probs(sentences, V, start_index, end_index, smoothing=0.1)

    # train a shallow neural network model
    D = 100
    W1 = np.random.randn(V, D) / np.sqrt(V)
    W2 = np.random.randn(D, V) / np.sqrt(D)

    losses = []
    epochs = 1
    lr = 1e-2

    def softmax(a):
        a = a - a.max()
        exp_a = np.exp(a)
        return exp_a / exp_a.sum(axis=1, keepdims=True)
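
Once W1 and W2 have been trained on the same bigram pairs (the training loop is omitted in this snippet), the network's implied transition matrix can be read off directly: with a one-hot input for word i, the output is row i of softmax(W1.dot(W2)), so the whole matrix is comparable to bigram_probs. A quick check, assuming training has already happened:

    # the model's p(current word | last word) for every last word at once
    P_model = softmax(W1.dot(W2))
    print('mean abs difference from the count-based bigram matrix:',
          np.abs(P_model - bigram_probs).mean())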