diff --git a/Data_Generator.py b/Data_Generator.py
index 312e23b..135525b 100644
--- a/Data_Generator.py
+++ b/Data_Generator.py
@@ -1,3 +1,5 @@
+#!/usr/bin/env python3
+
 import torch
 from torch.utils.data import Dataset
 import numpy as np
diff --git a/PointerNet.py b/PointerNet.py
index 1394f05..1a5faf2 100644
--- a/PointerNet.py
+++ b/PointerNet.py
@@ -1,7 +1,10 @@
+#!/usr/bin/env python3
+
 import torch
 import torch.nn as nn
 from torch.nn import Parameter
 import torch.nn.functional as F
+from torch import tanh, sigmoid
 
 
 class Encoder(nn.Module):
@@ -98,11 +101,11 @@ def __init__(self, input_dim,
         self.context_linear = nn.Conv1d(input_dim, hidden_dim, 1, 1)
         self.V = Parameter(torch.FloatTensor(hidden_dim), requires_grad=True)
         self._inf = Parameter(torch.FloatTensor([float('-inf')]), requires_grad=False)
-        self.tanh = nn.Tanh()
-        self.softmax = nn.Softmax()
+        self.tanh = tanh
+        self.softmax = nn.Softmax(dim=1)
 
         # Initialize vector V
-        nn.init.uniform(self.V, -1, 1)
+        nn.init.uniform_(self.V, -1, 1)
 
     def forward(self, input,
                 context,
@@ -131,7 +134,6 @@ def forward(self, input,
         if len(att[mask]) > 0:
             att[mask] = self.inf[mask]
         alpha = self.softmax(att)
-
         hidden_state = torch.bmm(ctx, alpha.unsqueeze(2)).squeeze(2)
 
         return hidden_state, alpha
@@ -212,17 +214,17 @@ def step(x, hidden):
             gates = self.input_to_hidden(x) + self.hidden_to_hidden(h)
             input, forget, cell, out = gates.chunk(4, 1)
 
-            input = F.sigmoid(input)
-            forget = F.sigmoid(forget)
-            cell = F.tanh(cell)
-            out = F.sigmoid(out)
+            input = sigmoid(input)
+            forget = sigmoid(forget)
+            cell = tanh(cell)
+            out = sigmoid(out)
 
             c_t = (forget * c) + (input * cell)
-            h_t = out * F.tanh(c_t)
+            h_t = out * tanh(c_t)
 
             # Attention section
             hidden_t, output = self.att(h_t, context, torch.eq(mask, 0))
-            hidden_t = F.tanh(self.hidden_out(torch.cat((hidden_t, h_t), 1)))
+            hidden_t = tanh(self.hidden_out(torch.cat((hidden_t, h_t), 1)))
 
             return hidden_t, c_t, output
 
@@ -287,7 +289,7 @@ def __init__(self, embedding_dim,
         self.decoder_input0 = Parameter(torch.FloatTensor(embedding_dim), requires_grad=False)
 
         # Initialize decoder_input0
-        nn.init.uniform(self.decoder_input0, -1, 1)
+        nn.init.uniform_(self.decoder_input0, -1, 1)
 
     def forward(self, inputs):
         """
@@ -309,8 +311,8 @@ def forward(self, inputs):
         encoder_outputs, encoder_hidden = self.encoder(embedded_inputs, encoder_hidden0)
 
         if self.bidir:
-            decoder_hidden0 = (torch.cat(encoder_hidden[0][-2:], dim=-1),
-                               torch.cat(encoder_hidden[1][-2:], dim=-1))
+            decoder_hidden0 = (torch.cat([_ for _ in encoder_hidden[0][-2:]], dim=-1),
+                               torch.cat([_ for _ in encoder_hidden[1][-2:]], dim=-1))
         else:
             decoder_hidden0 = (encoder_hidden[0][-1],
                                encoder_hidden[1][-1])
@@ -319,4 +321,4 @@ def forward(self, inputs):
                                                            decoder_hidden0,
                                                            encoder_outputs)
 
-        return outputs, pointers
\ No newline at end of file
+        return outputs, pointers
diff --git a/Train.py b/Train.py
index 2cf2695..e314ce7 100644
--- a/Train.py
+++ b/Train.py
@@ -1,9 +1,9 @@
-"""
+#!/usr/bin/env python3
+"""
 Pytorch implementation of Pointer Network.
 
 http://arxiv.org/pdf/1506.03134v1.pdf.
 
-
 """
 
 import torch
@@ -95,13 +95,13 @@
 
         loss = CCE(o, target_batch)
 
-        losses.append(loss.data[0])
-        batch_loss.append(loss.data[0])
+        losses.append(loss.item())
+        batch_loss.append(loss.item())
 
         model_optim.zero_grad()
         loss.backward()
         model_optim.step()
 
-        iterator.set_postfix(loss='{}'.format(loss.data[0]))
+        iterator.set_postfix(loss='{}'.format(loss.item()))
 
     iterator.set_postfix(loss=np.average(batch_loss))