first commit

Victorwz · Victorwz · commit ddaf46b43f1d · 2020-04-22T23:27:03.000+08:00
diff --git a/config.py b/config.py
@@ -0,0 +1,25 @@
+from attrdict import AttrDict
+import os
+
+cfg = AttrDict({
+    # 'exp_name': 'test-len10-delta',
+    # 'exp_name': 'test-len1-fixedscale-aggre-super',
+    # 'exp_name': 'test-aggre-super',
+    # 'exp_name': 'test-mask',
+    'exp_name': 'test-proposal',  # joined onto logdir and checkpointdir by train()
+    'resume': True,  # NOTE(review): not read by any code visible in this patch
+    'device': 'cuda:0',  # NOTE(review): not read by any code visible in this patch
+    # 'device': 'cpu',
+    
+    'train': {
+        'batch_size': 100,  # mini-batch size used by train()
+        'model_lr': 1e-4,  # learning rate handed to the Adam optimizer
+        'max_epochs': 1000  # upper bound of train()'s epoch loop
+    },
+    'valid': {
+        'batch_size': 64  # NOTE(review): not read by any code visible in this patch
+    },
+    'num_train': 60000,  # training-set size (generator.py creates as many); read by train()
+    'logdir': 'logs/',  # root directory for the tensorboardX event files
+    'checkpointdir': 'checkpoints/',  # root directory handed to Checkpointer
+})
diff --git a/generator.py b/generator.py
@@ -0,0 +1,77 @@
+import numpy as np
+import random
+import cPickle as pickle
+
+# Number of examples generated for each split.
+num_train, num_val, num_test = 60000, 10000, 10000
+# Key/value pairs per example; alphabet is a-z, 0-9 and one catch-all slot.
+step_num, elem_num = 4, 26 + 10 + 1
+
+# Inputs: one one-hot row per symbol (two rows per pair, then three query rows).
+x_train = np.zeros((num_train, step_num * 2 + 3, elem_num), np.float32)
+x_val = np.zeros((num_val, step_num * 2 + 3, elem_num), np.float32)
+x_test = np.zeros((num_test, step_num * 2 + 3, elem_num), np.float32)
+
+# Targets: one one-hot row per example.
+y_train = np.zeros((num_train, elem_num), np.float32)
+y_val = np.zeros((num_val, elem_num), np.float32)
+y_test = np.zeros((num_test, elem_num), np.float32)
+
+
+def get_one_hot(c):
+    """One-hot encode c: a-z -> slots 0..25, 0-9 -> slots 26..35, otherwise the last slot."""
+    code = ord(c)
+    letter, digit = code - ord('a'), code - ord('0')
+    # Index -1 (the final slot) is the fallback for anything that is not a-z or 0-9.
+    slot = letter if 0 <= letter < 26 else (digit + 26 if 0 <= digit < 10 else -1)
+    vec = np.zeros([elem_num])
+    vec[slot] = 1
+    return vec
+
+
+def generate_one():
+    """Build one associative-retrieval example.
+
+    Draws step_num distinct letters, each paired with a random digit, then picks
+    one letter as the query. Returns (a, e): a stacks the one-hot rows of the pairs,
+    two '?' rows and the queried letter; e is the one-hot row of its paired digit.
+    """
+    a = np.zeros([step_num * 2 + 3, elem_num])
+    d = {}
+
+    for i in range(step_num):
+        c = random.randint(0, 25)
+        while c in d:  # re-draw duplicates; `in` replaces the Python-2-only dict.has_key()
+            c = random.randint(0, 25)
+        b = random.randint(0, 9)
+        d[c] = b
+        a[i * 2] = get_one_hot(chr(c + ord('a')))
+        a[i * 2 + 1] = get_one_hot(chr(b + ord('0')))
+
+    # list(d): random.choice needs a sequence, and dict views are not one on Python 3.
+    s = random.choice(list(d))
+    a[step_num * 2] = get_one_hot('?')
+    a[step_num * 2 + 1] = get_one_hot('?')
+    a[step_num * 2 + 2] = get_one_hot(chr(s + ord('a')))
+    return a, get_one_hot(chr(d[s] + ord('0')))
+
+if __name__ == '__main__':
+    # Populate the splits in the same order as before (train, test, val) so the
+    # sequence of random draws is unchanged.
+    splits = (
+        (num_train, x_train, y_train),
+        (num_test, x_test, y_test),
+        (num_val, x_val, y_val),
+    )
+
+    for count, xs, ys in splits:
+        for i in range(0, count):
+            xs[i], ys[i] = generate_one()
+
+    # Bundle every array under its own name and write a protocol-2 pickle.
+    d = dict(
+        x_train=x_train, x_test=x_test, x_val=x_val,
+        y_train=y_train, y_test=y_test, y_val=y_val
+    )
+    with open('associative-retrieval.pkl', 'wb') as f:
+        pickle.dump(d, f, protocol=2)
diff --git a/model.py b/model.py
@@ -0,0 +1,119 @@
+from __future__ import print_function
+
+import torch
+import numpy as np
+import torch.nn as nn
+import torch.nn.functional as F
+from config import cfg
+from tensorboardX import SummaryWriter
+from torch.autograd import Variable
+import time
+from retrieval import read_data
+from util import Checkpointer
+
+# Loaded eagerly at import time; train() draws its mini-batches from it.
+ar_data = read_data()
+
+# Passed to fast_weights_model as step_num, elem_num and hidden_num respectively.
+STEP_NUM, ELEM_NUM, HIDDEN_NUM = 11, 26 + 10 + 1, 20
+
+def softmax_cross_entropy_with_logits(logits, labels):
+    """Per-example cross-entropy of `labels` against softmax(logits) over the last dim."""
+    return (-labels * F.log_softmax(logits, dim=-1)).sum(dim=-1)
+
+class fast_weights_model(nn.Module):
+    """Recurrent classifier with a fast-weight matrix `a`, for associative retrieval.
+
+    Per step: embed the input row with two ReLU layers, update the hidden state, add
+    its outer product to `a`, refine the state once through `a`, then normalise it
+    over the batch axis (dim 0).
+    """
+    def __init__(self, batch_size, step_num, elem_num, hidden_num):
+        """Create the weights; batch_size and step_num are only recorded (see forward)."""
+        super(fast_weights_model, self).__init__()
+        self.batch_size, self.step_num = batch_size, step_num
+        self.elem_num, self.hidden_num = elem_num, hidden_num
+
+        def uniform(rows, cols, bound):
+            # nn.Parameter (not Variable) so parameters()/state_dict() see the weight.
+            return nn.Parameter(torch.empty(rows, cols).uniform_(-bound, bound))
+
+        def const(value, cols):
+            return nn.Parameter(torch.full((1, cols), value, dtype=torch.float32))
+
+        # l scales the previous `a`, e scales the new outer product in forward().
+        # NOTE(review): both are 0, so `a` never leaves zero -- confirm intended values.
+        self.register_buffer('l', torch.zeros(1, dtype=torch.float32))
+        self.register_buffer('e', torch.zeros(1, dtype=torch.float32))
+        self.w1, self.b1 = uniform(elem_num, 50, np.sqrt(0.02)), const(0.0, 50)
+        # w2 consumes the 50 units produced by w1 (500 rows could not be multiplied).
+        self.w2, self.b2 = uniform(50, 100, np.sqrt(0.01)), const(0.0, 100)
+        self.w3, self.b3 = uniform(hidden_num, 100, np.sqrt(0.01)), const(0.0, 100)
+        self.w4, self.b4 = uniform(100, elem_num, np.sqrt(1.0 / elem_num)), const(0.0, elem_num)
+        self.w = nn.Parameter(0.05 * torch.eye(hidden_num))  # hidden-to-hidden weights
+        self.c = uniform(100, hidden_num, np.sqrt(hidden_num))
+        self.g, self.b = const(1.0, hidden_num), const(1.0, hidden_num)  # gain / bias
+
+    def forward(self, bx, by):
+        """Return (mean loss, accuracy) for bx [batch, step, elem] and one-hot by [batch, elem]."""
+        bx = torch.as_tensor(bx, dtype=torch.float32, device=self.w.device)
+        by = torch.as_tensor(by, dtype=torch.float32, device=self.w.device)
+        batch_size, hidden_num = bx.shape[0], self.hidden_num  # sized from the actual batch
+        a = bx.new_zeros(batch_size, hidden_num, hidden_num)
+        h = bx.new_zeros(batch_size, hidden_num)
+
+        for t in range(bx.shape[1]):
+            s1 = torch.relu(torch.matmul(bx[:, t, :], self.w1) + self.b1)
+            z = torch.relu(torch.matmul(s1, self.w2) + self.b2)
+            h = torch.relu(torch.matmul(h, self.w) + torch.matmul(z, self.c))
+            hs = h.unsqueeze(1)  # [batch, 1, hidden] row vectors
+            a = self.l * a + self.e * torch.matmul(hs.transpose(1, 2), hs)
+            boundary = (torch.matmul(h, self.w) + torch.matmul(z, self.c)).unsqueeze(1)
+            for _ in range(1):  # a single inner refinement step
+                hs = boundary + torch.matmul(hs, a)
+                mu = torch.mean(hs, 0)
+                sig = torch.sqrt(torch.mean(torch.pow(hs - mu, 2), 0))  # 0 if a unit is constant over the batch
+                hs = torch.relu(torch.div(torch.mul(self.g, hs - mu), sig) + self.b)
+            h = hs.reshape(batch_size, hidden_num)
+
+        h = torch.relu(torch.matmul(h, self.w3) + self.b3)
+        logits = torch.matmul(h, self.w4) + self.b4
+        correct = torch.argmax(logits, dim=1).eq(torch.argmax(by, dim=1))
+        self.loss = softmax_cross_entropy_with_logits(logits, by).mean()
+        self.acc = torch.mean(correct.type(torch.float32))
+        return self.loss, self.acc
+
+def train(self=None, save = 0, verbose = 0):
+    """Train on ar_data.train; `self`/`save` are unused, verbose=N prints every N batches."""
+    import os  # local import: os is not imported at the top of this module
+    batch_size = cfg.train.batch_size
+    model = fast_weights_model(batch_size, STEP_NUM, ELEM_NUM, HIDDEN_NUM).train()
+    start_time = time.time()
+    optimizer = torch.optim.Adam(model.parameters(), lr=cfg.train.model_lr)
+    writer = SummaryWriter(logdir=os.path.join(cfg.logdir, cfg.exp_name), flush_secs=30)
+    checkpointer = Checkpointer(os.path.join(cfg.checkpointdir, cfg.exp_name))
+    start_epoch, batch_idxs = 0, cfg.num_train // batch_size  # batches per epoch
+    for epoch in range(start_epoch, cfg.train.max_epochs):
+        for idx in range(batch_idxs):
+            global_step = epoch * batch_idxs + idx + 1  # counts batches, not examples
+            bx, by = ar_data.train.next_batch(batch_size=batch_size)
+            loss, acc = model(torch.as_tensor(bx), torch.as_tensor(by))
+            optimizer.zero_grad()
+            loss.backward()
+            optimizer.step()
+            writer.add_scalar('loss/loss', loss.item(), global_step)
+            writer.add_scalar('acc/acc', acc.item(), global_step)
+            if verbose > 0 and idx % verbose == 0:
+                print('Epoch: [{:4d}] [{:4d}/{:4d}] time: {:.4f}, loss: {:.8f}, acc: {:.2f}'.format(
+                    epoch, idx, batch_idxs, time.time() - start_time, loss.item(), acc.item()))
+    checkpointer.save(model, optimizer, cfg.train.max_epochs)  # defined even if the loop never ran
+    writer.close()
+
+
+if __name__ == "__main__":
+    train(None, verbose = 10)  # None fills train()'s first positional parameter, which it never uses
+
+
+
+
+
diff --git a/retrieval.py b/retrieval.py
@@ -0,0 +1,58 @@
+import numpy as np
+import collections
+try:
+    import cPickle as pickle
+except ImportError:
+    import pickle
+
+
+Datasets = collections.namedtuple('Datasets', 'train val test')  # bundle returned by read_data()
+
+
+class Dataset(object):
+    """In-memory x/y arrays served as shuffled mini-batches."""
+    def __init__(self, x, y):
+        self._x, self._y = x, y
+        self._epoch_completed = self._index_in_epoch = 0
+        self._num_examples = self.x.shape[0]
+        self.perm = np.random.permutation(np.arange(self._num_examples))
+
+    @property
+    def x(self):
+        return self._x
+
+    @property
+    def y(self):
+        return self._y
+
+    @property
+    def num_examples(self):
+        return self._num_examples
+
+    def next_batch(self, batch_size):
+        """Return the next batch_size (x, y) rows; reshuffle and restart once exhausted."""
+        assert batch_size <= self._num_examples
+        start = self._index_in_epoch
+        self._index_in_epoch += batch_size
+        # Strictly greater: a batch ending exactly on the last example is still served.
+        if self._index_in_epoch > self.num_examples:
+            self._epoch_completed += 1
+            np.random.shuffle(self.perm)
+            start, self._index_in_epoch = 0, batch_size
+        idx = self.perm[start:self._index_in_epoch]
+        return self._x[idx], self._y[idx]
+
+
+def read_data(data_path='associative-retrieval.pkl'):
+    """Unpickle the arrays stored at data_path and wrap each split in a Dataset."""
+    with open(data_path, 'rb') as f:
+        d = pickle.load(f)
+
+    # Look up all six arrays before any Dataset is built.
+    xs = dict((name, d['x_' + name]) for name in ('train', 'val', 'test'))
+    ys = dict((name, d['y_' + name]) for name in ('train', 'val', 'test'))
+    # Build in train, test, val order so each split draws its permutation in the same sequence.
+    train = Dataset(xs['train'], ys['train'])
+    test = Dataset(xs['test'], ys['test'])
+    val = Dataset(xs['val'], ys['val'])
+    return Datasets(train=train, val=val, test=test)
diff --git a/util.py b/util.py
@@ -0,0 +1,59 @@
+from collections import defaultdict, deque
+import pickle
+from attrdict import AttrDict
+import os
+import numpy as np
+import torch
+from torch import nn
+from torch import optim
+from tensorboardX import SummaryWriter
+
+class Checkpointer:
+    """Keeps at most max_num checkpoint files in `path`, tracked in model_list.pkl."""
+    def __init__(self, path, max_num=3):
+        self.max_num = max_num
+        self.path = path
+        if not os.path.exists(path):
+            os.makedirs(path)
+        self.listfile = os.path.join(path, 'model_list.pkl')
+        if not os.path.exists(self.listfile):
+            self._write_list([])
+
+    def _read_list(self):
+        """Return the recorded checkpoint filenames, oldest first."""
+        with open(self.listfile, 'rb') as f:
+            return pickle.load(f)
+
+    def _write_list(self, model_list):
+        """Replace the list file; 'wb' truncates so no bytes of the old list remain."""
+        with open(self.listfile, 'wb') as f:
+            pickle.dump(model_list, f)
+
+    def save(self, model, optimizer, epoch):
+        """Write the checkpoint for `epoch`, then delete the oldest ones beyond max_num."""
+        checkpoint = {'model': model.state_dict(), 'optimizer': optimizer.state_dict(), 'epoch': epoch}
+        filename = os.path.join(self.path, 'model_{:05}.pth'.format(epoch))
+        # Write the weights first so the list never names a file that was not saved.
+        with open(filename, 'wb') as f:
+            torch.save(checkpoint, f)
+
+        # Drop any earlier entry for this file: a duplicate would later delete a live checkpoint.
+        model_list = [name for name in self._read_list() if name != filename]
+        model_list.append(filename)
+        while len(model_list) > self.max_num:
+            stale = model_list.pop(0)
+            if os.path.exists(stale):
+                os.remove(stale)
+        self._write_list(model_list)
+
+    def load(self, model, optimizer):
+        """Restore the newest checkpoint into model/optimizer; return its epoch (0 if none)."""
+        model_list = self._read_list()
+        if not model_list:
+            print('No checkpoint found. Starting from scratch')
+            return 0
+        checkpoint = torch.load(model_list[-1])
+        model.load_state_dict(checkpoint['model'])
+        optimizer.load_state_dict(checkpoint['optimizer'])
+        print('Load checkpoint from {}.'.format(model_list[-1]))
+        return checkpoint['epoch']