PyTorch Notes
Contents
PyTorch Fundamentals
Simple array manipulations/creations
import torch

# convert numpy array to pytorch tensor
torch.Tensor(numpy_tensor)
# or another way
torch.from_numpy(numpy_tensor)

# convert torch tensor to numpy representation
pytorch_tensor.numpy()

# create default arrays
torch.ones((2, 2))
torch.rand(2, 2)
Define manual seed
# CPU seed
torch.manual_seed(42)
# GPU seed
torch.cuda.manual_seed_all(42)
Move tensor from CPU to GPU and back
# move to GPU
cpu_tensor.cuda()
# move to CPU
gpu_tensor.cpu()
Tensor manipulations
a = torch.rand(2, 3)

# get the shape of the tensor
a.size()
# reshape the tensor to the required shape
a.view(3, -1)

# simple addition
b = torch.ones((2, 3))
c = a + b
c = torch.add(a, b)
# in-place addition
a.add_(b)

# get the mean and std
a.mean(dim=0)
a.std(dim=1)
Variables and Gradients
Variable creation
import torch
from torch.autograd import Variable

# create a variable
a = Variable(torch.ones((2, 3)), requires_grad=True)
# access the variable's tensor
a.data
# access the variable's gradient
a.grad
Compute gradient
x = Variable(torch.ones(2), requires_grad=True)
y = 5 * (x + 2) ** 2

# backward should be called only on a scalar
o = (1 / 2) * torch.sum(y)

# compute backward
o.backward()

# now we have the gradients of x
x.grad  # 15, 15
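To check this by hand: o = (1/2) * Σ_i 5 * (x_i + 2)^2, so ∂o/∂x_i = 5 * (x_i + 2), and with x_i = 1 each gradient component is 5 * 3 = 15.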
Neural Networks
Define simple NN
A simple network with no optimizer and a manually defined loss function
import torch
from torch.autograd import Variable

dtype = torch.FloatTensor

N, D_in, H, D_out = 64, 1000, 100, 10

x = Variable(torch.randn(N, D_in).type(dtype), requires_grad=False)
y = Variable(torch.randn(N, D_out).type(dtype), requires_grad=False)

w1 = Variable(torch.randn(D_in, H).type(dtype), requires_grad=True)
w2 = Variable(torch.randn(H, D_out).type(dtype), requires_grad=True)

learning_rate = 1e-6
for t in range(500):
    h = x.mm(w1)
    # clamp simulates ReLU behavior
    h_relu = h.clamp(min=0)
    y_pred = h_relu.mm(w2)

    loss = (y_pred - y).pow(2).sum()

    # compute backward pass
    loss.backward()

    # manually apply the gradients
    w1.data -= learning_rate * w1.grad.data
    w2.data -= learning_rate * w2.grad.data

    # manually zero the gradients after updating the weights
    w1.grad.data.zero_()
    w2.grad.data.zero_()
NN with optimizer and loss
Now we define the network with the nn module, using a predefined optimizer and loss function
import torch
from torch.autograd import Variable

N, D_in, H, D_out = 64, 1000, 100, 10

x = Variable(torch.randn(N, D_in))
y = Variable(torch.randn(N, D_out), requires_grad=False)

model = torch.nn.Sequential(
    torch.nn.Linear(D_in, H),
    torch.nn.ReLU(),
    torch.nn.Linear(H, D_out),
)

learning_rate = 1e-6
loss_fn = torch.nn.MSELoss(size_average=False)
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

for t in range(500):
    y_pred = model(x)
    loss = loss_fn(y_pred, y)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
NN class based
Create the NN as a class inherited from torch.nn.Module, here with two linear layers
import torch
import torch.nn.functional as F
from torch.autograd import Variable


class Model(torch.nn.Module):
    def __init__(self, D_in, H, D_out):
        super().__init__()
        self.linear1 = torch.nn.Linear(D_in, H)
        self.linear2 = torch.nn.Linear(H, D_out)

    def forward(self, x):
        h_relu = F.relu(self.linear1(x))
        y_pred = self.linear2(h_relu)
        return y_pred


N, D_in, H, D_out = 64, 1000, 100, 10

x = Variable(torch.randn(N, D_in))
y = Variable(torch.randn(N, D_out), requires_grad=False)

model = Model(D_in, H, D_out)

criterion = torch.nn.MSELoss(size_average=False)
optimizer = torch.optim.SGD(model.parameters(), lr=1e-4)

for t in range(500):
    y_pred = model(x)
    loss = criterion(y_pred, y)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
NN sequential mixed with class approach
import torch
import torch.nn as nn


class Model(nn.Module):
    def __init__(self):
        super().__init__()
        self.feature_extractor = nn.Sequential(
            nn.Conv2d(3, 12, kernel_size=3, padding=1, stride=1),
            nn.Conv2d(12, 24, kernel_size=3, padding=1, stride=1),
        )

    def forward(self, x):
        x = self.feature_extractor(x)
        return x
Convolution Examples
Conv2d takes the following arguments: in_channels, out_channels, and kernel_size
import torch
import torch.nn as nn
import torch.nn.functional as F

# Sequential based
model = torch.nn.Sequential(
    torch.nn.Conv2d(1, 20, 5),
    torch.nn.ReLU(),
    torch.nn.Conv2d(20, 64, 5),
    torch.nn.ReLU()
)


# class based
class Model(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)

    def forward(self, x):
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        return x


model = Model()
Define custom functions
New style
import torch


# the definition itself
class MyFunction(torch.autograd.Function):

    @staticmethod
    def forward(ctx, input):
        ctx.save_for_backward(input)
        output = torch.sign(input)
        return output

    @staticmethod
    def backward(ctx, grad_output):
        # saved tensors is a tuple of tensors, so we need to get the first one
        input, = ctx.saved_tensors
        grad_input = grad_output.clone()
        grad_input[input.ge(1)] = 0
        grad_input[input.le(-1)] = 0
        return grad_input


# usage
x = torch.randn(10, 20)
y = MyFunction.apply(x)
# or
my_func = MyFunction.apply
y = my_func(x)


# and if we want to use it inside nn.Module
class MyFunctionModule(torch.nn.Module):
    def forward(self, x):
        return MyFunction.apply(x)
Old style
import torch


# the definition itself
class MyFunction(torch.autograd.Function):

    def forward(self, input):
        self.save_for_backward(input)
        output = torch.sign(input)
        return output

    def backward(self, grad_output):
        input, = self.saved_tensors
        grad_input = grad_output.clone()
        grad_input[input.ge(1)] = 0
        grad_input[input.le(-1)] = 0
        return grad_input


# usage
x = torch.randn(10, 20)
y = MyFunction()(x)


# and if we want to use it inside nn.Module
class MyFunctionModule(torch.nn.Module):
    def forward(self, x):
        return MyFunction()(x)
Additional topics
Train flag
The train flag can be set with a boolean to switch layers such as dropout and batch norm between training and inference behavior
model.train(True)   # execute a train step
model.train(False)  # run an inference step
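Equivalently, the model.train() / model.eval() shorthands toggle the same flag (a minimal sketch with an arbitrary dropout layer):

import torch.nn as nn

model = nn.Sequential(nn.Linear(10, 10), nn.Dropout(p=0.5))

model.train()  # same as model.train(True): dropout is active
model.eval()   # same as model.train(False): dropout is disabled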
Learning Rate Schedule
PyTorch has a lot of learning rate schedulers out of the box
from torch.optim import lr_scheduler

scheduler = lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.1)

for epoch in range(100):
    scheduler.step()
    train()
    validate()
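Another out-of-the-box option is ReduceLROnPlateau, which lowers the learning rate when a monitored metric stops improving (a sketch; validate() returning a validation loss is an assumption here):

from torch.optim import lr_scheduler

scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=10)

for epoch in range(100):
    train()
    val_loss = validate()
    # this scheduler steps on the monitored value instead of the epoch count
    scheduler.step(val_loss)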
Data Loaders
import pandas as pd
import torch
import torchvision as tv

data_transforms = tv.transforms.Compose([
    tv.transforms.RandomCrop((64, 64), padding=4),
    tv.transforms.RandomHorizontalFlip(),
    tv.transforms.ToTensor(),
])


class ImagesDataset(torch.utils.data.Dataset):
    def __init__(self, df, transform=None,
                 loader=tv.datasets.folder.default_loader):
        self.df = df
        self.transform = transform
        self.loader = loader

    def __getitem__(self, index):
        row = self.df.iloc[index]

        target = row['class_']
        path = row['path']
        img = self.loader(path)
        if self.transform is not None:
            img = self.transform(img)

        return img, target

    def __len__(self):
        n, _ = self.df.shape
        return n


train_df = pd.read_csv('path/to/some.csv')
train_dataset = ImagesDataset(
    df=train_df,
    transform=data_transforms)

train_loader = torch.utils.data.DataLoader(train_dataset,
                                           batch_size=10,
                                           shuffle=True,
                                           num_workers=16)

# fetch a batch, same as calling the `__getitem__` method
for img, target in train_loader:
    pass
Use volatile flag during inference
For inference it's better to provide the volatile flag during variable creation. It should be used only when you are sure that no gradients will be computed
from torch.autograd import Variable

input_ = Variable(input_, volatile=True)
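Note that in PyTorch 0.4 and later the volatile flag has no effect; the equivalent is to run inference inside a torch.no_grad() block (a minimal sketch assuming model and input_ are already defined):

import torch

with torch.no_grad():
    output = model(input_)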
Weights initialization
Weight initialization in PyTorch can be implemented in two ways:
import math

import torch
import torch.nn as nn

# as a function call to the `nn.init` module
w = torch.Tensor(3, 5)
torch.nn.init.xavier_normal(w)


# as direct access to the tensor's data attribute
def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        m.weight.data.normal_(0.0, 0.02)
    elif classname.find('BatchNorm') != -1:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)


# for-loop approach with direct access
class MyModel(nn.Module):
    def __init__(self):
        super().__init__()
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.bias.data.zero_()
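The weights_init function above is usually applied recursively to every submodule via Module.apply (a short usage sketch; any nn.Module instance can stand in for the model):

model = MyModel()
model.apply(weights_init)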
Work with CUDA
import torch

# check if cuda is available
torch.cuda.is_available()

# set the required device
torch.cuda.set_device(0)

# work with a specific cuda device
with torch.cuda.device(1):
    # allocates a tensor on GPU 1
    a = torch.cuda.FloatTensor(1)
    # a.get_device() == 1

    # but you can still manually assign a tensor to the required device
    d = torch.randn(2).cuda(2)
    # d.get_device() == 2
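In PyTorch 0.4 and later, device placement is usually written in a device-agnostic way with torch.device (a minimal sketch):

import torch

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# the tensor ends up on the GPU if one is available, otherwise on the CPU
a = torch.randn(2, 3).to(device)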