Module pysimt.layers.encoders.transformers
Expand source code
from torch import nn
from .. import TFEmbedding
from ..transformers import FlatMMCrossAttentionSublayer
from ..transformers import SelfAttentionSublayer
from ..positionwise_ff import PositionwiseSublayer
from ...utils.nn import generate_padding_masks, generate_lookahead_mask
class TFEncoderBlock(nn.Module):

    def __init__(self, model_dim, ff_dim, n_heads, ff_activ='gelu',
                 dropout=0.1, attn_dropout=0.0,
                 pre_norm=True, enc_img_attn=None):
        """
        Creates a TFEncoderBlock, consisting of a self-attention (or flat multimodal
        cross-attention) sublayer followed by a position-wise feed-forward sublayer.
        :param model_dim: The model dimension.
        :param ff_dim: The feed-forward network dimension.
        :param n_heads: The number of attention heads.
        :param ff_activ: The feed-forward network activation function. Default: 'gelu'.
        :param dropout: The dropout rate. Default: 0.1.
        :param attn_dropout: The attention dropout rate. Default: 0.0.
        :param pre_norm: Whether to use 'pre_norm' sublayers rather than 'post_norm'. Default: True.
        :param enc_img_attn: The encoder image attention type. Possible values: 'flat' or None. Default: None.
        """
        super().__init__()
        self.enc_img_attn = enc_img_attn
        if enc_img_attn == 'flat':
            self.multimodal_attn = FlatMMCrossAttentionSublayer(
                model_dim, n_heads, dropout, attn_dropout, pre_norm)
        else:
            self.self_attn = SelfAttentionSublayer(
                model_dim, n_heads, dropout, attn_dropout, pre_norm)
        self.feed_forward = PositionwiseSublayer(
            model_dim, ff_dim, ff_activ, dropout, pre_norm)
    def forward(self, encoder_x, encoder_mask=None, image_x=None, image_mask=None):
        """
        Performs a forward pass of an encoder block.
        :param encoder_x: The encoder's source text input.
        :param encoder_mask: The encoder's source text input mask.
        :param image_x: The encoder's image input.
        :param image_mask: The encoder's image input mask.
        :return: A tuple of the block output and a dictionary of attention weights,
            keyed 'multimodal' or 'self' depending on the active sublayer.
        """
        if self.enc_img_attn == 'flat' and image_x is not None:
            encoder_x, attn_weights = self.multimodal_attn(
                encoder_x, key_txt=None, value_txt=None, mask_txt=encoder_mask,
                key_img=image_x, value_img=image_x, mask_img=image_mask)
            all_attn_weights = {'multimodal': attn_weights}
        else:
            encoder_x, attn_weights = self.self_attn(encoder_x, encoder_mask)
            all_attn_weights = {'self': attn_weights}
        return self.feed_forward(encoder_x, encoder_mask), all_attn_weights
class TFEncoder(nn.Module):
    """Self-attentive encoder for the Transformer.

    Stacks `n_layers` TFEncoderBlock modules on top of a TFEmbedding layer,
    optionally with flat multimodal cross-attention over image features.
    """
    def __init__(self, model_dim, ff_dim, n_heads, n_layers, num_embeddings,
                 ff_activ='gelu', dropout=0.1, attn_dropout=0.0, pre_norm=True,
                 enc_bidirectional=False, enc_img_attn=None,
                 store_attn_weights=False):
        """
        Creates a TFEncoder.
        :param model_dim: The model dimension.
        :param ff_dim: The feed-forward layer dimension.
        :param n_heads: The number of attention heads.
        :param n_layers: The number of encoder layers.
        :param num_embeddings: The size of the source vocabulary (number of embeddings).
        :param ff_activ: The feed-forward layer activation function. Default: 'gelu'.
        :param dropout: The dropout rate. Default: 0.1.
        :param attn_dropout: The attention dropout rate. Default: 0.0.
        :param pre_norm: Whether to use 'pre_norm' sublayers rather than 'post_norm'. Default: True.
        :param enc_bidirectional: Whether the encoder should be bidirectional,
            i.e. attend to future source positions as well. Default: False.
        :param enc_img_attn: The encoder image attention type. Possible values: 'flat' or None. Default: None.
        :param store_attn_weights: Whether to store per-layer attention weights
            during the forward pass. Default: False.
        """
        super().__init__()
        self.model_dim = model_dim
        self.ff_dim = ff_dim
        self.n_heads = n_heads
        self.n_layers = n_layers
        self.ff_activ = ff_activ
        self.dropout = dropout
        self.pre_norm = pre_norm
        self.enc_bidirectional = enc_bidirectional
        self.enc_img_attn = enc_img_attn
        self.store_attn_weights = store_attn_weights

        self.src_embedding = TFEmbedding(
            num_embeddings=num_embeddings, embedding_dim=self.model_dim, dropout=dropout)

        self._encoder_mask = None
        self._encoder_states = None
        self.final_layer_norm = None
        self._all_attention_weights = []

        blocks = []
        for _ in range(self.n_layers):
            block = TFEncoderBlock(
                model_dim=self.model_dim, n_heads=self.n_heads, ff_dim=self.ff_dim,
                ff_activ=self.ff_activ, dropout=self.dropout, attn_dropout=attn_dropout,
                pre_norm=self.pre_norm, enc_img_attn=enc_img_attn)
            blocks.append(block)
        self.blocks = nn.ModuleList(blocks)

        if self.pre_norm:
            self.final_layer_norm = nn.LayerNorm(self.model_dim, eps=1e-6)
    def forward(self, x, **kwargs):
        """Forward-pass of the encoder.
        :param x: Input tensor of source token indices, shape (s_len, bsize).
        :param kwargs: May contain 'img_data', a tuple of (image features, image mask),
            which is forwarded to the multimodal attention sublayers.
        :return: A tuple of the encoder states and the padding mask.
        """
        padding_mask = generate_padding_masks(x)
        mask = padding_mask
        if not self.enc_bidirectional:
            # A unidirectional encoder also masks out future source positions.
            mask = mask | generate_lookahead_mask(x)

        x = self.src_embedding(x)
        image, image_mask = self._get_image_data(kwargs)

        self._all_attention_weights = []
        for block in self.blocks:
            x, attn_weights = block(x, mask, image_x=image, image_mask=image_mask)
            if self.store_attn_weights:
                self._all_attention_weights.append(attn_weights)

        if self.pre_norm:
            x = self.final_layer_norm(x)

        self._encoder_states = x
        self._encoder_mask = padding_mask
        return self._encoder_states, self._encoder_mask
    def get_attention_weights(self):
        """Returns the attention weights stored during the last forward pass."""
        return self._all_attention_weights
    def get_states(self, up_to=int(1e6)):
        """Reveals partial source information through the `up_to` argument.
        Useful for simultaneous NMT encodings."""
        if not self.enc_bidirectional:
            assert self._encoder_states is not None, "Call encoder first to cache states!"
            return self._encoder_states[:up_to], self._encoder_mask[:, :, :up_to]
        else:
            raise NotImplementedError(
                "get_states is not implemented for bidirectional encoders as the states cannot be easily cached")
    @staticmethod
    def _get_image_data(kwargs):
        """Extracts the optional image features and mask from the forward kwargs."""
        image_x = None
        image_mask = None
        if 'img_data' in kwargs:
            image_x, image_mask = kwargs['img_data']
        return image_x, image_mask
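The source above can be exercised end to end with a few lines of PyTorch. The sketch below is illustrative only: the layer sizes, the vocabulary size of 32000, and the assumption that the input is a LongTensor of token indices shaped (s_len, bsize) with index 0 reserved for padding are not prescribed by the module and should be adapted to the surrounding pysimt configuration.

import torch
from pysimt.layers.encoders.transformers import TFEncoder

# Toy configuration; all sizes are illustrative, not library defaults.
encoder = TFEncoder(model_dim=512, ff_dim=2048, n_heads=8, n_layers=6,
                    num_embeddings=32000, store_attn_weights=True)

# Dummy source batch: 7 time steps, batch size 2 (index 0 assumed to be padding).
src = torch.randint(1, 32000, (7, 2))

states, mask = encoder(src)
# states: contextual source representations, expected shape (7, 2, 512)
# mask:   the padding mask produced by generate_padding_masks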
Classes
class TFEncoder (model_dim, ff_dim, n_heads, n_layers, num_embeddings, ff_activ='gelu', dropout=0.1, attn_dropout=0.0, pre_norm=True, enc_bidirectional=False, enc_img_attn=None, store_attn_weights=False)
-
Self-attentive encoder for the Transformer. Stacks `n_layers` TFEncoderBlock modules on top of a TFEmbedding layer, optionally with flat multimodal cross-attention over image features.
Creates a TFEncoder.
:param model_dim: The model dimension.
:param ff_dim: The feed-forward layer dimension.
:param n_heads: The number of attention heads.
:param n_layers: The number of encoder layers.
:param num_embeddings: The size of the source vocabulary (number of embeddings).
:param ff_activ: The feed-forward layer activation function. Default: 'gelu'.
:param dropout: The dropout rate. Default: 0.1.
:param attn_dropout: The attention dropout rate. Default: 0.0.
:param pre_norm: Whether to use 'pre_norm' sublayers rather than 'post_norm'. Default: True.
:param enc_bidirectional: Whether the encoder should be bidirectional. Default: False.
:param enc_img_attn: The encoder image attention type. Possible values: 'flat' or None. Default: None.
:param store_attn_weights: Whether to store per-layer attention weights during the forward pass. Default: False.
Ancestors
- torch.nn.modules.module.Module
Class variables
var dump_patches : bool
var training : bool
Methods
def forward(self, x, **kwargs)
-
Forward-pass of the encoder.
:param x: Input tensor of source token indices, shape (s_len, bsize).
:param kwargs: May contain 'img_data', a tuple of (image features, image mask).
:return: A tuple of the encoder states and the padding mask.
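When the encoder is constructed with enc_img_attn='flat', image features can be supplied through the img_data keyword as a (features, mask) tuple, which _get_image_data unpacks and forwards to every block. The sketch below is a hypothetical call: the region-feature shape (36, 2, 512) and the None image mask are placeholders, not requirements of the module.

import torch
from pysimt.layers.encoders.transformers import TFEncoder

mm_encoder = TFEncoder(model_dim=512, ff_dim=2048, n_heads=8, n_layers=6,
                       num_embeddings=32000, enc_img_attn='flat')

src = torch.randint(1, 32000, (7, 2))
img_feats = torch.rand(36, 2, 512)   # e.g. 36 regions per image, assumed projected to model_dim
states, mask = mm_encoder(src, img_data=(img_feats, None))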
def get_attention_weights(self)
-
Returns the attention weights stored during the last forward pass.
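When the encoder was created with store_attn_weights=True, the stored list holds one dictionary per layer, keyed 'self' for text-only blocks or 'multimodal' for flat image attention. A brief sketch, reusing the illustrative encoder and src from the example under the module source:

states, mask = encoder(src)           # the forward pass populates the stored weights
for layer_idx, weights in enumerate(encoder.get_attention_weights()):
    # Each entry is a per-layer dict such as {'self': ...} or {'multimodal': ...}.
    print(layer_idx, list(weights.keys()))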
def get_states(self, up_to=1000000)
-
Reveals partial source information through the `up_to` argument. Useful for simultaneous NMT encodings.
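For simultaneous translation, get_states can expose a growing prefix of the cached source states. The loop below is only a sketch of that pattern, reusing the illustrative encoder and src from the example under the module source, and assumes the default unidirectional encoder:

encoder(src)                          # run once on the full source to cache states and mask
for t in range(1, src.size(0) + 1):
    prefix_states, prefix_mask = encoder.get_states(up_to=t)
    # prefix_states covers only the first t source positions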
class TFEncoderBlock (model_dim, ff_dim, n_heads, ff_activ='gelu', dropout=0.1, attn_dropout=0.0, pre_norm=True, enc_img_attn=None)
-
Creates a TFEncoderBlock, consisting of a self-attention (or flat multimodal cross-attention) sublayer followed by a position-wise feed-forward sublayer.
:param model_dim: The model dimension.
:param ff_dim: The feed-forward network dimension.
:param n_heads: The number of attention heads.
:param ff_activ: The feed-forward network activation function. Default: 'gelu'.
:param dropout: The dropout rate. Default: 0.1.
:param attn_dropout: The attention dropout rate. Default: 0.0.
:param pre_norm: Whether to use 'pre_norm' sublayers rather than 'post_norm'. Default: True.
:param enc_img_attn: The encoder image attention type. Possible values: 'flat' or None. Default: None.
Ancestors
- torch.nn.modules.module.Module
Class variables
var dump_patches : bool
var training : bool
Methods
def forward(self, encoder_x, encoder_mask=None, image_x=None, image_mask=None)
-
Performs a forward pass of an encoder block.
:param encoder_x: The encoder's source text input.
:param encoder_mask: The encoder's source text input mask.
:param image_x: The encoder's image input.
:param image_mask: The encoder's image input mask.
:return: A tuple of the block output and a dictionary of attention weights, keyed 'multimodal' or 'self'.
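A single block can also be exercised in isolation. The sketch below builds a text-only block and feeds it pre-embedded inputs; the shapes and the None mask are chosen for illustration and assume the sublayers accept an absent mask.

import torch
from pysimt.layers.encoders.transformers import TFEncoderBlock

block = TFEncoderBlock(model_dim=512, ff_dim=2048, n_heads=8)

x = torch.rand(7, 2, 512)             # (s_len, bsize, model_dim), already embedded
out, attn_weights = block(x, encoder_mask=None)
# out: (7, 2, 512); attn_weights: {'self': ...}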