Module pysimt.layers.transformers.self_attention_sublayer

Source code
from ..attention import ScaledDotAttention
from .base_sublayer import BaseSublayer


class SelfAttentionSublayer(BaseSublayer):

    def __init__(self, model_dim, n_heads, dropout=0.1,
                 attn_dropout=0.0, is_pre_norm=False):
        """
        Creates a SelfAttentionSublayer.
        :param model_dim: The model dimensionality.
        :param n_heads: The number of attention heads.
        :param dropout: The dropout rate for the residual connection.
        :param attn_dropout: The dropout rate for the scaled dot-product attention.
        :param is_pre_norm: Whether the sublayer uses pre-normalization. Default: False.
        """
        super().__init__(model_dim, dropout, is_pre_norm)
        self.attn = ScaledDotAttention(model_dim, n_heads, attn_dropout)

    def forward(self, x, mask=None):
        """
        Performs a forward pass over the SelfAttentionSublayer.
        :param x: The input. Will be used as query, key and value.
        :param mask: The input mask.
        :return: A tuple of the sublayer output and the attention weights.
        """
        residual = x
        x = self.apply_pre_norm_if_needed(x)
        attn_out, attn_weights = self.attn((x, x, x, mask))
        out = self.apply_residual(residual, attn_out)
        out = self.apply_post_norm_if_needed(out)
        return out, attn_weights
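
The snippet below is a minimal usage sketch rather than part of the library source. It assumes a (seq_len, batch_size, model_dim) input layout and arbitrary example hyper-parameters; the exact tensor layout and mask handling are determined by ScaledDotAttention.

import torch

from pysimt.layers.transformers.self_attention_sublayer import SelfAttentionSublayer

# Illustrative hyper-parameters, not project defaults.
layer = SelfAttentionSublayer(model_dim=512, n_heads=8, dropout=0.1)

x = torch.rand(20, 4, 512)    # assumed (seq_len, batch_size, model_dim) layout
out, attn_weights = layer(x)  # no mask: full self-attention over `x`
print(out.shape)              # same shape as `x` (residual connection + normalization)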

Classes

class SelfAttentionSublayer (model_dim, n_heads, dropout=0.1, attn_dropout=0.0, is_pre_norm=False)

Base class for all neural network modules.

Your models should also subclass this class.

Modules can also contain other Modules, allowing them to be nested in a tree structure. You can assign the submodules as regular attributes:

import torch.nn as nn
import torch.nn.functional as F

class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.conv1 = nn.Conv2d(1, 20, 5)
        self.conv2 = nn.Conv2d(20, 20, 5)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        return F.relu(self.conv2(x))

Submodules assigned in this way will be registered, and their parameters will also be converted when you call to(), etc.

training (bool): Whether this module is in training or evaluation mode.

Creates a SelfAttentionSublayer.

:param model_dim: The model dimensionality.
:param n_heads: The number of attention heads.
:param dropout: The dropout rate for the residual connection.
:param attn_dropout: The dropout rate for the scaled dot-product attention.
:param is_pre_norm: Whether the sublayer uses pre-normalization. Default: False.
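
For illustration, the two normalization variants can be constructed as follows (hyper-parameter values are arbitrary examples):

from pysimt.layers.transformers.self_attention_sublayer import SelfAttentionSublayer

# Post-norm sublayer (default): normalization is applied after the residual connection.
post_norm = SelfAttentionSublayer(model_dim=256, n_heads=4)

# Pre-norm sublayer: the input is normalized before the attention is computed.
pre_norm = SelfAttentionSublayer(model_dim=256, n_heads=4, is_pre_norm=True)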

Source code
class SelfAttentionSublayer(BaseSublayer):

    def __init__(self, model_dim, n_heads, dropout=0.1,
                 attn_dropout=0.0, is_pre_norm=False):
        """
        Creates a SelfAttentionSublayer.
        :param model_dim: The model dimensionality.
        :param n_heads: The number of attention heads.
        :param dropout: The dropout rate for the residual connection.
        :param attn_dropout: The dropout rate for the scaled dot-product attention.
        :param is_pre_norm: Whether the sublayer uses pre-normalization. Default: False.
        """
        super().__init__(model_dim, dropout, is_pre_norm)
        self.attn = ScaledDotAttention(model_dim, n_heads, attn_dropout)

    def forward(self, x, mask=None):
        """
        Performs a forward pass over the SelfAttentionSublayer.
        :param x: The input. Will be used as query, key and value.
        :param mask: The input mask.
        :return: A tuple of the sublayer output and the attention weights.
        """
        residual = x
        x = self.apply_pre_norm_if_needed(x)
        attn_out, attn_weights = self.attn((x, x, x, mask))
        out = self.apply_residual(residual, attn_out)
        out = self.apply_post_norm_if_needed(out)
        return out, attn_weights

Ancestors

pysimt.layers.transformers.base_sublayer.BaseSublayer
torch.nn.modules.module.Module

Class variables

var dump_patches : bool
var training : bool

Methods

def forward(self, x, mask=None)

Performs a forward pass over the SelfAttentionSublayer.

:param x: The input. Will be used as query, key and value.
:param mask: The input mask.
:return: A tuple of the sublayer output and the attention weights.

Source code
def forward(self, x, mask=None):
    """
    Performs a forward pass over the SelfAttentionSublayer.
    :param x: The input. Will be used as query, key and value.
    :param mask: The input mask.
    :return: A tuple of the sublayer output and the attention weights.
    """
    residual = x
    x = self.apply_pre_norm_if_needed(x)
    attn_out, attn_weights = self.attn((x, x, x, mask))
    out = self.apply_residual(residual, attn_out)
    out = self.apply_post_norm_if_needed(out)
    return out, attn_weights
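
As a hedged illustration of a masked call: the expected mask shape and semantics are defined by ScaledDotAttention (not shown on this page), so the boolean padding mask below is purely hypothetical.

import torch

from pysimt.layers.transformers.self_attention_sublayer import SelfAttentionSublayer

layer = SelfAttentionSublayer(model_dim=512, n_heads=8).eval()  # eval() disables the residual dropout
x = torch.rand(20, 4, 512)                       # assumed (seq_len, batch_size, model_dim) layout

# Hypothetical padding mask: True marks padded source positions (the shape is an assumption).
pad_mask = torch.zeros(4, 20, dtype=torch.bool)
pad_mask[:, 15:] = True                          # e.g. the last five positions are padding

with torch.no_grad():
    out, attn_weights = layer(x, mask=pad_mask)  # returns the sublayer output and the attention weights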

Inherited members