Module pysimt.layers.embedding
Embedding layer variants.
Expand source code
"""Embedding layer variants."""
from typing import Optional
import torch
from torch import nn
from . import FF
class TFEmbedding(torch.nn.Embedding):
"""Position-aware embeddings for Transformer models. Based on the original
Transformers paper and the implementation of OpenNMT.
Args:
num_embeddings: The size of the dictionary of embeddings
embedding_dim: The size of each embedding vector
max_len: Maximum known sequence length for positional encodings
dropout: The dropout probability
"""
def __init__(self, num_embeddings: int, embedding_dim: int,
max_len: int = 1024, dropout: float = 0.1):
""""""
self.num_embeddings = num_embeddings
self.embedding_dim = embedding_dim
self.max_len = max_len
self.dropout = dropout
# pos_embs: (max_len, emb_dim)
pos_embs = torch.zeros(self.max_len, self.embedding_dim)
# pos: (max_len, 1)
pos = torch.arange(self.max_len).unsqueeze(1)
# divs:
divs = torch.pow(
10000,
torch.arange(self.embedding_dim).float().div(self.embedding_dim))
pos_embs[:, 0::2] = torch.sin(pos / divs[0::2])
pos_embs[:, 1::2] = torch.cos(pos / divs[1::2])
# pos_embs: (max_len, 1, emb_dim)
pos_embs.unsqueeze_(1)
sqrt_dim = torch.scalar_tensor(self.embedding_dim).sqrt()
# Call parent's init() first
super().__init__(num_embeddings, embedding_dim, padding_idx=0)
# Register non-learnable params as buffers
self.register_buffer('pos_embs', pos_embs)
self.register_buffer('sqrt_dim', sqrt_dim)
# Create dropout layer
self.dropout_layer = torch.nn.Dropout(p=self.dropout)
def forward(self, x):
# Get the embeddings from parent's forward first
embs = super().forward(x)
return self.dropout_layer(
embs.mul(self.sqrt_dim) + self.pos_embs[:embs.size(0)])
class ProjectedEmbedding(nn.Embedding):
"""An extension layer to regular `torch.nn.Embedding` with MLP and dropout
applied afterwards.
Args:
num_embeddings: The size of the dictionary of embeddings
embedding_dim: The size of each embedding vector
out_dim: The output size of the feed-forward projection layer
activ: The activation type of the feed-forward projection layer.
`None` and `linear` denote a linear layer.
dropout: the dropout probability
"""
def __init__(self, num_embeddings: int, embedding_dim: int,
out_dim: int, activ: Optional[str] = 'linear',
dropout: float = 0.0):
""""""
super().__init__(num_embeddings, embedding_dim, padding_idx=0)
self.proj = FF(embedding_dim, out_dim, activ=activ, bias=False)
self.do = nn.Dropout(dropout) if dropout > 0.0 else lambda x: x
def forward(self, input):
return self.do(self.proj(super().forward(input)))
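Both classes subclass torch.nn.Embedding and are called like it. Below is a minimal usage sketch; the vocabulary and dimension sizes are illustrative, and it assumes the time-major (seq_len, batch) token layout implied by the slicing pos_embs[:embs.size(0)] in TFEmbedding.forward, as well as an FF layer that projects the last dimension from embedding_dim to out_dim.

import torch

from pysimt.layers.embedding import ProjectedEmbedding, TFEmbedding

# Illustrative sizes, not taken from the source.
vocab_size, emb_dim, out_dim = 1000, 512, 256

tf_emb = TFEmbedding(vocab_size, emb_dim, max_len=1024, dropout=0.1)
proj_emb = ProjectedEmbedding(vocab_size, emb_dim, out_dim, activ='linear')

# Time-major token indices: (seq_len, batch); index 0 is reserved for padding.
x = torch.randint(1, vocab_size, (7, 4))

y_tf = tf_emb(x)      # (7, 4, 512): sqrt(d)-scaled embeddings + positional encodings
y_proj = proj_emb(x)  # (7, 4, 256): embeddings passed through the FF projection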
Classes
class ProjectedEmbedding (num_embeddings: int, embedding_dim: int, out_dim: int, activ: Optional[str] = 'linear', dropout: float = 0.0)
-
An extension layer to regular torch.nn.Embedding with MLP and dropout applied afterwards.
Args
num_embeddings
- The size of the dictionary of embeddings
embedding_dim
- The size of each embedding vector
out_dim
- The output size of the feed-forward projection layer
activ
- The activation type of the feed-forward projection layer. None and linear denote a linear layer.
dropout
- The dropout probability
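For intuition, with the defaults activ='linear' and dropout=0.0 the layer amounts to an embedding lookup followed by a bias-free linear projection. The following is a rough plain-PyTorch sketch of that composition, not the pysimt implementation itself (which delegates the projection to the FF helper):

from torch import nn


class PlainProjectedEmbedding(nn.Module):
    """Sketch: embedding lookup -> bias-free linear projection -> dropout."""

    def __init__(self, num_embeddings: int, embedding_dim: int,
                 out_dim: int, dropout: float = 0.0):
        super().__init__()
        self.emb = nn.Embedding(num_embeddings, embedding_dim, padding_idx=0)
        self.proj = nn.Linear(embedding_dim, out_dim, bias=False)
        self.do = nn.Dropout(dropout)

    def forward(self, x):
        # (...) long indices -> (..., embedding_dim) -> (..., out_dim)
        return self.do(self.proj(self.emb(x)))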
Expand source code
class ProjectedEmbedding(nn.Embedding):
    """An extension layer to regular `torch.nn.Embedding` with MLP and dropout
    applied afterwards.

    Args:
        num_embeddings: The size of the dictionary of embeddings
        embedding_dim: The size of each embedding vector
        out_dim: The output size of the feed-forward projection layer
        activ: The activation type of the feed-forward projection layer.
            `None` and `linear` denote a linear layer.
        dropout: the dropout probability
    """
    def __init__(self, num_embeddings: int, embedding_dim: int,
                 out_dim: int, activ: Optional[str] = 'linear',
                 dropout: float = 0.0):
        """"""
        super().__init__(num_embeddings, embedding_dim, padding_idx=0)
        self.proj = FF(embedding_dim, out_dim, activ=activ, bias=False)
        self.do = nn.Dropout(dropout) if dropout > 0.0 else lambda x: x

    def forward(self, input):
        return self.do(self.proj(super().forward(input)))
Ancestors
- torch.nn.modules.sparse.Embedding
- torch.nn.modules.module.Module
Class variables
var embedding_dim : int
var max_norm : float
var norm_type : float
var num_embeddings : int
var padding_idx : int
var scale_grad_by_freq : bool
var sparse : bool
var weight : torch.Tensor
Methods
def forward(self, input)
-
Defines the computation performed at every call.
Should be overridden by all subclasses.
Note
Although the recipe for forward pass needs to be defined within this function, one should call the Module instance afterwards instead of this since the former takes care of running the registered hooks while the latter silently ignores them.
Expand source code
def forward(self, input):
    return self.do(self.proj(super().forward(input)))
class TFEmbedding (num_embeddings: int, embedding_dim: int, max_len: int = 1024, dropout: float = 0.1)
-
Position-aware embeddings for Transformer models. Based on the original Transformers paper and the implementation of OpenNMT.
Args
num_embeddings
- The size of the dictionary of embeddings
embedding_dim
- The size of each embedding vector
max_len
- Maximum known sequence length for positional encodings
dropout
- The dropout probability
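The constructor precomputes a sinusoidal positional table (registered as the pos_embs buffer): even dimensions receive sines and odd dimensions cosines, with divisors of the form 10000 ** (j / embedding_dim) for dimension index j. The standalone sketch below rebuilds such a table with toy sizes so the values can be inspected; the variable names mirror the source, the sizes are illustrative, and the final unsqueeze to (max_len, 1, embedding_dim) used for broadcasting over the batch dimension is omitted.

import torch

max_len, emb_dim = 8, 4  # toy sizes for inspection

pos = torch.arange(max_len).unsqueeze(1)                          # (max_len, 1)
divs = torch.pow(10000, torch.arange(emb_dim).float() / emb_dim)  # (emb_dim,)

pos_embs = torch.zeros(max_len, emb_dim)
pos_embs[:, 0::2] = torch.sin(pos / divs[0::2])  # even dimensions: sine
pos_embs[:, 1::2] = torch.cos(pos / divs[1::2])  # odd dimensions: cosine

# Position 0 is [0, 1, 0, 1, ...]: sin(0) for even dims, cos(0) for odd dims.
print(pos_embs[0])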
Expand source code
class TFEmbedding(torch.nn.Embedding):
    """Position-aware embeddings for Transformer models. Based on the original
    Transformers paper and the implementation of OpenNMT.

    Args:
        num_embeddings: The size of the dictionary of embeddings
        embedding_dim: The size of each embedding vector
        max_len: Maximum known sequence length for positional encodings
        dropout: The dropout probability
    """
    def __init__(self, num_embeddings: int, embedding_dim: int,
                 max_len: int = 1024, dropout: float = 0.1):
        """"""
        self.num_embeddings = num_embeddings
        self.embedding_dim = embedding_dim
        self.max_len = max_len
        self.dropout = dropout

        # pos_embs: (max_len, emb_dim)
        pos_embs = torch.zeros(self.max_len, self.embedding_dim)

        # pos: (max_len, 1)
        pos = torch.arange(self.max_len).unsqueeze(1)

        # divs: (emb_dim)
        divs = torch.pow(
            10000,
            torch.arange(self.embedding_dim).float().div(self.embedding_dim))

        pos_embs[:, 0::2] = torch.sin(pos / divs[0::2])
        pos_embs[:, 1::2] = torch.cos(pos / divs[1::2])

        # pos_embs: (max_len, 1, emb_dim)
        pos_embs.unsqueeze_(1)

        sqrt_dim = torch.scalar_tensor(self.embedding_dim).sqrt()

        # Call parent's init() first
        super().__init__(num_embeddings, embedding_dim, padding_idx=0)

        # Register non-learnable params as buffers
        self.register_buffer('pos_embs', pos_embs)
        self.register_buffer('sqrt_dim', sqrt_dim)

        # Create dropout layer
        self.dropout_layer = torch.nn.Dropout(p=self.dropout)

    def forward(self, x):
        # Get the embeddings from parent's forward first
        embs = super().forward(x)
        return self.dropout_layer(
            embs.mul(self.sqrt_dim) + self.pos_embs[:embs.size(0)])
Ancestors
- torch.nn.modules.sparse.Embedding
- torch.nn.modules.module.Module
Class variables
var embedding_dim : int
var max_norm : float
var norm_type : float
var num_embeddings : int
var padding_idx : int
var scale_grad_by_freq : bool
var sparse : bool
var weight : torch.Tensor
Methods
def forward(self, x)
-
Defines the computation performed at every call.
Should be overridden by all subclasses.
Note
Although the recipe for forward pass needs to be defined within this function, one should call the Module instance afterwards instead of this since the former takes care of running the registered hooks while the latter silently ignores them.
Expand source code
def forward(self, x):
    # Get the embeddings from parent's forward first
    embs = super().forward(x)
    return self.dropout_layer(
        embs.mul(self.sqrt_dim) + self.pos_embs[:embs.size(0)])
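In eval mode, where dropout is a no-op, this forward pass reduces to scaling the token embeddings by sqrt(embedding_dim) and adding the first seq_len rows of the positional table. The check below sketches that equivalence using the pos_embs and sqrt_dim buffers registered in the constructor; the sizes are illustrative.

import torch

from pysimt.layers.embedding import TFEmbedding

layer = TFEmbedding(num_embeddings=50, embedding_dim=16, max_len=32, dropout=0.1)
layer.eval()  # disable dropout so the comparison is deterministic

x = torch.randint(1, 50, (5, 2))  # (seq_len=5, batch=2) token indices

# Same computation as TFEmbedding.forward, written out with the registered buffers.
expected = layer.weight[x] * layer.sqrt_dim + layer.pos_embs[:5]
assert torch.allclose(layer(x), expected)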