Source code for mwptoolkit.module.Embedder.position_embedder

# -*- encoding: utf-8 -*-
# @Author: Yihuai Lan
# @Time: 2021/08/29 21:47:08
# @File: position_embedder.py


import math
import torch
from torch import nn

class PositionEmbedder_x(nn.Module):
    def __init__(self, embedding_size, max_len=1024):
        super(PositionEmbedder_x, self).__init__()
        # Pre-compute the fixed sinusoidal position table of shape [max_len, embedding_size].
        pe = torch.zeros(max_len, embedding_size)
        pe.requires_grad = False
        position = torch.arange(0, max_len).float().unsqueeze(1)
        div_term = torch.exp(torch.arange(0, embedding_size, 2).float()
                             * (-torch.log(torch.tensor(10000.0)) / embedding_size))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0).transpose(0, 1)  # [max_len, 1, embedding_size]
        self.register_buffer('pe', pe)

    def forward(self, input_embedding):
        """
        Args:
            input_embedding (torch.Tensor): shape [batch_size, sequence_length, embedding_size].

        Returns:
            torch.Tensor: input embedding with sinusoidal position encodings added, same shape.
        """
        seq_len = input_embedding.size(1)
        outputs = input_embedding + self.pe.squeeze()[:seq_len]
        return outputs
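
# Usage sketch for PositionEmbedder_x (added for illustration; the batch size, sequence
# length and embedding size below are arbitrary assumptions, not values taken from the toolkit):
#
#     embedder = PositionEmbedder_x(embedding_size=128, max_len=1024)
#     tokens = torch.randn(4, 20, 128)   # [batch_size=4, seq_len=20, embedding_size=128]
#     out = embedder(tokens)             # fixed sinusoidal positions added element-wise
#     assert out.shape == (4, 20, 128)
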
class PositionEmbedder(nn.Module):
    r"""This module produces sinusoidal positional embeddings of any length."""

    def __init__(self, embedding_size, max_length=512):
        super(PositionEmbedder, self).__init__()
        self.embedding_size = embedding_size
        self.weights = self.get_embedding(max_length, embedding_size)

    def get_embedding(self, max_length, embedding_size):
        r"""Build sinusoidal embeddings.

        This matches the implementation in tensor2tensor, but differs slightly
        from the description in Section 3.5 of "Attention Is All You Need".
        """
        half_dim = embedding_size // 2
        emb = math.log(10000) / (half_dim - 1)
        emb = torch.exp(torch.arange(half_dim, dtype=torch.float) * -emb)
        emb = torch.arange(max_length, dtype=torch.float).unsqueeze(1) * emb.unsqueeze(0)
        emb = torch.cat([torch.sin(emb), torch.cos(emb)], dim=1).view(max_length, -1)
        if embedding_size % 2 == 1:
            # zero pad the last column for odd embedding sizes
            emb = torch.cat([emb, torch.zeros(max_length, 1)], dim=1)
        return emb

    def forward(self, input_seq, offset=0):
        """
        Args:
            input_seq (torch.Tensor): input sequence, shape [batch_size, sequence_length].

        Returns:
            torch.Tensor: position embedding, shape [batch_size, sequence_length, embedding_size].
        """
        batch_size, seq_len = input_seq.size()
        max_position = seq_len + offset
        if self.weights is None or max_position > self.weights.size(0):
            # recompute/expand the embedding table if the requested positions exceed it
            self.weights = self.get_embedding(max_position, self.embedding_size)

        positions = offset + torch.arange(seq_len)
        pos_embeddings = self.weights.index_select(0, positions).unsqueeze(0).expand(batch_size, -1, -1).detach()
        return pos_embeddings
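
# Usage sketch for PositionEmbedder (illustrative; the shapes and dummy token ids are
# assumptions): unlike PositionEmbedder_x, forward() takes the token id matrix itself and
# returns only the position embeddings, detached from the graph.
#
#     pos_embedder = PositionEmbedder(embedding_size=128, max_length=512)
#     token_ids = torch.zeros(4, 20, dtype=torch.long)   # [batch_size, seq_len]; only the shape is used
#     pos = pos_embedder(token_ids)                      # [4, 20, 128] sinusoidal position embeddings
#     assert pos.shape == (4, 20, 128)
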
class PositionalEncoding(nn.Module):
    def __init__(self, pos_size, dim):
        super(PositionalEncoding, self).__init__()
        pe = torch.rand(pos_size, dim)
        # rescale the uniform samples from (0, 1) to (-1, 1)
        pe = pe * 2 - 1
        self.pe = nn.Parameter(pe)

    def forward(self, input):
        output = input + self.pe[:input.size(1)]
        return output
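
# Usage sketch for PositionalEncoding (illustrative; pos_size, dim and the input shape are
# assumptions): here the position table is a randomly initialised, learnable nn.Parameter
# in (-1, 1), added to the input like the sinusoidal variants above.
#
#     learned_pe = PositionalEncoding(pos_size=512, dim=128)
#     x = torch.randn(4, 20, 128)    # [batch_size, seq_len, dim]
#     y = learned_pe(x)              # adds the first 20 learned position vectors
#     assert y.shape == (4, 20, 128)
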
class EPTPositionalEncoding(nn.Module):
    """
    Positional encoding that extends the trigonometric embedding proposed in 'Attention is all you need'.
    """

    def __init__(self, embedding_dim):
        """
        Instantiate a positional encoding instance.

        :param int embedding_dim: Dimension of the embedding vector
        """
        super().__init__()
        #: Dimension of embedding vector
        self.embedding_dim = embedding_dim

        # The output will be c_p * cos(a_p * t + b_p) + d_p * sin(a_p * t + b_p),
        # where t = index and p = 1 ... embedding_dim, following the "Attention is all you need" paper.
        # Here, b_p = 0 and a_{2p} = a_{2p+1} = 1 / 10000^{2p / embedding_dim},
        # so only a_p needs to be defined.
        div_term = (torch.arange(0, embedding_dim) // 2) * 2
        div_term = torch.exp(div_term.float() * (-math.log(10000.0) / embedding_dim))

        # Note: c_p = 1 if p is odd, 0 otherwise; d_p = 1 if p is even, 0 otherwise.
        multiplier = torch.zeros(2, embedding_dim, dtype=torch.float)
        multiplier[0, 1::2] = 1.0  # Only use cosine for odd indices
        multiplier[1, 0::2] = 1.0  # Only use sine for even indices

        # Fix the a_p, c_p, d_p values as non-trainable buffers.
        self.register_buffer('_div_term', div_term)
        self.register_buffer('multiplier', multiplier)

    @property
    def device(self) -> torch.device:
        """
        Get the device where the weights are currently stored.

        :rtype: torch.device
        :return: Device instance
        """
        return self._div_term.device

    def before_trigonometric(self, indices: torch.Tensor) -> torch.Tensor:
        """
        Compute a_p * t + b_p for each index t.

        :param torch.Tensor indices: A Long tensor of position indices.
        :rtype: torch.Tensor
        :return: Tensor whose values are a_p * t + b_p for each (t, p) entry.
        """
        indices = indices.float()

        # Compute a_p * t (b_p is zero in this parameterization).
        return indices * self._div_term

    def forward(self, index_or_range, ignored_index=-1) -> torch.Tensor:
        """
        Compute the positional encoding. Since this encoding is not learnable, the result carries no gradient.

        .. math::
            P_{t, p} = c_p \\cos(a_p t + b_p) + d_p \\sin(a_p t + b_p).

        :param Union[torch.Tensor,int,range] index_or_range:
            Value that specifies the positional encodings to be built.

            - A Tensor value indicates the indices themselves.
            - An integer value indicates indices from 0 to the value - 1.
            - A range value indicates indices within the range.
        :param int ignored_index: The index to be ignored. ``PAD_ID`` by default.
        :rtype: torch.Tensor
        :return: Positional encoding of the given value.

            - If a torch.Tensor of shape [*, L] is given, the result has shape [*, L, E] if L is not 1, otherwise [*, E].
            - If an integer or range is given, the result has shape [T, E], where T is the length of the range.
        """
        # No gradients are needed for a fixed encoding.
        with torch.no_grad():
            return self._forward(index_or_range, ignored_index)

    def _forward(self, index_or_range, ignored_index=-1) -> torch.Tensor:
        """
        Compute the positional encoding.

        .. math::
            P_{t, p} = c_p \\cos(a_p t + b_p) + d_p \\sin(a_p t + b_p).

        :param Union[torch.Tensor,int,range] index_or_range:
            Value that specifies the positional encodings to be built.

            - A Tensor value indicates the indices themselves.
            - An integer value indicates indices from 0 to the value - 1.
            - A range value indicates indices within the range.
        :param int ignored_index: The index to be ignored. ``PAD_ID`` by default.
        :rtype: torch.Tensor
        :return: Positional encoding of the given value.

            - If a torch.Tensor of shape [*, L] is given, the result has shape [*, L, E] if L is not 1, otherwise [*, E].
            - If an integer or range is given, the result has shape [T, E], where T is the length of the range.
        """
        if type(index_or_range) is int:
            # Build a Long tensor of [0, ..., index - 1].
            indices = torch.arange(0, index_or_range)
        elif type(index_or_range) is range:
            # Build a Long tensor from the given range.
            indices = torch.as_tensor(list(index_or_range))
        else:
            indices = index_or_range

        # Unsqueeze the last dimension so the indices broadcast against the [E]-shaped coefficients.
        indices = indices.unsqueeze(-1)

        # Move the indices to the device currently in use.
        indices = indices.to(self.device)

        # Indices now have shape [*, 1]; compute the phase a_p * t + b_p.
        phase = self.before_trigonometric(indices)

        # The phase has shape [*, E]. Apply cosine and sine to it.
        cos_value = phase.cos()
        sin_value = phase.sin()

        # Retrieve the c_p and d_p vectors. These have shape [E].
        cos_multiplier = self.multiplier[0]
        sin_multiplier = self.multiplier[1]

        # Reshape c_p and d_p to [1, ..., 1, E] so they broadcast over the leading dimensions of [*, E].
        result_shape = [1] * (phase.dim() - 1) + [-1]
        cos_multiplier = cos_multiplier.view(*result_shape)
        sin_multiplier = sin_multiplier.view(*result_shape)

        # Compute c_p * cos(phase) + d_p * sin(phase). The shape is [*, E].
        result = cos_value * cos_multiplier + sin_value * sin_multiplier

        # Zero out the encodings of ignored (padding) indices.
        ignored_indices = (indices == ignored_index)
        if ignored_indices.any():
            result.masked_fill_(ignored_indices, 0.0)

        # Return a contiguous tensor of shape [*, E].
        return result.contiguous()
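
# Usage sketch for EPTPositionalEncoding (illustrative; the index values and the ignored_index
# choice below are assumptions): the module accepts an int, a range, or an index tensor.
#
#     ept_pe = EPTPositionalEncoding(embedding_dim=128)
#     enc = ept_pe(10)                                   # indices 0..9 -> shape [10, 128]
#     assert enc.shape == (10, 128)
#     batch_indices = torch.tensor([[0, 1, 2], [3, 4, -1]])
#     enc = ept_pe(batch_indices, ignored_index=-1)      # shape [2, 3, 128]; rows with index -1 are zeroed
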
class DisPositionalEncoding(nn.Module):
    def __init__(self, embedding_size, max_len):
        super(DisPositionalEncoding, self).__init__()
        # Build the fixed sinusoidal table and load it into a frozen nn.Embedding.
        pe = torch.zeros(max_len, embedding_size)
        pe.requires_grad = False
        position = torch.arange(0, max_len).float().unsqueeze(1)
        div_term = torch.exp(torch.arange(0, embedding_size, 2).float()
                             * (-torch.log(torch.tensor(10000.0)) / embedding_size))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        self.position_encoding = nn.Embedding(max_len, embedding_size)
        self.position_encoding.weight = nn.Parameter(pe, requires_grad=False)

    def forward(self, dis_graph, category_num):
        dis_graph_expend = dis_graph.unsqueeze(1)  # [B, 1, S, S]
        # zero-pad on the right and bottom for the category nodes: [B, 1, S + C, S + C]
        zero_pad = nn.ZeroPad2d(padding=(0, category_num, 0, category_num))
        dis_graph_expend = zero_pad(dis_graph_expend)
        input_pos = dis_graph_expend.squeeze(1).long()
        return self.position_encoding(input_pos)
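
# Usage sketch for DisPositionalEncoding (illustrative; the distance values, sequence length
# and category_num below are assumptions): the pairwise distance graph is zero-padded by
# category_num on the right/bottom and each entry is looked up in the frozen embedding table.
#
#     dis_pe = DisPositionalEncoding(embedding_size=128, max_len=256)
#     dis_graph = torch.randint(0, 10, (4, 20, 20)).float()   # [batch_size, seq_len, seq_len]
#     out = dis_pe(dis_graph, category_num=5)                  # [4, 25, 25, 128]
#     assert out.shape == (4, 25, 25, 128)
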