Source code for mwptoolkit.module.Layer.layers

# -*- encoding: utf-8 -*-
# @Author: Yihuai Lan
# @Time: 2021/08/29 22:04:38
# @File: layers.py


import torch
from torch import nn

from mwptoolkit.module.Attention.seq_attention import Attention
from mwptoolkit.module.Attention.tree_attention import TreeAttention as Attn

class GenVar(nn.Module):
    """Module to generate variable embedding.

    Args:
        dim_encoder_state (int): Dimension of the last cell state of encoder RNN (output of Encoder module).
        dim_context (int): Dimension of RNN in GenVar module.
        dim_attn_hidden (int): Dimension of hidden layer in attention.
        dropout_rate (float): Dropout rate for attention and MLP.
    """
    def __init__(self, dim_encoder_state, dim_context, dim_attn_hidden=256, dropout_rate=0.5):
        super(GenVar, self).__init__()
        self.attention = Attention(dim_context, dim_encoder_state, dim_attn_hidden, dropout_rate)
    def forward(self, encoder_state, context, context_lens):
        """Generate embedding for an unknown variable.

        Args:
            encoder_state (torch.FloatTensor): Last cell state of the encoder (output of Encoder module).
            context (torch.FloatTensor): Encoded context, with size [batch_size, text_len, dim_hidden].
            context_lens (list): Length of each context sequence in the batch.

        Returns:
            torch.FloatTensor: Embedding of an unknown variable, with size [batch_size, dim_context].
        """
        attn = self.attention(context, encoder_state.squeeze(0), context_lens)
        return attn
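
# A minimal usage sketch for GenVar (not part of the library): the tensor shapes and
# dimension values below are illustrative assumptions chosen to match the call pattern
# in GenVar.forward above; the exact output depends on the Attention implementation.
def _genvar_usage_example():
    batch_size, text_len = 4, 10
    dim_encoder_state, dim_context = 128, 128
    gen_var = GenVar(dim_encoder_state, dim_context)
    encoder_state = torch.randn(1, batch_size, dim_encoder_state)  # squeezed to [batch_size, dim_encoder_state] in forward
    context = torch.randn(batch_size, text_len, dim_context)       # encoded context
    context_lens = [text_len] * batch_size                         # length of each context sequence
    var_emb = gen_var(encoder_state, context, context_lens)        # expected size: [batch_size, dim_context]
    return var_emb
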
class Transformer(nn.Module):
    """MLP that merges the concatenation of two dim_hidden vectors into a single dim_hidden vector."""
    def __init__(self, dim_hidden):
        super(Transformer, self).__init__()
        self.mlp = nn.Sequential(
            nn.Linear(2 * dim_hidden, dim_hidden),
            nn.ReLU(),
            nn.Linear(dim_hidden, dim_hidden),
            nn.Tanh()
        )
        self.ret = nn.Parameter(torch.zeros(dim_hidden))
        nn.init.normal_(self.ret.data)
    def forward(self, top2):
        return self.mlp(top2)
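
# A minimal usage sketch for Transformer (not part of the library): dim_hidden and the
# batch size are illustrative assumptions. The input is the concatenation of two
# dim_hidden vectors, matching the 2 * dim_hidden input of the MLP above.
def _transformer_usage_example():
    dim_hidden, batch_size = 128, 4
    transformer = Transformer(dim_hidden)
    left = torch.randn(batch_size, dim_hidden)
    right = torch.randn(batch_size, dim_hidden)
    top2 = torch.cat((left, right), dim=1)  # [batch_size, 2 * dim_hidden]
    merged = transformer(top2)              # [batch_size, dim_hidden]
    return merged
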
class TreeAttnDecoderRNN(nn.Module):
    def __init__(self, hidden_size, embedding_size, input_size, output_size, n_layers=2, dropout=0.5):
        super(TreeAttnDecoderRNN, self).__init__()

        # Keep for reference
        self.embedding_size = embedding_size
        self.hidden_size = hidden_size
        self.input_size = input_size
        self.output_size = output_size
        self.n_layers = n_layers
        self.dropout = dropout

        # Define layers
        self.em_dropout = nn.Dropout(dropout)
        self.embedding = nn.Embedding(input_size, embedding_size, padding_idx=0)
        self.gru = nn.GRU(hidden_size + embedding_size, hidden_size, n_layers, dropout=dropout)
        self.concat = nn.Linear(hidden_size * 2, hidden_size)
        self.out = nn.Linear(hidden_size, output_size)
        # Choose attention model
        self.attn = Attn(hidden_size, hidden_size)
    def forward(self, input_seq, last_hidden, encoder_outputs, seq_mask):
        # Get the embedding of the current input word (last output word)
        batch_size = input_seq.size(0)
        embedded = self.embedding(input_seq)
        embedded = self.em_dropout(embedded)
        embedded = embedded.view(1, batch_size, self.embedding_size)  # S=1 x B x N

        # Calculate attention from current RNN state and all encoder outputs;
        # apply to encoder outputs to get weighted average
        attn_weights = self.attn(last_hidden[-1].unsqueeze(0), encoder_outputs, seq_mask)
        context = attn_weights.bmm(encoder_outputs.transpose(0, 1))  # B x S=1 x N

        # Get current hidden state from input word and last hidden state
        rnn_output, hidden = self.gru(torch.cat((embedded, context.transpose(0, 1)), 2), last_hidden)

        # Attentional vector using the RNN hidden state and context vector
        # concatenated together (Luong eq. 5)
        output = self.out(torch.tanh(self.concat(torch.cat((rnn_output.squeeze(0), context.squeeze(1)), 1))))

        # Return final output, hidden state
        return output, hidden
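
# A minimal usage sketch for TreeAttnDecoderRNN (not part of the library): all sizes and
# the all-False seq_mask are illustrative assumptions; it relies on TreeAttention producing
# weights of size [batch_size, 1, seq_len], as implied by the bmm in forward above.
def _tree_attn_decoder_usage_example():
    hidden_size, embedding_size, input_size, output_size = 128, 64, 100, 30
    n_layers, batch_size, seq_len = 2, 4, 10
    decoder = TreeAttnDecoderRNN(hidden_size, embedding_size, input_size, output_size, n_layers)
    input_seq = torch.randint(1, input_size, (batch_size,))          # last predicted token per batch item
    last_hidden = torch.zeros(n_layers, batch_size, hidden_size)     # initial decoder hidden state
    encoder_outputs = torch.randn(seq_len, batch_size, hidden_size)  # S x B x H
    seq_mask = torch.zeros(batch_size, seq_len, dtype=torch.bool)    # no positions masked out
    output, hidden = decoder(input_seq, last_hidden, encoder_outputs, seq_mask)
    # output: [batch_size, output_size]; hidden: [n_layers, batch_size, hidden_size]
    return output, hidden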