
tensorflow - Multidimensional LSTM in TensorFlow


Can anyone suggest improvements to my implementation of a multidimensional LSTM?

It is very slow and uses a lot of memory.

import tensorflow as tf
# note: this code uses the pre-1.0 TensorFlow API
# (tf.split(axis, num, value), tf.concat(axis, values), tf.pack)


class MultiDimentionalLSTMCell(tf.nn.rnn_cell.RNNCell):
    """
    Adapted from TF's BasicLSTMCell to use Layer Normalization.
    Note that state_is_tuple is always True.
    """

    def __init__(self, num_units, forget_bias=1.0, activation=tf.nn.tanh):
        self._num_units = num_units
        self._forget_bias = forget_bias
        self._activation = activation

    @property
    def state_size(self):
        return tf.nn.rnn_cell.LSTMStateTuple(self._num_units, self._num_units)

    @property
    def output_size(self):
        return self._num_units

    def __call__(self, inputs, state, scope=None):
        """Long short-term memory cell (LSTM).

        @param inputs: (batch, n)
        @param state: the states and hidden units of the two predecessor cells
        """
        with tf.variable_scope(scope or type(self).__name__):
            c1, c2, h1, h2 = state

            # change bias argument to False since LN will add bias via shift
            concat = tf.nn.rnn_cell._linear([inputs, h1, h2], 5 * self._num_units, False)

            i, j, f1, f2, o = tf.split(1, 5, concat)

            # add layer normalization to each gate
            # i = ln(i, scope='i/')
            # j = ln(j, scope='j/')
            # f1 = ln(f1, scope='f1/')
            # f2 = ln(f2, scope='f2/')
            # o = ln(o, scope='o/')

            # one forget gate per predecessor (the cell above and the cell to the left)
            new_c = (c1 * tf.nn.sigmoid(f1 + self._forget_bias) +
                     c2 * tf.nn.sigmoid(f2 + self._forget_bias) +
                     tf.nn.sigmoid(i) * self._activation(j))

            # add layer normalization in calculation of the new hidden state
            # (ln is the layer-norm helper defined in the answer below)
            new_h = self._activation(ln(new_c, scope='new_h/')) * tf.nn.sigmoid(o)
            new_state = tf.nn.rnn_cell.LSTMStateTuple(new_c, new_h)

            return new_h, new_state


def MultidimentionalRNN(rnn_size, input_data, sh, dims=None, scopeN="layer1"):
    """Implements a naive multidimensional recurrent neural network.

    @param rnn_size: the number of hidden units
    @param input_data: the data to process, of shape [batch, h, w, channels]
    @param sh: [height, width] of the windows
    @param dims: dimensions along which to reverse the input data, e.g.
        dims=[False, True, True, False] => True means reverse that dimension
    @param scopeN: the scope

    returns [batch, h/sh[0], w/sh[1], channels*sh[0]*sh[1]], the output of the lstm
    """
    with tf.variable_scope("MultiDimentionalLSTMCell-" + scopeN):
        cell = MultiDimentionalLSTMCell(rnn_size)

        shape = input_data.get_shape().as_list()
        # pad height/width with zeros up to a multiple of the window size
        # todo:
        # y = tf.cond(condition > 0, lambda: tf.matmul(x, W) + b, lambda: tf.matmul(x, W) - b)
        if shape[1] % sh[0] != 0:
            offset = tf.zeros([shape[0], sh[0] - (shape[1] % sh[0]), shape[2], shape[3]])
            input_data = tf.concat(1, [input_data, offset])
            shape = input_data.get_shape().as_list()
        if shape[2] % sh[1] != 0:
            offset = tf.zeros([shape[0], shape[1], sh[1] - (shape[2] % sh[1]), shape[3]])
            input_data = tf.concat(2, [input_data, offset])
            shape = input_data.get_shape().as_list()

        # note: the answer below assigns these in h, w order
        w, h = int(shape[1] / sh[0]), int(shape[2] / sh[1])
        features = sh[1] * sh[0] * shape[3]
        batch_size = shape[0]

        # one feature vector per window, laid out row by row
        x = tf.reshape(input_data, [batch_size, h, w, features])
        if dims is not None:
            x = tf.reverse(x, dims)
        x = tf.transpose(x, [1, 2, 0, 3])
        x = tf.reshape(x, [-1, features])
        x = tf.split(0, h * w, x)
        states = []
        outputs = []
        # todo: add 2D seq_len (have to add paddings after)
        # use tf.get_variable()
        # result = tf.while_loop(condition, body, [x])
        with tf.variable_scope("MultiDimentionalRnn-" + scopeN) as scope:
            for i, inputs in enumerate(x):
                # state from the window above (same column, previous row)
                # stateUp = tf.cond(i >= w, lambda: states[i-w], lambda: cell.zero_state(batch_size, tf.float32))
                stateUp = states[i - w] if i >= w else cell.zero_state(batch_size, tf.float32)
                # state from the window to the left (previous column)
                # stateLast = tf.cond(i % w > 0, lambda: states[i-1], lambda: cell.zero_state(batch_size, tf.float32))
                stateLast = states[i - 1] if i % w > 0 else cell.zero_state(batch_size, tf.float32)

                currentState = stateUp[0], stateLast[0], stateUp[1], stateLast[1]
                out, state = cell(inputs, currentState)
                states.append(state)
                outputs.append(out)
                scope.reuse_variables()
        outputs = tf.pack(outputs, axis=0)

        y = tf.reshape(outputs, [h, w, batch_size, rnn_size])
        y = tf.transpose(y, [2, 0, 1, 3])
        if dims is not None:
            y = tf.reverse(y, dims)

        return y
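
For reference, a call might look like the sketch below. The shapes are illustrative assumptions (not from the question), and since the code reads get_shape().as_list(), every dimension, including the batch size, must be static:

images = tf.placeholder(tf.float32, [2, 8, 8, 1])  # hypothetical [batch, height, width, channels]
# 2x2 windows => a 4x4 grid of LSTM steps with 16 hidden units each
y = MultidimentionalRNN(rnn_size=16, input_data=images, sh=[2, 2])
# y has shape [2, 4, 4, 16]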

1 Answer

import tensorflow as tf


def ln(tensor, scope=None, epsilon=1e-5):
    """Layer normalizes a 2D tensor along its second axis."""
    assert len(tensor.get_shape()) == 2
    m, v = tf.nn.moments(tensor, [1], keep_dims=True)
    if not isinstance(scope, str):
        scope = ''
    with tf.variable_scope(scope + 'layer_norm'):
        scale = tf.get_variable('scale',
                                shape=[tensor.get_shape()[1]],
                                initializer=tf.constant_initializer(1))
        shift = tf.get_variable('shift',
                                shape=[tensor.get_shape()[1]],
                                initializer=tf.constant_initializer(0))
    LN_initial = (tensor - m) / tf.sqrt(v + epsilon)

    return LN_initial * scale + shift
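
As a quick sanity check (a minimal sketch, assuming the same pre-1.0 TensorFlow API as the rest of the answer): with the initial scale=1 and shift=0, ln simply standardizes each row to zero mean and unit variance:

x = tf.constant([[1.0, 2.0, 3.0]])
y = ln(x, scope='demo/')
with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())
    print(sess.run(y))  # approximately [[-1.2247, 0.0, 1.2247]]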


class MultiDimentionalLSTMCell(tf.nn.rnn_cell.RNNCell):
    """
    Adapted from TF's BasicLSTMCell to use Layer Normalization.
    Note that state_is_tuple is always True.
    """

    def __init__(self, num_units, forget_bias=0.0, activation=tf.nn.tanh):
        self._num_units = num_units
        self._forget_bias = forget_bias
        self._activation = activation

    @property
    def state_size(self):
        return tf.nn.rnn_cell.LSTMStateTuple(self._num_units, self._num_units)

    @property
    def output_size(self):
        return self._num_units

    def __call__(self, inputs, state, scope=None):
        """Long short-term memory cell (LSTM).

        @param inputs: (batch, n)
        @param state: the states and hidden units of the two predecessor cells
        """
        with tf.variable_scope(scope or type(self).__name__):
            c1, c2, h1, h2 = state

            # change bias argument to False since LN will add bias via shift
            concat = tf.nn.rnn_cell._linear([inputs, h1, h2], 5 * self._num_units, False)

            i, j, f1, f2, o = tf.split(1, 5, concat)

            # add layer normalization to each gate
            i = ln(i, scope='i/')
            j = ln(j, scope='j/')
            f1 = ln(f1, scope='f1/')
            f2 = ln(f2, scope='f2/')
            o = ln(o, scope='o/')

            new_c = (c1 * tf.nn.sigmoid(f1 + self._forget_bias) +
                     c2 * tf.nn.sigmoid(f2 + self._forget_bias) +
                     tf.nn.sigmoid(i) * self._activation(j))

            # add layer normalization in calculation of the new hidden state
            new_h = self._activation(ln(new_c, scope='new_h/')) * tf.nn.sigmoid(o)
            new_state = tf.nn.rnn_cell.LSTMStateTuple(new_c, new_h)

            return new_h, new_state


def multiDimentionalRNN_whileLoop(rnn_size, input_data, sh, dims=None, scopeN="layer1"):
    """Implements a naive multidimensional recurrent neural network.

    @param rnn_size: the number of hidden units
    @param input_data: the data to process, of shape [batch, h, w, channels]
    @param sh: [height, width] of the windows
    @param dims: dimensions along which to reverse the input data, e.g.
        dims=[False, True, True, False] => True means reverse that dimension
    @param scopeN: the scope

    returns [batch, h/sh[0], w/sh[1], channels*sh[0]*sh[1]], the output of the lstm
    """
    with tf.variable_scope("MultiDimentionalLSTMCell-" + scopeN):
        cell = MultiDimentionalLSTMCell(rnn_size)

        shape = input_data.get_shape().as_list()

        # pad height/width with zeros up to a multiple of the window size
        if shape[1] % sh[0] != 0:
            offset = tf.zeros([shape[0], sh[0] - (shape[1] % sh[0]), shape[2], shape[3]])
            input_data = tf.concat(1, [input_data, offset])
            shape = input_data.get_shape().as_list()
        if shape[2] % sh[1] != 0:
            offset = tf.zeros([shape[0], shape[1], sh[1] - (shape[2] % sh[1]), shape[3]])
            input_data = tf.concat(2, [input_data, offset])
            shape = input_data.get_shape().as_list()

        h, w = int(shape[1] / sh[0]), int(shape[2] / sh[1])
        features = sh[1] * sh[0] * shape[3]
        batch_size = shape[0]

        x = tf.reshape(input_data, [batch_size, h, w, features])
        if dims is not None:
            assert dims[0] == False and dims[3] == False
            x = tf.reverse(x, dims)
        x = tf.transpose(x, [1, 2, 0, 3])
        x = tf.reshape(x, [-1, features])
        x = tf.split(0, h * w, x)

        # note: sequence_length is never used below
        sequence_length = tf.ones(shape=(batch_size,), dtype=tf.int32) * shape[0]
        inputs_ta = tf.TensorArray(dtype=tf.float32, size=h * w, name='input_ta')
        inputs_ta = inputs_ta.unpack(x)
        states_ta = tf.TensorArray(dtype=tf.float32, size=h * w + 1, name='state_ta',
                                   clear_after_read=False)
        outputs_ta = tf.TensorArray(dtype=tf.float32, size=h * w, name='output_ta')

        # slot h*w holds the zero state, read whenever a neighbour lies outside the grid
        states_ta = states_ta.write(h * w,
                                    tf.nn.rnn_cell.LSTMStateTuple(
                                        tf.zeros([batch_size, rnn_size], tf.float32),
                                        tf.zeros([batch_size, rnn_size], tf.float32)))

        # index of the state above (t - w), or the zero-state slot for the first row
        def getindex1(t, w):
            return tf.cond(tf.less_equal(tf.constant(w), t),
                           lambda: t - tf.constant(w),
                           lambda: tf.constant(h * w))

        # index of the state to the left (t - 1), or the zero-state slot for the first column
        def getindex2(t, w):
            return tf.cond(tf.less(tf.constant(0), tf.mod(t, tf.constant(w))),
                           lambda: t - tf.constant(1),
                           lambda: tf.constant(h * w))

        time = tf.constant(0)

        def body(time, outputs_ta, states_ta):
            constant_val = tf.constant(0)
            stateUp = tf.cond(tf.less_equal(tf.constant(w), time),
                              lambda: states_ta.read(getindex1(time, w)),
                              lambda: states_ta.read(h * w))
            stateLast = tf.cond(tf.less(constant_val, tf.mod(time, tf.constant(w))),
                                lambda: states_ta.read(getindex2(time, w)),
                                lambda: states_ta.read(h * w))

            currentState = stateUp[0], stateLast[0], stateUp[1], stateLast[1]
            out, state = cell(inputs_ta.read(time), currentState)
            outputs_ta = outputs_ta.write(time, out)
            states_ta = states_ta.write(time, state)
            return time + 1, outputs_ta, states_ta

        def condition(time, outputs_ta, states_ta):
            return tf.less(time, tf.constant(h * w))

        # each step depends on earlier states, so the loop runs sequentially
        result, outputs_ta, states_ta = tf.while_loop(condition, body,
                                                      [time, outputs_ta, states_ta],
                                                      parallel_iterations=1)

        outputs = outputs_ta.pack()
        states = states_ta.pack()

        y = tf.reshape(outputs, [h, w, batch_size, rnn_size])
        y = tf.transpose(y, [2, 0, 1, 3])
        if dims is not None:
            y = tf.reverse(y, dims)

        return y, states
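
Driving the while-loop version looks the same from the outside. Again the shapes are illustrative assumptions, and whether variables created inside the loop body initialize cleanly depends on the TF 0.x version, so treat this as a sketch:

import numpy as np

images = tf.placeholder(tf.float32, [2, 8, 8, 1])  # hypothetical [batch, height, width, channels]
y, states = multiDimentionalRNN_whileLoop(rnn_size=16, input_data=images, sh=[2, 2])
with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())
    out = sess.run(y, feed_dict={images: np.random.rand(2, 8, 8, 1)})
    print(out.shape)  # (2, 4, 4, 16)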

Regarding "tensorflow - Multidimensional LSTM in TensorFlow", we found a similar question on Stack Overflow: https://stackoverflow.com/questions/42071074/
