I am trying to implement SegFormer, a Transformer-based semantic segmentation model, in TensorFlow. I am following the official PyTorch implementation, using tf.keras 2.5.
When I try to build a simplified version with a single stage, I get the following error.
OperatorNotAllowedInGraphError: iterating over `tf.Tensor` is not allowed in Graph execution. Use Eager execution or decorate this function with @tf.function.
The problem is that I do not know where this error comes from. I have already implemented ConvMLP, which has a broadly similar architecture, and I did not get any error there. The loop

src/model/backbone/mit.py:438 call  *
    inputs = blk(inputs)

which seems to be the source of the error, comes straight from my ConvMLP implementation, where it works without any problem.
Below is the full traceback.
Traceback (most recent call last):
  File "src/model/backbone/mit.py", line 520, in <module>
    model = get_mix_vision_transformer(
  File "src/model/backbone/mit.py", line 489, in get_mix_vision_transformer
    fmap_out = StageBlock(
  File "/usr/local/lib/python3.8/dist-packages/tensorflow/python/keras/engine/base_layer.py", line 969, in __call__
    return self._functional_construction_call(inputs, args, kwargs, input_list)
  File "/usr/local/lib/python3.8/dist-packages/tensorflow/python/keras/engine/base_layer.py", line 1107, in _functional_construction_call
    outputs = self._keras_tensor_symbolic_call(
  File "/usr/local/lib/python3.8/dist-packages/tensorflow/python/keras/engine/base_layer.py", line 840, in _keras_tensor_symbolic_call
    return self._infer_output_signature(inputs, args, kwargs, input_masks)
  File "/usr/local/lib/python3.8/dist-packages/tensorflow/python/keras/engine/base_layer.py", line 880, in _infer_output_signature
    outputs = call_fn(inputs, *args, **kwargs)
  File "/usr/local/lib/python3.8/dist-packages/tensorflow/python/autograph/impl/api.py", line 695, in wrapper
    raise e.ag_error_metadata.to_exception(e)
OperatorNotAllowedInGraphError: in user code:

    src/model/backbone/mit.py:438 call  *
        inputs = blk(inputs)
    /usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/ops.py:520 __iter__
        self._disallow_iteration()
    /usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/ops.py:516 _disallow_iteration
        self._disallow_in_graph_mode("iterating over `tf.Tensor`")
    /usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/ops.py:494 _disallow_in_graph_mode
        raise errors.OperatorNotAllowedInGraphError(

    OperatorNotAllowedInGraphError: iterating over `tf.Tensor` is not allowed in Graph execution. Use Eager execution or decorate this function with @tf.function.
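For reference, this error is raised whenever `Tensor.__iter__` is invoked while a function is being traced into a graph. A minimal standalone sketch (hypothetical, not taken from my model) that triggers the same class of error:

import tensorflow as tf

@tf.function
def unpack(t):
    # Tuple unpacking calls Tensor.__iter__, which is disallowed during graph tracing.
    a, b = t
    return a + b

unpack(tf.constant([1.0, 2.0]))  # raises OperatorNotAllowedInGraphError

What confuses me is that the loop flagged in the traceback iterates over `self.blocks`, a plain Python list of layers, not over a `tf.Tensor`.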
Below is the fully reproducible code: Python 3.8, TensorFlow 2.5, Ubuntu 20.04.
from typing import Any, Dict, List
import numpy as np
import tensorflow as tf
from loguru import logger
from tensorflow.keras.layers import (
Conv2D,
Dense,
DepthwiseConv2D,
Dropout,
Input,
LayerNormalization,
Permute,
Reshape,
)
from tensorflow.keras.models import Model, Sequential
# Referred from: github.com:rwightman/pytorch-image-models.
# https://keras.io/examples/vision/cct/#stochastic-depth-for-regularization
class StochasticDepth(tf.keras.layers.Layer):
def __init__(
self,
drop_prop,
*args,
**kwargs,
) -> None:
super().__init__(*args, **kwargs)
self.drop_prob = drop_prop
def call(self, inputs, training=None) -> tf.Tensor:
if training:
keep_prob = tf.cast(1 - self.drop_prob, dtype=inputs.dtype)
shape = (tf.shape(inputs)[0],) + (1,) * (len(tf.shape(inputs)) - 1)
random_tensor = keep_prob + tf.random.uniform(
shape, 0, 1, dtype=inputs.dtype
)
random_tensor = tf.floor(random_tensor)
return (inputs / keep_prob) * random_tensor
return inputs
def get_config(self) -> Dict[str, Any]:
config = super().get_config()
config.update({"drop_prob": self.drop_prob})
return config
class Identity(tf.keras.layers.Layer):
def __init__(self) -> None:
super().__init__(name="IdentityTF")
def call(self, inputs) -> tf.Tensor:
return inputs
class OverlapPatchEmbed(tf.keras.layers.Layer):
def __init__(
self,
patch_size: int = 7,
strides: int = 4,
emb_dim: int = 768,
l2_regul: float = 1e-4,
*args,
**kwargs,
) -> None:
super().__init__(*args, **kwargs)
self.patch_size = patch_size
self.strides = strides
self.emb_dim = emb_dim
self.l2_regul = l2_regul
self.norm = LayerNormalization()
def build(self, input_shape) -> None:
_, height, width, channels = input_shape
        self.H = height // self.strides
        self.W = width // self.strides
self.proj = Conv2D(
self.emb_dim,
kernel_size=self.patch_size,
strides=self.strides,
padding="same",
use_bias=False,
kernel_initializer="he_uniform",
kernel_regularizer=tf.keras.regularizers.l2(l2=self.l2_regul),
)
self.reshape = Reshape(target_shape=(self.H * self.W, -1))
def call(self, inputs, training=None) -> tf.Tensor:
fmap = self.proj(inputs)
fmap = self.reshape(fmap)
return self.norm(fmap)
def get_config(self) -> Dict[str, Any]:
config = super().get_config()
config.update(
{
"patch_size": self.patch_size,
"strides": self.strides,
"emb_dim": self.emb_dim,
"l2_regul": self.l2_regul,
}
)
return config
class Mlp(tf.keras.layers.Layer):
def __init__(
self,
fc1_units: int,
fc2_units: int,
l2_regul: float = 1e-4,
*args,
**kwargs,
) -> None:
super().__init__(*args, **kwargs)
self.fc1_units = fc1_units
self.fc2_units = fc2_units
self.l2_regul = l2_regul
self.gelu = tf.keras.activations.gelu
def build(self, input_shape) -> None:
_, units, _ = input_shape
height = int(tf.sqrt(float(units)))
width = int(tf.sqrt(float(units)))
self.square_reshape = Reshape(target_shape=(height, width, -1))
self.wide_reshape = Reshape(target_shape=(units, -1))
self.fc1 = Dense(
units=self.fc1_units,
kernel_initializer="he_uniform",
kernel_regularizer=tf.keras.regularizers.l2(l2=self.l2_regul),
)
self.fc2 = Dense(
units=self.fc2_units,
kernel_initializer="he_uniform",
kernel_regularizer=tf.keras.regularizers.l2(l2=self.l2_regul),
)
self.depth_conv = DepthwiseConv2D(
depth_multiplier=1,
kernel_size=3,
strides=1,
padding="same",
use_bias=False,
kernel_initializer="he_uniform",
kernel_regularizer=tf.keras.regularizers.l2(l2=self.l2_regul),
)
def call(self, inputs, training=None) -> tf.Tensor:
fmap = self.fc1(inputs)
fmap = self.square_reshape(fmap)
fmap = self.depth_conv(fmap)
fmap = self.wide_reshape(fmap)
fmap = self.gelu(fmap)
return self.fc2(fmap)
def get_config(self) -> Dict[str, Any]:
config = super().get_config()
config.update(
{
"fc1_units": self.fc1_units,
"fc2_units": self.fc2_units,
"l2_regularization": self.l2_regul,
}
)
return config
class Attention(tf.keras.layers.Layer):
def __init__(
self,
fc_units: int,
num_heads: int = 8,
attn_drop_prob: float = 0,
proj_drop_prob: float = 0,
attn_reduction_ratio: int = 1,
l2_regul: float = 1e-4,
*args,
**kwargs,
) -> None:
super().__init__(*args, **kwargs)
assert (
fc_units % num_heads == 0
), f"dim {fc_units} should be divided by num_heads {num_heads}."
self.fc_units = fc_units
self.num_heads = num_heads
self.attn_drop_prob = attn_drop_prob
self.proj_drop_prob = proj_drop_prob
self.attn_reduction_ratio = attn_reduction_ratio
self.l2_regul = l2_regul
self.head_dims = fc_units / num_heads
self.scale = 1 / tf.sqrt(self.head_dims)
self.softmax = tf.keras.activations.softmax
def build(self, input_shape) -> None:
_, units, _ = input_shape
height = int(tf.sqrt(float(units)))
width = int(tf.sqrt(float(units)))
reduction_height = height // self.attn_reduction_ratio
reduction_width = width // self.attn_reduction_ratio
self.heads_reshape = Reshape(target_shape=(units, self.num_heads, -1))
self.square_reshape = Reshape(target_shape=(height, width, -1))
self.wide_reshape = Reshape(target_shape=(units, -1))
self.wide_reduction_reshape = Reshape(
target_shape=(reduction_height * reduction_width, -1)
)
self.kv_reshape = Reshape(
target_shape=(-1, 2, self.num_heads, int(self.head_dims))
)
self.query = Dense(
units=self.fc_units,
use_bias=False,
kernel_initializer="he_uniform",
kernel_regularizer=tf.keras.regularizers.l2(l2=self.l2_regul),
)
self.key_value = Dense(
units=self.fc_units * 2,
use_bias=False,
kernel_initializer="he_uniform",
kernel_regularizer=tf.keras.regularizers.l2(l2=self.l2_regul),
)
self.proj = Dense(
units=self.fc_units,
kernel_initializer="he_uniform",
kernel_regularizer=tf.keras.regularizers.l2(l2=self.l2_regul),
)
self.attn_drop = Dropout(rate=self.attn_drop_prob)
self.proj_drop = Dropout(rate=self.proj_drop_prob)
self.permute = Permute((2, 1, 3))
if self.attn_reduction_ratio > 1:
self.attn_conv = Conv2D(
self.fc_units,
kernel_size=self.attn_reduction_ratio,
strides=self.attn_reduction_ratio,
padding="same",
use_bias=False,
kernel_initializer="he_uniform",
kernel_regularizer=tf.keras.regularizers.l2(l2=self.l2_regul),
)
self.norm = LayerNormalization()
def call(self, inputs, training=None) -> tf.Tensor:
queries = self.query(inputs)
queries = self.heads_reshape(queries)
queries = self.permute(queries)
fmap = inputs
if self.attn_reduction_ratio > 1:
fmap = self.square_reshape(fmap)
fmap = self.attn_conv(fmap)
fmap = self.wide_reduction_reshape(fmap)
fmap = self.norm(fmap)
fmap = self.key_value(fmap)
fmap = self.kv_reshape(fmap)
fmap = tf.transpose(fmap, perm=[2, 0, 3, 1, 4])
keys, values = fmap
attn = tf.matmul(queries, keys, transpose_b=True) * self.scale
attn = self.softmax(attn)
attn = self.attn_drop(attn)
x = tf.matmul(attn, values)
x = tf.transpose(x, perm=[0, 2, 1, 3])
x = self.wide_reshape(x)
x = self.proj(x)
return self.proj_drop(x)
def get_config(self) -> Dict[str, Any]:
config = super().get_config()
config.update(
{
"fc_units": self.fc_units,
"num_heads": self.num_heads,
"attn_drop_prob": self.attn_drop_prob,
"proj_drop_prob": self.proj_drop_prob,
"attn_reduction_ratio": self.attn_reduction_ratio,
"l2_regul": self.l2_regul,
}
)
return config
class FFNAttentionBlock(tf.keras.layers.Layer):
def __init__(
self,
fc_units: int,
num_heads: int = 8,
mlp_ratio: int = 4,
attn_drop_prob: float = 0,
proj_drop_prob: float = 0,
attn_reduction_ratio: int = 1,
stochastic_depth_rate: float = 0.1,
*args,
**kwargs,
) -> None:
super().__init__(*args, **kwargs)
self.fc_units = fc_units
self.num_heads = num_heads
self.mlp_ratio = mlp_ratio
self.attn_drop_prob = attn_drop_prob
self.proj_drop_prob = proj_drop_prob
self.attn_reduction_ratio = attn_reduction_ratio
self.stochastic_depth_rate = stochastic_depth_rate
def build(self, input_shape) -> None:
self.attn = Attention(
fc_units=self.fc_units,
num_heads=self.num_heads,
attn_drop_prob=self.attn_drop_prob,
proj_drop_prob=self.proj_drop_prob,
attn_reduction_ratio=self.attn_reduction_ratio,
)
self.stochastic_drop = (
StochasticDepth(drop_prop=self.stochastic_depth_rate)
if self.stochastic_depth_rate > 0
else Identity()
)
self.mlp = Mlp(
fc1_units=self.fc_units * self.mlp_ratio,
fc2_units=self.fc_units,
)
self.norm1 = LayerNormalization()
self.norm2 = LayerNormalization()
def call(self, inputs, training=None) -> tf.Tensor:
fmap = inputs + self.stochastic_drop(self.attn(self.norm1(inputs)))
fmap = fmap + self.stochastic_drop(self.mlp(self.norm2(fmap)))
return fmap
def get_config(self) -> Dict[str, Any]:
config = super().get_config()
config.update(
{
"fc_units": self.fc_units,
"num_heads": self.num_heads,
"mlp_ratio": self.mlp_ratio,
"attn_drop_prob": self.attn_drop_prob,
"proj_drop_prob": self.proj_drop_prob,
"attn_reduction_ratio": self.attn_reduction_ratio,
"stochastic_depth_rate": self.stochastic_depth_rate,
}
)
return config
class StageBlock(tf.keras.layers.Layer):
def __init__(
self,
fc_units: int,
depth: int,
num_heads: int = 8,
mlp_ratio: int = 4,
attn_drop_prob: float = 0,
proj_drop_prob: float = 0,
attn_reduction_ratio: int = 1,
stochastic_depth_rate: float = 0.1,
*args,
**kwargs,
) -> None:
super().__init__(*args, **kwargs)
self.fc_units = fc_units
self.num_heads = num_heads
self.mlp_ratio = mlp_ratio
self.attn_drop_prob = attn_drop_prob
self.proj_drop_prob = proj_drop_prob
self.attn_reduction_ratio = attn_reduction_ratio
self.stochastic_depth_rate = stochastic_depth_rate
self.depth = depth
def build(self, input_shape) -> None:
self.blocks = [
FFNAttentionBlock(
fc_units=self.fc_units,
num_heads=self.num_heads,
mlp_ratio=self.mlp_ratio,
attn_drop_prob=self.attn_drop_prob,
proj_drop_prob=self.proj_drop_prob,
attn_reduction_ratio=self.attn_reduction_ratio,
stochastic_depth_rate=self.stochastic_depth_rate,
)
for _ in range(self.depth)
]
def call(self, inputs, training=None) -> tf.Tensor:
for blk in self.blocks:
inputs = blk(inputs)
return inputs
def get_config(self) -> Dict[str, Any]:
config = super().get_config()
config.update(
{
"fc_units": self.fc_units,
"depth": self.depth,
"num_heads": self.num_heads,
"mlp_ratio": self.mlp_ratio,
"attn_drop_prob": self.attn_drop_prob,
"proj_drop_prob": self.proj_drop_prob,
"attn_reduction_ratio": self.attn_reduction_ratio,
"stochastic_depth_rate": self.stochastic_depth_rate,
}
)
return config
def get_mix_vision_transformer(
img_shape: List[int],
patch_size: List[int],
strides: List[int],
emb_dims: List[int],
num_heads: List[int],
mlp_ratios: List[int],
proj_drop_prob: float,
attn_drop_prob: float,
stochastic_depth_rate: float,
attn_reduction_ratios: List[int],
depths: List[int],
) -> tf.keras.Model:
"""Instantiate a MiT model.
Returns:
A `tf.keras` model.
"""
dpr = [
rates for rates in np.linspace(0, stochastic_depth_rate, np.sum(depths))
]
img_input = Input(img_shape)
fmap = OverlapPatchEmbed(
patch_size=patch_size[0], strides=strides[0], emb_dim=emb_dims[0]
)(img_input)
fmap_out = StageBlock(
fc_units=emb_dims[0],
depth=depths[0],
num_heads=num_heads[0],
mlp_ratio=mlp_ratios[0],
attn_drop_prob=attn_drop_prob,
proj_drop_prob=proj_drop_prob,
attn_reduction_ratio=attn_reduction_ratios[0],
stochastic_depth_rate=dpr[0],
name="stage_1",
)(fmap)
return Model(img_input, fmap_out)
if __name__ == "__main__":
fmap = np.random.rand(1, 224, 224, 3)
patch_size = [7, 3, 3, 3]
strides = [4, 2, 2, 2]
emb_dims = [64, 128, 256, 512]
num_heads = [1, 2, 4, 8]
mlp_ratios = [4, 4, 4, 4]
proj_drop_prob = 0
attn_drop_prob = 0
stochastic_depth_rate = 0
attn_reduction_ratios = [8, 4, 2, 1]
depths = [3, 4, 6, 3]
# out = StageBlock(fc_units=16, depth=4)(fmap)
model = get_mix_vision_transformer(
img_shape=[224, 224, 3],
patch_size=patch_size,
strides=strides,
emb_dims=emb_dims,
num_heads=num_heads,
mlp_ratios=mlp_ratios,
proj_drop_prob=proj_drop_prob,
attn_drop_prob=attn_drop_prob,
stochastic_depth_rate=stochastic_depth_rate,
attn_reduction_ratios=attn_reduction_ratios,
depths=depths,
)
out = model(fmap)
print(f"{out.shape.as_list()}")
model.summary()
1 Answer
This answer was posted by the OP, MathieuK, as an edit to the question "iterating over `tf.Tensor` is not allowed in Graph execution. Use Eager execution or decorate this function with @tf.function", under CC BY-SA 4.0.
After testing each module separately, the error comes from the following line in Attention.call (the only place where a `tf.Tensor` is iterated):

keys, values = fmap
Tuple unpacking iterates over the tensor: it works fine in eager mode, but it is not allowed once Keras traces the model as a graph. I had to modify the line as shown below.
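A minimal sketch of the change, assuming the fix is simply to extract the two tensors by indexing, which does not call `Tensor.__iter__` and is therefore graph-safe:

# In Attention.call, replace
#     keys, values = fmap
# with explicit indexing of the leading axis:
keys, values = fmap[0], fmap[1]

Since the leading axis has a static size of 2 here, keys, values = tf.unstack(fmap, axis=0) should work just as well.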