I am trying to implement SegFormer, a Transformer-based semantic segmentation model, in TensorFlow. I am following the official PyTorch implementation, using tf.keras 2.5.
When I try to build a simplified version with a single stage, I get the following error.
OperatorNotAllowedInGraphError: iterating over `tf.Tensor` is not allowed in Graph execution. Use Eager execution or decorate this function with @tf.function.
The problem is that I do not know where this error comes from. I have already implemented ConvMLP, which has a broadly similar architecture, and I did not get any error there. The loop

src/model/backbone/mit.py:438 call  *
    inputs = blk(inputs)

which seems to be the source of the error, comes straight from my ConvMLP implementation, where it works without any problem.
Below is the full traceback.
Traceback (most recent call last):
  File "src/model/backbone/mit.py", line 520, in <module>
    model = get_mix_vision_transformer(
  File "src/model/backbone/mit.py", line 489, in get_mix_vision_transformer
    fmap_out = StageBlock(
  File "/usr/local/lib/python3.8/dist-packages/tensorflow/python/keras/engine/base_layer.py", line 969, in __call__
    return self._functional_construction_call(inputs, args, kwargs, input_list)
  File "/usr/local/lib/python3.8/dist-packages/tensorflow/python/keras/engine/base_layer.py", line 1107, in _functional_construction_call
    outputs = self._keras_tensor_symbolic_call(
  File "/usr/local/lib/python3.8/dist-packages/tensorflow/python/keras/engine/base_layer.py", line 840, in _keras_tensor_symbolic_call
    return self._infer_output_signature(inputs, args, kwargs, input_masks)
  File "/usr/local/lib/python3.8/dist-packages/tensorflow/python/keras/engine/base_layer.py", line 880, in _infer_output_signature
    outputs = call_fn(inputs, *args, **kwargs)
  File "/usr/local/lib/python3.8/dist-packages/tensorflow/python/autograph/impl/api.py", line 695, in wrapper
    raise e.ag_error_metadata.to_exception(e)
OperatorNotAllowedInGraphError: in user code:

    src/model/backbone/mit.py:438 call  *
        inputs = blk(inputs)
    /usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/ops.py:520 __iter__
        self._disallow_iteration()
    /usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/ops.py:516 _disallow_iteration
        self._disallow_in_graph_mode("iterating over `tf.Tensor`")
    /usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/ops.py:494 _disallow_in_graph_mode
        raise errors.OperatorNotAllowedInGraphError(

    OperatorNotAllowedInGraphError: iterating over `tf.Tensor` is not allowed in Graph execution. Use Eager execution or decorate this function with @tf.function.
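For reference, this error is raised whenever `Tensor.__iter__` is invoked while a function is being traced into a graph. A minimal standalone sketch (hypothetical, not taken from my model) that triggers the same class of error:

import tensorflow as tf

@tf.function
def unpack(t):
    # Tuple unpacking calls Tensor.__iter__, which is disallowed during graph tracing.
    a, b = t
    return a + b

unpack(tf.constant([1.0, 2.0]))  # raises OperatorNotAllowedInGraphError

What confuses me is that the loop flagged in the traceback iterates over `self.blocks`, a plain Python list of layers, not over a `tf.Tensor`.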
Below is the fully reproducible code: Python 3.8, TensorFlow 2.5, Ubuntu 20.04.
from typing import Any, Dict, List
import numpy as np
import tensorflow as tf
from loguru import logger
from tensorflow.keras.layers import (
Conv2D,
Dense,
DepthwiseConv2D,
Dropout,
Input,
LayerNormalization,
Permute,
Reshape,
)
from tensorflow.keras.models import Model, Sequential
# Referred from: github.com:rwightman/pytorch-image-models.
# https://keras.io/examples/vision/cct/#stochastic-depth-for-regularization
class StochasticDepth(tf.keras.layers.Layer):
def __init__(
self,
drop_prop,
*args,
**kwargs,
) -> None:
super().__init__(*args, **kwargs)
self.drop_prob = drop_prop
def call(self, inputs, training=None) -> tf.Tensor:
if training:
keep_prob = tf.cast(1 - self.drop_prob, dtype=inputs.dtype)
shape = (tf.shape(inputs)[0],) + (1,) * (len(tf.shape(inputs)) - 1)
random_tensor = keep_prob + tf.random.uniform(
shape, 0, 1, dtype=inputs.dtype
)
random_tensor = tf.floor(random_tensor)
return (inputs / keep_prob) * random_tensor
return inputs
def get_config(self) -> Dict[str, Any]:
config = super().get_config()
config.update({"drop_prob": self.drop_prob})
return config
class Identity(tf.keras.layers.Layer):
def __init__(self) -> None:
super().__init__(name="IdentityTF")
def call(self, inputs) -> tf.Tensor:
return inputs
class OverlapPatchEmbed(tf.keras.layers.Layer):
def __init__(
self,
patch_size: int = 7,
strides: int = 4,
emb_dim: int = 768,
l2_regul: float = 1e-4,
*args,
**kwargs,
) -> None:
super().__init__(*args, **kwargs)
self.patch_size = patch_size
self.strides = strides
self.emb_dim = emb_dim
self.l2_regul = l2_regul
self.norm = LayerNormalization()
def build(self, input_shape) -> None:
_, height, width, channels = input_shape
        self.H = height // self.strides
        self.W = width // self.strides
self.proj = Conv2D(
self.emb_dim,
kernel_size=self.patch_size,
strides=self.strides,
padding="same",
use_bias=False,
kernel_initializer="he_uniform",
kernel_regularizer=tf.keras.regularizers.l2(l2=self.l2_regul),
)
self.reshape = Reshape(target_shape=(self.H * self.W, -1))
def call(self, inputs, training=None) -> tf.Tensor:
fmap = self.proj(inputs)
fmap = self.reshape(fmap)
return self.norm(fmap)
def get_config(self) -> Dict[str, Any]:
config = super().get_config()
config.update(
{
"patch_size": self.patch_size,
"strides": self.strides,
"emb_dim": self.emb_dim,
"l2_regul": self.l2_regul,
}
)
return config
class Mlp(tf.keras.layers.Layer):
def __init__(
self,
fc1_units: int,
fc2_units: int,
l2_regul: float = 1e-4,
*args,
**kwargs,
) -> None:
super().__init__(*args, **kwargs)
self.fc1_units = fc1_units
self.fc2_units = fc2_units
self.l2_regul = l2_regul
self.gelu = tf.keras.activations.gelu
def build(self, input_shape) -> None:
_, units, _ = input_shape
height = int(tf.sqrt(float(units)))
width = int(tf.sqrt(float(units)))
self.square_reshape = Reshape(target_shape=(height, width, -1))
self.wide_reshape = Reshape(target_shape=(units, -1))
self.fc1 = Dense(
units=self.fc1_units,
kernel_initializer="he_uniform",
kernel_regularizer=tf.keras.regularizers.l2(l2=self.l2_regul),
)
self.fc2 = Dense(
units=self.fc2_units,
kernel_initializer="he_uniform",
kernel_regularizer=tf.keras.regularizers.l2(l2=self.l2_regul),
)
self.depth_conv = DepthwiseConv2D(
depth_multiplier=1,
kernel_size=3,
strides=1,
padding="same",
use_bias=False,
kernel_initializer="he_uniform",
kernel_regularizer=tf.keras.regularizers.l2(l2=self.l2_regul),
)
def call(self, inputs, training=None) -> tf.Tensor:
fmap = self.fc1(inputs)
fmap = self.square_reshape(fmap)
fmap = self.depth_conv(fmap)
fmap = self.wide_reshape(fmap)
fmap = self.gelu(fmap)
return self.fc2(fmap)
def get_config(self) -> Dict[str, Any]:
config = super().get_config()
config.update(
{
"fc1_units": self.fc1_units,
"fc2_units": self.fc2_units,
"l2_regularization": self.l2_regul,
}
)
return config
class Attention(tf.keras.layers.Layer):
def __init__(
self,
fc_units: int,
num_heads: int = 8,
attn_drop_prob: float = 0,
proj_drop_prob: float = 0,
attn_reduction_ratio: int = 1,
l2_regul: float = 1e-4,
*args,
**kwargs,
) -> None:
super().__init__(*args, **kwargs)
assert (
fc_units % num_heads == 0
), f"dim {fc_units} should be divided by num_heads {num_heads}."
self.fc_units = fc_units
self.num_heads = num_heads
self.attn_drop_prob = attn_drop_prob
self.proj_drop_prob = proj_drop_prob
self.attn_reduction_ratio = attn_reduction_ratio
self.l2_regul = l2_regul
self.head_dims = fc_units / num_heads
self.scale = 1 / tf.sqrt(self.head_dims)
self.softmax = tf.keras.activations.softmax
def build(self, input_shape) -> None:
_, units, _ = input_shape
height = int(tf.sqrt(float(units)))
width = int(tf.sqrt(float(units)))
reduction_height = height // self.attn_reduction_ratio
reduction_width = width // self.attn_reduction_ratio
self.heads_reshape = Reshape(target_shape=(units, self.num_heads, -1))
self.square_reshape = Reshape(target_shape=(height, width, -1))
self.wide_reshape = Reshape(target_shape=(units, -1))
self.wide_reduction_reshape = Reshape(
target_shape=(reduction_height * reduction_width, -1)
)
self.kv_reshape = Reshape(
target_shape=(-1, 2, self.num_heads, int(self.head_dims))
)
self.query = Dense(
units=self.fc_units,
use_bias=False,
kernel_initializer="he_uniform",
kernel_regularizer=tf.keras.regularizers.l2(l2=self.l2_regul),
)
self.key_value = Dense(
units=self.fc_units * 2,
use_bias=False,
kernel_initializer="he_uniform",
kernel_regularizer=tf.keras.regularizers.l2(l2=self.l2_regul),
)
self.proj = Dense(
units=self.fc_units,
kernel_initializer="he_uniform",
kernel_regularizer=tf.keras.regularizers.l2(l2=self.l2_regul),
)
self.attn_drop = Dropout(rate=self.attn_drop_prob)
self.proj_drop = Dropout(rate=self.proj_drop_prob)
self.permute = Permute((2, 1, 3))
if self.attn_reduction_ratio > 1:
self.attn_conv = Conv2D(
self.fc_units,
kernel_size=self.attn_reduction_ratio,
strides=self.attn_reduction_ratio,
padding="same",
use_bias=False,
kernel_initializer="he_uniform",
kernel_regularizer=tf.keras.regularizers.l2(l2=self.l2_regul),
)
self.norm = LayerNormalization()
def call(self, inputs, training=None) -> tf.Tensor:
queries = self.query(inputs)
queries = self.heads_reshape(queries)
queries = self.permute(queries)
fmap = inputs
if self.attn_reduction_ratio > 1:
fmap = self.square_reshape(fmap)
fmap = self.attn_conv(fmap)
fmap = self.wide_reduction_reshape(fmap)
fmap = self.norm(fmap)
fmap = self.key_value(fmap)
fmap = self.kv_reshape(fmap)
fmap = tf.transpose(fmap, perm=[2, 0, 3, 1, 4])
keys, values = fmap
attn = tf.matmul(queries, keys, transpose_b=True) * self.scale
attn = self.softmax(attn)
attn = self.attn_drop(attn)
x = tf.matmul(attn, values)
x = tf.transpose(x, perm=[0, 2, 1, 3])
x = self.wide_reshape(x)
x = self.proj(x)
return self.proj_drop(x)
def get_config(self) -> Dict[str, Any]:
config = super().get_config()
config.update(
{
"fc_units": self.fc_units,
"num_heads": self.num_heads,
"attn_drop_prob": self.attn_drop_prob,
"proj_drop_prob": self.proj_drop_prob,
"attn_reduction_ratio": self.attn_reduction_ratio,
"l2_regul": self.l2_regul,
}
)
return config
class FFNAttentionBlock(tf.keras.layers.Layer):
def __init__(
self,
fc_units: int,
num_heads: int = 8,
mlp_ratio: int = 4,
attn_drop_prob: float = 0,
proj_drop_prob: float = 0,
attn_reduction_ratio: int = 1,
stochastic_depth_rate: float = 0.1,
*args,
**kwargs,
) -> None:
super().__init__(*args, **kwargs)
self.fc_units = fc_units
self.num_heads = num_heads
self.mlp_ratio = mlp_ratio
self.attn_drop_prob = attn_drop_prob
self.proj_drop_prob = proj_drop_prob
self.attn_reduction_ratio = attn_reduction_ratio
self.stochastic_depth_rate = stochastic_depth_rate
def build(self, input_shape) -> None:
self.attn = Attention(
fc_units=self.fc_units,
num_heads=self.num_heads,
attn_drop_prob=self.attn_drop_prob,
proj_drop_prob=self.proj_drop_prob,
attn_reduction_ratio=self.attn_reduction_ratio,
)
self.stochastic_drop = (
StochasticDepth(drop_prop=self.stochastic_depth_rate)
if self.stochastic_depth_rate > 0
else Identity()
)
self.mlp = Mlp(
fc1_units=self.fc_units * self.mlp_ratio,
fc2_units=self.fc_units,
)
self.norm1 = LayerNormalization()
self.norm2 = LayerNormalization()
def call(self, inputs, training=None) -> tf.Tensor:
fmap = inputs + self.stochastic_drop(self.attn(self.norm1(inputs)))
fmap = fmap + self.stochastic_drop(self.mlp(self.norm2(fmap)))
return fmap
def get_config(self) -> Dict[str, Any]:
config = super().get_config()
config.update(
{
"fc_units": self.fc_units,
"num_heads": self.num_heads,
"mlp_ratio": self.mlp_ratio,
"attn_drop_prob": self.attn_drop_prob,
"proj_drop_prob": self.proj_drop_prob,
"attn_reduction_ratio": self.attn_reduction_ratio,
"stochastic_depth_rate": self.stochastic_depth_rate,
}
)
return config
class StageBlock(tf.keras.layers.Layer):
def __init__(
self,
fc_units: int,
depth: int,
num_heads: int = 8,
mlp_ratio: int = 4,
attn_drop_prob: float = 0,
proj_drop_prob: float = 0,
attn_reduction_ratio: int = 1,
stochastic_depth_rate: float = 0.1,
*args,
**kwargs,
) -> None:
super().__init__(*args, **kwargs)
self.fc_units = fc_units
self.num_heads = num_heads
self.mlp_ratio = mlp_ratio
self.attn_drop_prob = attn_drop_prob
self.proj_drop_prob = proj_drop_prob
self.attn_reduction_ratio = attn_reduction_ratio
self.stochastic_depth_rate = stochastic_depth_rate
self.depth = depth
def build(self, input_shape) -> None:
self.blocks = [
FFNAttentionBlock(
fc_units=self.fc_units,
num_heads=self.num_heads,
mlp_ratio=self.mlp_ratio,
attn_drop_prob=self.attn_drop_prob,
proj_drop_prob=self.proj_drop_prob,
attn_reduction_ratio=self.attn_reduction_ratio,
stochastic_depth_rate=self.stochastic_depth_rate,
)
for _ in range(self.depth)
]
def call(self, inputs, training=None) -> tf.Tensor:
for blk in self.blocks:
inputs = blk(inputs)
return inputs
def get_config(self) -> Dict[str, Any]:
config = super().get_config()
config.update(
{
"fc_units": self.fc_units,
"depth": self.depth,
"num_heads": self.num_heads,
"mlp_ratio": self.mlp_ratio,
"attn_drop_prob": self.attn_drop_prob,
"proj_drop_prob": self.proj_drop_prob,
"attn_reduction_ratio": self.attn_reduction_ratio,
"stochastic_depth_rate": self.stochastic_depth_rate,
}
)
return config
def get_mix_vision_transformer(
img_shape: List[int],
patch_size: List[int],
strides: List[int],
emb_dims: List[int],
num_heads: List[int],
mlp_ratios: List[int],
proj_drop_prob: float,
attn_drop_prob: float,
stochastic_depth_rate: float,
attn_reduction_ratios: List[int],
depths: List[int],
) -> tf.keras.Model:
"""Instantiate a MiT model.
Returns:
A `tf.keras` model.
"""
dpr = [
rates for rates in np.linspace(0, stochastic_depth_rate, np.sum(depths))
]
img_input = Input(img_shape)
fmap = OverlapPatchEmbed(
patch_size=patch_size[0], strides=strides[0], emb_dim=emb_dims[0]
)(img_input)
fmap_out = StageBlock(
fc_units=emb_dims[0],
depth=depths[0],
num_heads=num_heads[0],
mlp_ratio=mlp_ratios[0],
attn_drop_prob=attn_drop_prob,
proj_drop_prob=proj_drop_prob,
attn_reduction_ratio=attn_reduction_ratios[0],
stochastic_depth_rate=dpr[0],
name="stage_1",
)(fmap)
return Model(img_input, fmap_out)
if __name__ == "__main__":
fmap = np.random.rand(1, 224, 224, 3)
patch_size = [7, 3, 3, 3]
strides = [4, 2, 2, 2]
emb_dims = [64, 128, 256, 512]
num_heads = [1, 2, 4, 8]
mlp_ratios = [4, 4, 4, 4]
proj_drop_prob = 0
attn_drop_prob = 0
stochastic_depth_rate = 0
attn_reduction_ratios = [8, 4, 2, 1]
depths = [3, 4, 6, 3]
# out = StageBlock(fc_units=16, depth=4)(fmap)
model = get_mix_vision_transformer(
img_shape=[224, 224, 3],
patch_size=patch_size,
strides=strides,
emb_dims=emb_dims,
num_heads=num_heads,
mlp_ratios=mlp_ratios,
proj_drop_prob=proj_drop_prob,
attn_drop_prob=attn_drop_prob,
stochastic_depth_rate=stochastic_depth_rate,
attn_reduction_ratios=attn_reduction_ratios,
depths=depths,
)
out = model(fmap)
print(f"{out.shape.as_list()}")
model.summary()
1 Answer
This answer was posted by the OP, MathieuK, as an edit to the question "iterating over `tf.Tensor` is not allowed in Graph execution. Use Eager execution or decorate this function with @tf.function", under CC BY-SA 4.0.
After testing each module separately, the error comes from the following line in Attention.call (the only place where a `tf.Tensor` is iterated):

keys, values = fmap
Tuple unpacking iterates over the tensor: it works fine in eager mode, but it is not allowed once Keras traces the model as a graph. I had to modify the line as shown below.
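A minimal sketch of the change, assuming the fix is simply to extract the two tensors by indexing, which does not call `Tensor.__iter__` and is therefore graph-safe:

# In Attention.call, replace
#     keys, values = fmap
# with explicit indexing of the leading axis:
keys, values = fmap[0], fmap[1]

Since the leading axis has a static size of 2 here, keys, values = tf.unstack(fmap, axis=0) should work just as well.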