我在训练tfhub预训练的嵌入keraslayer时得到了这个错误
Epoch 1/10
---------------------------------------------------------------------------
InternalError Traceback (most recent call last)
<ipython-input-7-e9085b1a50d7> in <cell line: 1>()
----> 1 history = model.fit(train_ds,batch_size=BATCH_SIZE,steps_per_epoch=train_steps,epochs=10,validation_data=valid_ds,validation_steps=valid_steps)
1 frames
/usr/local/lib/python3.10/dist-packages/keras/src/utils/traceback_utils.py in error_handler(*args, **kwargs)
68 # To get the full stack trace, call:
69 # `tf.debugging.disable_traceback_filtering()`
---> 70 raise e.with_traceback(filtered_tb) from None
71 finally:
72 del filtered_tb
/usr/local/lib/python3.10/dist-packages/tensorflow/python/framework/ops.py in _numpy(self)
1107 return self._numpy_internal()
1108 except core._NotOkStatusException as e: # pylint: disable=protected-access
-> 1109 raise core._status_to_exception(e) from None # pylint: disable=protected-access
1110
1111 @property
InternalError: RET_CHECK failure (third_party/tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass.cc:2008) arg_shape.handle_type != DT_INVALID input edge: [id=6646 Func/while/body/_1/input/_1330:0 -> while/cluster_while_body_146058:634]
完整代码:
!pip3 install -q -U tensorflow-text
from IPython.display import clear_output
import tensorflow as tf
import numpy as np
from google.colab import auth
auth.authenticate_user()
import os
import tensorflow_datasets as tfds
import tensorflow_hub as hub
from tensorflow import keras
import tensorflow_text as text
import os
tpu_resolver = tf.distribute.cluster_resolver.TPUClusterResolver("grpc://"+os.environ["COLAB_TPU_ADDR"])
tf.config.experimental_connect_to_cluster(tpu_resolver)
tf.tpu.experimental.initialize_tpu_system(tpu_resolver)
strategy = tf.distribute.TPUStrategy(tpu_resolver)
(train_raw, valid_raw),ds_info = tfds.load(
name="imdb_reviews",
split=["train", "test"],
as_supervised=True,
try_gcs=True,
with_info=True
)
BATCH_SIZE = 16 * 8
train_size = ds_info.splits['train'].num_examples # 25000
valid_size = ds_info.splits['test'].num_examples # 25000
train_steps = train_size // BATCH_SIZE
valid_steps = valid_size // BATCH_SIZE
train_ds = train_raw.shuffle(8000)
train_ds = train_ds.repeat()
train_ds = train_ds.batch(BATCH_SIZE,drop_remainder=True)
train_ds = train_ds.prefetch(-1)
valid_ds = valid_raw.batch(BATCH_SIZE,drop_remainder=True)
valid_ds = valid_ds.prefetch(-1)
with strategy.scope():
load_locally = tf.saved_model.LoadOptions(experimental_io_device="/job:localhost")
inp_ = keras.layers.Input(shape=[],dtype=tf.string)
z = hub.KerasLayer("https://tfhub.dev/google/universal-sentence-encoder-cmlm/multilingual-preprocess/2",load_options=load_locally)(inp_)
z = hub.KerasLayer("https://tfhub.dev/google/universal-sentence-encoder-cmlm/multilingual-base-br/1",trainable=True,load_options=load_locally)(z)
z = keras.layers.Lambda(lambda z: z['default'])(z)
z = keras.layers.Flatten()(z)
z = keras.layers.Dense(64,"relu")(z)
out_ = keras.layers.Dense(1,"sigmoid")(z)
model = keras.models.Model(inputs=[inp_],outputs=[out_])
model.compile(loss="binary_crossentropy", optimizer="nadam",metrics=["accuracy"],steps_per_execution=20)
model.fit(train_ds,steps_per_epoch=train_steps,epochs=10,validation_data=valid_ds,validation_steps=valid_steps)
我已经为同样的问题检查了stackoverflow,我看到一篇文章说他们的问题通过改变steps_per_epoch解决了。我改变了步骤,减少和增加,但我得到了同样的事情一次又一次。
1条答案
按热度按时间cbwuti441#
找到了答案TPU无法处理tf. string,所以你需要在TPU的CPU中添加输入的预处理,这是在模型之外和策略范围之内的。