I hope someone can help me with this error raised by TensorFlow.
My versions are:
Python 3.6.9
tensorflow 2.3.1
pandas 1.1.4
numpy 1.18.5
Keras 2.4.3
I am running the code on an Ubuntu server.
The error raised is:
tensorflow.python.framework.errors_impl.InternalError: Unsupported object type float
The full traceback looks like this:
Traceback (most recent call last):
  File "usc_coordinator.py", line 61, in <module>
    run_usc_coordinator(fIn, fOut, mode)
  File "usc_coordinator.py", line 31, in run_usc_coordinator
    run_word_classifier(fast_mode, file_out)
  File "/home/ubuntu/PA/PA_AI4US/PythonVersion/src/word_classifier.py", line 180, in run_word_classifier
    loss, accuracy, f1_score, precision, recall = model.evaluate(y_val, y_pred, verbose=VERBOSE_VALUE)
  File "/home/ubuntu/PA/spyder-env/lib/python3.6/site-packages/tensorflow/python/keras/engine/training_v1.py", line 915, in evaluate
    use_multiprocessing=use_multiprocessing)
  File "/home/ubuntu/PA/spyder-env/lib/python3.6/site-packages/tensorflow/python/keras/engine/training_arrays.py", line 695, in evaluate
    callbacks=callbacks)
  File "/home/ubuntu/PA/spyder-env/lib/python3.6/site-packages/tensorflow/python/keras/engine/training_arrays.py", line 386, in model_iteration
    batch_outs = f(ins_batch)
  File "/home/ubuntu/PA/spyder-env/lib/python3.6/site-packages/tensorflow/python/keras/backend.py", line 3825, in __call__
    run_metadata=self.run_metadata)
  File "/home/ubuntu/PA/spyder-env/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1472, in __call__
    run_metadata_ptr)
tensorflow.python.framework.errors_impl.InternalError: Unsupported object type float
The code I am using is shown below:
import pandas as pd
import numpy as np
import os
import sys
import tensorflow as tf
import tensorflow_hub as hub
from tensorflow.python.keras import backend as K
from sklearn.model_selection import train_test_split
from keras.preprocessing.sequence import pad_sequences
from keras.models import Model, Input
from keras.layers.merge import add
from tensorflow.keras.metrics import Recall
from keras.layers import LSTM, Embedding, Dense, TimeDistributed, Dropout, Bidirectional, Lambda
from seqeval.metrics import precision_score, recall_score, f1_score, classification_report
from util.functions_file_to import *
from util.config.word_classifier_config import *
from util.config import *
def run_word_classifier(fastMode, file_in):
    """ Runs and trains the ELMo model and evaluates the given file

    Args:
        fastMode : Boolean
            whether it should be run in fast mode
        file_in : filepath
            file to be turned into a User Story
    """
    # Pad the input file until its line count is a multiple of BATCH_SIZE
    line_count = count_lines_in_file(file_in, 1)
    with open(file_in, 'a+') as f:
        while not line_count % BATCH_SIZE == 0:
            f.write(PAD_WORD + '\n')
            line_count += 1
    # Build the training and test CSVs from the classifier sentence/label files
    my_list, my_nums = file_to_formatted_array(CLASSIFIER_SENTENCE_FILE)
    my_labels = write_label_from_file_to_array(CLASSIFIER_LABEL_FILE)
    print(len(my_list))
    print(len(my_labels))
    print(len(my_nums))
    col1 = "Sentence #"
    df = pd.DataFrame(data={col1: my_nums, "Word": my_list, "Tag": my_labels})
    df.to_csv(SENTENCE_CSV_FILE, sep=',', index=INDEX_CSV)
    checking_data, sentence_structure = file_to_formatted_array(file_in)
    col1 = "Sentence #"
    df = pd.DataFrame(data={col1: sentence_structure, "Word": checking_data, "Tag": 1})
    df.to_csv(SENTENCE_CSV_TEST_FILE, sep=',', index=INDEX_CSV)
    data = pd.read_csv(SENTENCE_CSV_FILE, encoding=ENCODING)
    data = data.fillna(method=FILL_METHOD)
    data_test = pd.read_csv(SENTENCE_CSV_TEST_FILE, encoding=ENCODING)
    data_test = data_test.fillna(method=FILL_METHOD)
    words_test = set(list(data_test['Word'].values))
    getter_test = SentenceGetter(data_test)
    sent_test = getter_test.get_next()
    sentences_test = getter_test.sentences
    x_test = [[w[0] for w in s] for s in sentences_test]
    # Pad/truncate every test sentence to MAX_LEN tokens
    new_x_test = []
    for seq in x_test:
        new_seq = []
        for i in range(MAX_LEN):
            try:
                new_seq.append(seq[i])
            except IndexError:
                new_seq.append(PAD_WORD)
        new_x_test.append(new_seq)
    words = set(list(data['Word'].values))
    words.add(PAD_WORD)
    n_words = len(words)
    tags = list(set(data["Tag"].values))
    n_tags = len(tags)
    getter = SentenceGetter(data)
    sent = getter.get_next()
    sentences = getter.sentences
    largest_sen = max(len(sen) for sen in sentences)
    # Pad/truncate every training sentence to MAX_LEN tokens
    big_x = [[w[0] for w in s] for s in sentences]
    new_big_x = []
    for seq in big_x:
        new_seq = []
        for i in range(MAX_LEN):
            try:
                new_seq.append(seq[i])
            except IndexError:
                new_seq.append(PAD_WORD)
        new_big_x.append(new_seq)
    tags2index = {t: i for i, t in enumerate(tags)}
    y = [[tags2index[w[1]] for w in s] for s in sentences]
    y = pad_sequences(maxlen=MAX_LEN, sequences=y, padding="post", value=tags2index["O"])
    tf.compat.v1.disable_eager_execution()
    x_tr, x_te, y_tr, y_te = train_test_split(new_big_x, y, test_size=TEST_SIZE, random_state=2018)
    sess = tf.compat.v1.Session()
    K.set_session(sess)
    elmo_model = hub.Module("https://tfhub.dev/google/elmo/3", trainable=True)
    init = tf.compat.v1.global_variables_initializer()
    sess.run(init)
    input_text = Input(shape=(MAX_LEN,), dtype=tf.string)

    def elmo_embedding(inData):
        # sequence_len is fixed to MAX_LEN for every sentence in the batch
        return elmo_model(inputs={"tokens": tf.squeeze(tf.cast(inData, tf.string)),
                                  "sequence_len": tf.constant(BATCH_SIZE * [MAX_LEN])},
                          signature="tokens", as_dict=True)["elmo"]

    embedding = Lambda(elmo_embedding, output_shape=(MAX_LEN, 1024))(input_text)
    x = Bidirectional(LSTM(units=LSTM_UNITS, return_sequences=LSTM_RETURN_SEQ,
                           recurrent_dropout=LSTM_RO_DROPOUT, dropout=LSTM_DROPOUT))(embedding)
    x_rnn = Bidirectional(LSTM(units=LSTM_UNITS, return_sequences=LSTM_RETURN_SEQ,
                               recurrent_dropout=LSTM_RO_DROPOUT, dropout=LSTM_DROPOUT))(x)
    x = add([x, x_rnn])  # residual connection to the first biLSTM
    out = TimeDistributed(Dense(n_tags, activation="softmax"))(x)
    model = Model(input_text, out)
    model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])
    # In fast mode only a tenth of the training data is counted
    if fastMode:
        line_count_training_data = count_lines_in_file(CLASSIFIER_SENTENCE_FILE, 10)
    else:
        line_count_training_data = count_lines_in_file(CLASSIFIER_SENTENCE_FILE, 1)
    size_train, size_test = get_count_for_batch_train_test_data(line_count_training_data)
    print(size_train)
    mode_dict = {
        "train": size_train,
        "test": size_test,
    }
    x_tr, x_val = x_tr[:mode_dict["train"] * BATCH_SIZE], x_tr[-mode_dict["test"] * BATCH_SIZE:]
    y_tr, y_val = y_tr[:mode_dict["train"] * BATCH_SIZE], y_tr[-mode_dict["test"] * BATCH_SIZE:]
    y_tr = y_tr.reshape(y_tr.shape[0], y_tr.shape[1], 1)
    y_val = y_val.reshape(y_val.shape[0], y_val.shape[1], 1)
    history = model.fit(np.array(x_tr), y_tr, validation_data=(np.array(x_val), y_val),
                        batch_size=BATCH_SIZE, epochs=1, verbose=VERBOSE_VALUE)  # NUM_EPOCHS
    x_val_arr = np.asarray(x_val)
    y_pred1 = model.predict(x_val_arr)
    y_pred = np.argmax(y_pred1, axis=1)
    y_pred = y_pred.astype(np.float32)
    y_val_reshaped = y_val.transpose(2, 0, 1).reshape(192, -1)
    y_val = np.asarray(y_val_reshaped).astype(np.float32)
    loss, accuracy, f1_score, precision, recall = model.evaluate(y_val, y_pred, verbose=VERBOSE_VALUE)
    print('loss:', loss)
    print('accuracy:', accuracy)
    print('f1_score:', f1_score)
    print('precision:', precision)
    print('recall:', recall)
    idx2tag = {i: w for w, i in tags2index.items()}
    test_pred = model.predict(np.array(new_x_test), verbose=VERBOSE_VALUE)
    pred_labels = pred_to_label(test_pred, idx2tag)
    write_to_file("", OUTPUT_LABEL_FILE)
    with open(OUTPUT_LABEL_FILE, 'a+') as f:
        for i in pred_labels:
            f.write(" ".join(i) + "\n")
    tags, sentence = file_to_formatted_array(OUTPUT_LABEL_FILE)
    lines = new_x_test
    num_sen = len(lines)
    user_stories = []
    relevant_tag = []
    tag_sen = 0
    i = 0
    last_user = ""
    last_tag = 0
    tag_eval = ""
    # Walk the predicted tag for every word and assemble the user stories
    while i < num_sen - 1:
        for j in lines[i]:
            if not j == PAD_WORD:
                tag_eval = tag_eval + " " + tags[tag_sen] + ": " + j + "\n"
                if not tags[tag_sen] == 'O':
                    relevant_tag.append(tags[tag_sen])
                    if tags[tag_sen] == '1':
                        if last_tag == '1':
                            last_user = last_user + " " + j
                        else:
                            last_user = j
                    if tags[tag_sen] == '4':
                        j = last_user
                    if tags[tag_sen] == '9':
                        j = "\n"
                    if "." in j:
                        j = j + "\n"
                    user_stories.append(j)
                last_tag = tags[tag_sen]
            tag_sen += 1
        i += 1
    write_to_file(" ".join(user_stories), OUTPUT_FILE)
    embellish(OUTPUT_FILE)
    print("FINISHED: Result in ", OUTPUT_FILE)
def pred_to_label(pred, tag):
    """ Turns a prediction into a label

    Args:
        pred : float
            prediction value for the label
        tag : string

    Returns:
        out : string
            prediction as label
    """
    out = []
    for pred_i in pred:
        out_i = []
        for p in pred_i:
            p_i = np.argmax(p)
            out_i.append(tag[p_i].replace(PAD_WORD, "O"))
        out.append(out_i)
    return out
def count_lines_in_file(file_to_count, partial):
    """ Counts the non-empty lines in a file and returns 1/partial of that count

    Args:
        file_to_count : string
            filepath of the file
        partial : int
            1/partial to be returned

    Returns:
        number of lines : int
    """
    count = 0
    with open(file_to_count, 'r') as f:
        for line in f:
            if line != "\n":
                count += 1
    count = (count - (count % partial)) / partial
    return int(count)
def get_count_for_batch_train_test_data(count):
    """ Calculates the number of lines used for training and for evaluation, based on the batch size

    Args:
        count : int
            number of lines in the training data

    Returns:
        number of lines for training : int
        number of lines for evaluation : int
    """
    count_testing = count * TEST_SIZE
    count_training = count * (1 - TEST_SIZE)
    diff_test = count_testing - (count_testing % BATCH_SIZE)
    diff_train = count_training - (count_training % BATCH_SIZE)
    return int(diff_train / BATCH_SIZE), int(diff_test / BATCH_SIZE)
def embellished_list_to_string(l):
    """ Turns a list into a string

    Note:
        This function is meant to be used by the embellish function

    Args:
        l : list
            list to be turned into a string

    Returns:
        string based on the list : string
    """
    str1 = ""
    for ele in l:
        str1 += ele + " "
    return str1
def embellish(file_out):
    """ Embellishes a file: removes pad words, puts each sentence on a new line, and removes empty lines

    Args:
        file_out : filepath
            file to be embellished
    """
    vowels = ('a', 'e', 'i', 'o', 'A', 'E', 'I', 'O')
    lines = file_to_type_array(str, file_out)
    write_to_file("", file_out)
    for line in lines:
        # Drop immediately repeated words
        new_line = []
        words = line.split()
        line_count = len(words)
        for i in range(len(words)):
            if i + 1 < line_count:
                if not words[i] == words[i + 1]:
                    new_line.append(words[i])
            else:
                new_line.append(words[i])
        if len(words) > 1:
            embellished_sentence = embellished_list_to_string(new_line) + "\n"
            if embellished_sentence.startswith(vowels):
                start = "As an "
            else:
                start = "As a "
            embellished_sentence = start + embellished_sentence.replace(" " + PAD_WORD, "")\
                .replace(" is able to", ", I am able to")\
                .replace(" must be able to", ", I am able to")\
                .replace(" should be able to", ", I am able to")\
                .replace(" wants", ", I want")\
                .replace(" can", ", I can")\
                .replace(" her ", " my ")\
                .replace(" his ", " my ")
            with open(file_out, 'a+') as f:
                f.write(embellished_sentence)
My main concern is getting the recall value for the predictions via model.evaluate(). I have already checked y_val and y_pred for unique values and NaNs, and neither returned anything strange.
y_pred has the shape (192, 6) and contains the following:
[[12. 29. 0. 25. 16. 41.]
[ 9. 29. 0. 7. 14. 41.]
[ 2. 31. 5. 17. 15. 42.]
...
[11. 34. 0. 5. 16. 43.]
[ 3. 35. 0. 29. 16. 44.]
[11. 34. 21. 28. 6. 41.]]
y_val has the shape (192, 50) and contains the following:
[[3. 3. 3. ... 0. 0. 0.]
[3. 1. 1. ... 0. 0. 0.]
[3. 1. 1. ... 0. 0. 0.]
...
[3. 1. 1. ... 0. 0. 0.]
[3. 1. 1. ... 0. 0. 0.]
[3. 3. 3. ... 0. 0. 0.]]
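For completeness, the checks I ran looked roughly like this (a minimal sketch, using the same variable names as in the code above):

import numpy as np

# Sanity checks on the two arrays passed to model.evaluate():
# the value ranges look plausible and no NaNs show up.
print(np.unique(y_pred))          # distinct predicted values
print(np.unique(y_val))           # distinct label values
print(np.isnan(y_pred).any())     # False in my case
print(np.isnan(y_val).any())      # False in my case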
I hope someone can help!
EDIT:
I tried @vimboi145's suggestion and got the following result:
tensorflow.python.framework.errors_impl.InvalidArgumentError: Can not squeeze dim[1], expected a dimension of 1, got 50
[[{{node metrics/accuracy/Squeeze}}]]
So I added
x_val_arr = np.argmax(x_val_arr, axis=1)
but this now gives me the following error:
ValueError: Error when checking input: expected input_1 to have shape (50,) but got array with shape (1,)
At this point I really don't know what to do.
1 Answer
Keras's model.evaluate takes x and y arguments, not y_true and y_pred, so I would change

loss, accuracy, f1_score, precision, recall = model.evaluate(y_val, y_pred, verbose=VERBOSE_VALUE)

to

loss, accuracy, f1_score, precision, recall = model.evaluate(x_val_arr, y_val, verbose=VERBOSE_VALUE)

and see how it runs.
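To make the shapes line up as well, a minimal sketch of the evaluation step could look like this (untested; it assumes y_val is still in the (num_samples, MAX_LEN, 1) shape that model.fit() received, i.e. before the transpose/reshape in the question):

import numpy as np

# x: the padded validation token lists (strings), the same input model.fit() saw
x_val_arr = np.asarray(x_val)

# y: the true integer tag indices -- evaluate() runs the model's own forward
# pass internally, so y_pred is never passed in at all
loss, accuracy = model.evaluate(x_val_arr, y_val,
                                batch_size=BATCH_SIZE,
                                verbose=VERBOSE_VALUE)
print('loss:', loss)
print('accuracy:', accuracy)

Note also that with metrics=["accuracy"] in model.compile, evaluate() returns only two values, so unpacking five (loss, accuracy, f1_score, precision, recall) would fail regardless; f1/precision/recall would have to be added as compiled metrics or computed from the model.predict() output, e.g. with the seqeval functions already imported at the top.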