TensorFlow: Unsupported object type float

Asked by t3irkdon on 2021-09-29

I hope someone can help me resolve this error raised by TensorFlow.
My versions are:

Python 3.6.9
tensorflow 2.3.1
pandas 1.1.4
numpy 1.18.5
Keras 2.4.3

I am running the code on an Ubuntu server.
The error raised is:

tensorflow.python.framework.errors_impl.InternalError: Unsupported object type float

The full traceback is:

Traceback (most recent call last):
  File "usc_coordinator.py", line 61, in <module>
    run_usc_coordinator(fIn, fOut, mode)
  File "usc_coordinator.py", line 31, in run_usc_coordinator
    run_word_classifier(fast_mode, file_out)
  File "/home/ubuntu/PA/PA_AI4US/PythonVersion/src/word_classifier.py", line 180, in run_word_classifier
    loss, accuracy, f1_score, precision, recall = model.evaluate(y_val, y_pred, verbose=VERBOSE_VALUE)
  File "/home/ubuntu/PA/spyder-env/lib/python3.6/site-packages/tensorflow/python/keras/engine/training_v1.py", line 915, in evaluate
    use_multiprocessing=use_multiprocessing)
  File "/home/ubuntu/PA/spyder-env/lib/python3.6/site-packages/tensorflow/python/keras/engine/training_arrays.py", line 695, in evaluate
    callbacks=callbacks)
  File "/home/ubuntu/PA/spyder-env/lib/python3.6/site-packages/tensorflow/python/keras/engine/training_arrays.py", line 386, in model_iteration
    batch_outs = f(ins_batch)
  File "/home/ubuntu/PA/spyder-env/lib/python3.6/site-packages/tensorflow/python/keras/backend.py", line 3825, in __call__
    run_metadata=self.run_metadata)
  File "/home/ubuntu/PA/spyder-env/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1472, in __call__
    run_metadata_ptr)
tensorflow.python.framework.errors_impl.InternalError: Unsupported object type float

The code I am using is shown below:

import pandas as pd
import numpy as np
import os
import sys

import tensorflow as tf
import tensorflow_hub as hub
from tensorflow.python.keras import backend as K

from sklearn.model_selection import train_test_split

from keras.preprocessing.sequence import pad_sequences
from keras.models import Model, Input
from keras.layers.merge import add
from tensorflow.keras.metrics import Recall
from keras.layers import LSTM, Embedding, Dense, TimeDistributed, Dropout, Bidirectional, Lambda
from seqeval.metrics import precision_score, recall_score, f1_score, classification_report

from util.functions_file_to import *
from util.config.word_classifier_config import *
from util.config import *

def run_word_classifier(fastMode, file_in):
    """ Runs and trains ELMO model as well as evaluates file given

        Args:
            fastMode : Boolean
                if should be run in fast mode
            file_in : filepath
                file to be turned in User Story
    """

    line_count = count_lines_in_file(file_in, 1)

    with open(file_in, 'a+') as f:
        while not line_count % BATCH_SIZE == 0:
            f.write(PAD_WORD + '\n')
            line_count += 1

    my_list, my_nums = file_to_formatted_array(CLASSIFIER_SENTENCE_FILE)
    my_labels = write_label_from_file_to_array(CLASSIFIER_LABEL_FILE)
    print(len(my_list))
    print(len(my_labels))
    print(len(my_nums))
    col1 = "Sentence #"
    df = pd.DataFrame(data={col1: my_nums, "Word": my_list, "Tag": my_labels})
    df.to_csv(SENTENCE_CSV_FILE, sep=',', index=INDEX_CSV)

    checking_data, sentence_structure = file_to_formatted_array(file_in)

    col1 = "Sentence #"
    df = pd.DataFrame(data={col1: sentence_structure, "Word": checking_data, "Tag": 1})
    df.to_csv(SENTENCE_CSV_TEST_FILE, sep=',', index=INDEX_CSV)

    data = pd.read_csv(SENTENCE_CSV_FILE, encoding=ENCODING)
    data = data.fillna(method=FILL_METHOD)
    data_test = pd.read_csv(SENTENCE_CSV_TEST_FILE, encoding=ENCODING)
    data_test = data_test.fillna(method=FILL_METHOD)
    words_test = set(list(data_test['Word'].values))
    getter_test = SentenceGetter(data_test)
    sent_test = getter_test.get_next()
    sentences_test = getter_test.sentences

    x_test = [[w[0] for w in s] for s in sentences_test]
    new_x_test = []
    for seq in x_test:
        new_seq = []
        for i in range(MAX_LEN):
            try:
                new_seq.append(seq[i])
            except IndexError:
                new_seq.append(PAD_WORD)
        new_x_test.append(new_seq)

    words = set(list(data['Word'].values))
    words.add(PAD_WORD)

    n_words = len(words)
    tags = list(set(data["Tag"].values))
    n_tags = len(tags)

    getter = SentenceGetter(data)
    sent = getter.get_next()

    sentences = getter.sentences

    largest_sen = max(len(sen) for sen in sentences)

    big_x = [[w[0] for w in s] for s in sentences]
    new_big_x = []
    for seq in big_x:
        new_seq = []
        for i in range(MAX_LEN):
            try:
                new_seq.append(seq[i])
            except IndexError:
                new_seq.append(PAD_WORD)
        new_big_x.append(new_seq)

    tags2index = {t: i for i, t in enumerate(tags)}
    y = [[tags2index[w[1]] for w in s] for s in sentences]
    y = pad_sequences(maxlen=MAX_LEN, sequences=y, padding="post", value=tags2index["O"])

    tf.compat.v1.disable_eager_execution()
    x_tr, x_te, y_tr, y_te = train_test_split(new_big_x, y, test_size=TEST_SIZE, random_state=2018)

    sess = tf.compat.v1.Session()
    K.set_session(sess)

    elmo_model = hub.Module("https://tfhub.dev/google/elmo/3", trainable=True)
    init = tf.compat.v1.global_variables_initializer()

    sess.run(init)

    input_text = Input(shape=(MAX_LEN,), dtype=tf.string)

    def elmo_embedding(inData):
        return \
            elmo_model(inputs={"tokens": tf.squeeze(tf.cast(inData, tf.string)),
                               "sequence_len": tf.constant(BATCH_SIZE * [MAX_LEN])},
                       signature="tokens", as_dict=True)["elmo"]

    embedding = Lambda(elmo_embedding, output_shape=(MAX_LEN, 1024))(input_text)

    x = Bidirectional(LSTM(units=LSTM_UNITS, return_sequences=LSTM_RETURN_SEQ,
                           recurrent_dropout=LSTM_RO_DROPOUT, dropout=LSTM_DROPOUT))(embedding)
    x_rnn = Bidirectional(LSTM(units=LSTM_UNITS, return_sequences=LSTM_RETURN_SEQ,
                               recurrent_dropout=LSTM_RO_DROPOUT, dropout=LSTM_DROPOUT))(x)
    x = add([x, x_rnn])  # residual connection to the first biLSTM
    out = TimeDistributed(Dense(n_tags, activation="softmax"))(x)
    model = Model(input_text, out)
    model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])

    if fastMode:
        line_count_training_data = count_lines_in_file(CLASSIFIER_SENTENCE_FILE, 10)
        size_train, size_test = get_count_for_batch_train_test_data(line_count_training_data)
        print(size_train)
        mode_dict = {
            "train": size_train,
            "test": size_test,
        }
    else:
        line_count_training_data = count_lines_in_file(CLASSIFIER_SENTENCE_FILE, 1)
        size_train, size_test = get_count_for_batch_train_test_data(line_count_training_data)
        print(size_train)
        mode_dict = {
            "train": size_train,
            "test": size_test,
        }

    x_tr, x_val = x_tr[:mode_dict["train"] * BATCH_SIZE], x_tr[-mode_dict["test"] * BATCH_SIZE:]
    y_tr, y_val = y_tr[:mode_dict["train"] * BATCH_SIZE], y_tr[-mode_dict["test"] * BATCH_SIZE:]
    y_tr = y_tr.reshape(y_tr.shape[0], y_tr.shape[1], 1)
    y_val = y_val.reshape(y_val.shape[0], y_val.shape[1], 1)
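    # The slices above trim both splits to exact multiples of BATCH_SIZE (required by the
    # fixed sequence_len in elmo_embedding); the reshape adds the trailing label dimension
    # used here with sparse_categorical_crossentropy.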
    history = model.fit(np.array(x_tr), y_tr, validation_data=(np.array(x_val), y_val), batch_size=BATCH_SIZE, epochs=1, verbose=VERBOSE_VALUE) #NUM_EPOCHS

    x_val_arr = np.asarray(x_val)
    y_pred1 = model.predict(x_val_arr)
    y_pred = np.argmax(y_pred1, axis=1)
    y_pred = y_pred.astype(np.float32)

    y_val_reshaped = y_val.transpose(2,0,1).reshape(192,-1)
    y_val = np.asarray(y_val_reshaped).astype(np.float32)

    loss, accuracy, f1_score, precision, recall = model.evaluate(y_val, y_pred, verbose=VERBOSE_VALUE)

    print('loss:', loss)
    print('accuracy:', accuracy)
    print('f1_score:', f1_score)
    print('precision:', precision)
    print('recall:', recall)

    idx2tag = {i: w for w, i in tags2index.items()}
    test_pred = model.predict(np.array(new_x_test), verbose=VERBOSE_VALUE)
    pred_labels = pred_to_label(test_pred, idx2tag)

    write_to_file("", OUTPUT_LABEL_FILE)
    with open(OUTPUT_LABEL_FILE, 'a+') as f:
        for i in pred_labels:
            f.write(" ".join(i) + "\n")

    tags, sentence = file_to_formatted_array(OUTPUT_LABEL_FILE)
    lines = new_x_test
    num_sen = 0
    for _ in lines:
        num_sen += 1

    user_stories = []
    relevant_tag = []
    tag_sen = 0
    i = 0
    last_user = ""
    last_tag = 0
    tag_eval = ""
    while i < num_sen - 1:
        for j in lines[i]:
            if not j == PAD_WORD:
                tag_eval = tag_eval+" "+tags[tag_sen]+": "+j+"\n"
            if not tags[tag_sen] == 'O':
                relevant_tag.append(tags[tag_sen])
                if tags[tag_sen] == '1':
                    if last_tag == '1':
                        last_user = last_user + " " + j
                    else:
                        last_user = j
                if tags[tag_sen] == '4':
                    j = last_user
                if tags[tag_sen] == '9':
                    j = "\n"
                if "." in j:
                    j = j + "\n"
                user_stories.append(j)
                last_tag = tags[tag_sen]
            tag_sen += 1
        i += 1

    write_to_file(" ".join(user_stories), OUTPUT_FILE)
    embellish(OUTPUT_FILE)
    print("FINISHED: Result in ", OUTPUT_FILE)

def pred_to_label(pred, tag):
    """ Forms prediction to a label

        Args:
            pred : float
                prediction value for label
            tag : string

        Returns:
            out : string
                prediction as label
    """
    out = []
    for pred_i in pred:
        out_i = []
        for p in pred_i:
            p_i = np.argmax(p)
            out_i.append(tag[p_i].replace(PAD_WORD, "O"))
        out.append(out_i)
    return out

def count_lines_in_file(file_to_count, partial):
    """ Counts lines in files and returns 1/partial number of lines

        Args:
            file_to_count : string
                filepath of file
            partial : int
                1/partial to be returnde

        returns:
            number of lines : int

    """
    count = 0
    with open(file_to_count, 'r') as f:
        for line in f:
            if line != "\n":
                count += 1
    count = (count - (count % partial))/partial
    return int(count)

def get_count_for_batch_train_test_data(count):
    """ calculates number of lines used for training and number of lines used for evaluation based on batch size

        Args:
            count : int
                number of lines in training data

        Returns:
            number of batches for training : int
            number of batches for evaluation : int
    """
    count_testing = count * TEST_SIZE
    count_training = count * (1 - TEST_SIZE)

    diff_test = count_testing - (count_testing % BATCH_SIZE)
    diff_train = count_training - (count_training % BATCH_SIZE)

    return int(diff_train / BATCH_SIZE), int(diff_test / BATCH_SIZE)

def embellished_list_to_string(l):
    """ Turns a list into a string

        Note:
            The function is programmed to be used in the embellish function

        Args:
            l : list 
                list to be turned into a string 

        Returns:
            String based on list : string
    """

    str1 = ""
    for ele in l:
        str1 += ele + " "
    return str1

def embellish(file_out):
    """ Embellishes a file, removing Padwords and puts each sentence on new line as well as removing empty lines

        Args:
            file_out : filepath
                File to be embelished

    """
    vowels = ('a','e','i','o','A','E','I','O')
    lines = file_to_type_array(str, file_out)
    write_to_file("", file_out)

    for line in lines:
        new_line = []
        words = line.split()
        line_count = len(words)
        for i in range(len(words)):
            if i + 1 < line_count:
                if not words[i] == words[i + 1]:
                    new_line.append(words[i])
            else:
                new_line.append(words[i])
        if len(words) > 1:

            embellished_sentence = embellished_list_to_string(new_line)+"\n"

            if embellished_sentence.startswith(vowels):
                start = "As an "
            else:
                start = "As a "
            embellished_sentence = start + embellished_sentence.replace(" " + PAD_WORD, "")\
                .replace(" is able to", ", I am able to")\
                .replace(" must be able to", ", I am able to")\
                .replace(" should be able to", ", I am able to")\
                .replace(" wants", ", I want")\
                .replace(" can", ", I can")\
                .replace(" her ", " my ")\
                .replace(" his ", " my ")
            with open(file_out, 'a+') as f:
                f.write(embellished_sentence)

What I am mainly after is the recall value of the predictions via model.evaluate(). I have already checked y_val and y_pred for unique values and for NaNs, and neither returned anything strange.
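Such a check might look roughly like this (a sketch; the exact code was not shown in the post):

for name, arr in [("y_val", y_val), ("y_pred", y_pred)]:
    arr = np.asarray(arr)
    print(name, arr.shape, arr.dtype)
    print("  unique values:", np.unique(arr))
    print("  contains NaN: ", np.isnan(arr).any())
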
y_pred has shape (192, 6) and contains:

[[12. 29.  0. 25. 16. 41.]
 [ 9. 29.  0.  7. 14. 41.]
 [ 2. 31.  5. 17. 15. 42.]
 ...
 [11. 34.  0.  5. 16. 43.]
 [ 3. 35.  0. 29. 16. 44.]
 [11. 34. 21. 28.  6. 41.]]

y_val has shape (192, 50) and contains:

[[3. 3. 3. ... 0. 0. 0.]
 [3. 1. 1. ... 0. 0. 0.]
 [3. 1. 1. ... 0. 0. 0.]
 ...
 [3. 1. 1. ... 0. 0. 0.]
 [3. 1. 1. ... 0. 0. 0.]
 [3. 3. 3. ... 0. 0. 0.]]

I hope someone can help!
Edit:
I tried the suggestion from @vimboi145 and got the following:

tensorflow.python.framework.errors_impl.InvalidArgumentError: Can not squeeze dim[1], expected a dimension of 1, got 50
         [[{{node metrics/accuracy/Squeeze}}]]

So I added

x_val_arr = np.argmax(x_val_arr, axis=1)

However, this now gives me the following error:

ValueError: Error when checking input: expected input_1 to have shape (50,) but got array with shape (1,)

Now I really don't know what to do.

bakd9h0s #1

Keras's model.evaluate takes x and y arguments, not y_true and y_pred, so I would change

loss, accuracy, f1_score, precision, recall = model.evaluate(y_val, y_pred, verbose=VERBOSE_VALUE)

to

loss, accuracy, f1_score, precision, recall = model.evaluate(x_val_arr, y_val, verbose=VERBOSE_VALUE)

and see how it runs.
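
For illustration, a minimal sketch of the swapped call (assuming x_val still holds the padded validation token sequences and y_val keeps the (samples, MAX_LEN, 1) shape used in fit()); with metrics=["accuracy"] in model.compile, evaluate() returns only loss and accuracy, so the five-way unpacking would also need adjusting or extra metrics would have to be registered at compile time:

# evaluate() expects (inputs, targets) in the same form as fit():
# x_val holds the padded token sequences, y_val the integer tag indices
loss, accuracy = model.evaluate(np.array(x_val), y_val,
                                batch_size=BATCH_SIZE, verbose=VERBOSE_VALUE)
print("loss:", loss, "accuracy:", accuracy)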
