TensorFlow "required broadcastable shapes" error when fitting my model-CodePudding

I'm very new to this, and I'm not understanding/seeing any resolution to this issue. The error specifically says:

InvalidArgumentError:  required broadcastable shapes
     [[node Equal
 (defined at c:\Users\Connor\Documents\StockIQ\venv\lib\site-packages\keras\metrics.py:3609)
]] [Op:__inference_train_function_141873]

It seems like it might have something to do with the shape of my inputs:

# Shapes of the feature and label arrays that are later passed to model.fit
print(train_x.shape)
print(train_y.shape)
print(validation_x.shape)
print(validation_y.shape)
----
(77922, 60, 8)
(77922,)
(3860, 60, 8)
(3860,)

Here are my training statements:

# Callbacks: TensorBoard logging plus checkpointing of the best model so far
tensorboard = TensorBoard(log_dir="logs/{}".format(NAME))

# Unique file name that includes the epoch and the validation accuracy for that epoch.
# The model is compiled with metrics=['accuracy'], which TF 2.x Keras logs as
# 'val_accuracy' (the older 'val_acc' key would fail when the name is formatted).
filepath = "RNN_Final-{epoch:02d}-{val_accuracy:.3f}"
# monitor/verbose/save_best_only/mode are ModelCheckpoint arguments. They used to sit
# inside str.format(), which silently ignores unused keyword arguments.
checkpoint = ModelCheckpoint(
    "models/{}.model".format(filepath),
    monitor='val_accuracy',
    verbose=1,
    save_best_only=True,
    mode='max',
)  # saves only the best ones

history = model.fit(
    train_x, train_y,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=(validation_x, validation_y),
    callbacks=[tensorboard, checkpoint],  # the callbacks only run if passed to fit
)

Any help with this would be amazing - I'm sure I'm missing something simple. I've attached the full file below if that's helpful:

import tensorflow as tf
import numpy as np
import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, BatchNormalization
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras.callbacks import ModelCheckpoint
from functools import reduce
from sklearn import preprocessing
from collections import deque
import random
import time

# Prediction Variables


SEQ_LEN = 60  # number of consecutive rows in each input sequence (window size in preprocess_df)
FUTURE_PERIOUD_PREDICT = 3  # how many rows ahead the 'future' close price is taken from
RATIO_TO_PREDICT = 'LTC-USD'  # ticker whose close price is used to build the target
EPOCHS = 10  # passed to model.fit(epochs=...)
BATCH_SIZE = 64  # passed to model.fit(batch_size=...)
NAME = f"{SEQ_LEN}-SEQ-{FUTURE_PERIOUD_PREDICT}-PRED-{int(time.time())}"  # run name used for the TensorBoard log dir


# joining 4 tables to show user potential relationships b/w coins


main_df = pd.DataFrame()  # begin empty

ratios = ["BTC-USD", "LTC-USD", "BCH-USD", "ETH-USD"]  # the 4 ratios we want to consider
for ratio in ratios:  # begin iteration
    dataset = f'crypto_data/{ratio}.csv'  # get the full path to the file.
    df = pd.read_csv(dataset, names=['time', 'low', 'high', 'open', 'close', 'volume'])  # read in specific file

    # rename volume and close to include the ticker so we can still tell which close/volume is which:
    df.rename(columns={"close": f"{ratio}_close", "volume": f"{ratio}_volume"}, inplace=True)

    df.set_index("time", inplace=True)  # set time as index so we can join them on this shared time
    df = df[[f"{ratio}_close", f"{ratio}_volume"]]  # ignore the other columns besides price and volume

    if main_df.empty:  # nothing collected yet
        main_df = df  # then it's just the current df
    else:  # otherwise, join this data to the main one
        main_df = main_df.join(df)

# if there are gaps in data, use previously known values.
# DataFrame.ffill() replaces the deprecated fillna(method="ffill") spelling.
main_df.ffill(inplace=True)
main_df.dropna(inplace=True)  # rows before the first known value cannot be filled
main_df.head()






# Add result and target columns

# %%
def classify(current, future):
    """Return 1 when the future price is strictly above the current price, else 0.

    Both arguments are converted with float() before being compared.
    """
    rose = float(future) > float(current)
    return 1 if rose else 0



# 'future' holds the close price FUTURE_PERIOUD_PREDICT rows ahead of each row
main_df['future'] = main_df[f'{RATIO_TO_PREDICT}_close'].shift(-FUTURE_PERIOUD_PREDICT)

# 'target' is 1 when that future close is higher than the current close, else 0.
# NOTE(review): the last FUTURE_PERIOUD_PREDICT rows have no future value (NaN after
# the shift), so classify() labels them 0 — confirm that is acceptable.
main_df['target'] = [
    classify(current, future)
    for current, future in zip(main_df[f'{RATIO_TO_PREDICT}_close'], main_df['future'])
]
main_df.head()


# separating training and testing data
# *note the eval data is in the FUTURE of training since it's the same dataset


times = sorted(main_df.index.values)  # all index (time) values in ascending order
last_5pct = times[-int(0.05*len(times))]  # index value at which the final 5% of rows begins

validation_main_df = main_df[(main_df.index >= last_5pct)]  # the most recent 5% of rows
train_main_df = main_df[(main_df.index < last_5pct)]  # everything before that cutoff


# normalize, sequence, and balance data


def preprocess_df(df):
    """Turn a price/volume frame into balanced, shuffled (sequence, label) arrays.

    Steps: drop the 'future' column, convert every feature column to scaled
    percent changes, slide a SEQ_LEN-row window over the rows, then keep an
    equal number of target == 1 ("buy") and target == 0 ("sell") windows.

    Args:
        df: DataFrame with a 'future' column, the feature columns, and
            'target' as its last column (the window code reads the label
            from the final position of each row).

    Returns:
        Tuple ``(X, Y)`` of numpy arrays: X stacks the SEQ_LEN-row feature
        windows and Y holds the target value of the last row of each window.
    """
    df = df.drop('future', axis=1)  # the label is already stored in 'target'

    # normalize data
    for col in df.columns:  # normalizing all columns (except target since that's already binary)
        if col != 'target':
            df[col] = df[col].pct_change()
            df.dropna(inplace=True)  # pct_change leaves NaN in the first row
            df[col] = preprocessing.scale(df[col].values)

    df.dropna(inplace=True)

    # sequencing data
    sequential_data = []
    # fixed-length window: appending a new row pushes the oldest one out
    prev_days = deque(maxlen=SEQ_LEN)

    for row in df.values:
        prev_days.append(list(row[:-1]))  # take all columns except target
        if len(prev_days) == SEQ_LEN:
            sequential_data.append([np.array(prev_days), row[-1]])

    random.shuffle(sequential_data)

    # balance data
    buys = []
    sells = []

    for seq, target in sequential_data:  # generate two lists to compare buys vs sells
        if target == 0:
            sells.append([seq, target])
        elif target == 1:
            buys.append([seq, target])

    random.shuffle(buys)
    random.shuffle(sells)

    # truncate the larger class so both classes have the same number of samples
    lower = min(len(buys), len(sells))
    buys = buys[:lower]
    sells = sells[:lower]

    # concatenate both classes (the '+' was missing here, which is a syntax error)
    sequential_data = buys + sells
    random.shuffle(sequential_data)  # so the model does not see all buys, then all sells

    # separate features and labels into X & Y lists
    X = [seq for seq, _ in sequential_data]
    Y = [target for _, target in sequential_data]

    return np.array(X), np.array(Y)


# Build the balanced sequence/label arrays for the training and validation splits
train_x, train_y = preprocess_df(train_main_df)
validation_x, validation_y = preprocess_df(validation_main_df)


# Build model

#adding layers
# Three stacked LSTM blocks (each followed by dropout and batch normalization),
# then a small dense head with a 2-unit softmax output.
model = Sequential()
model.add(LSTM(128, input_shape=(train_x.shape[1:]), return_sequences=True))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(LSTM(128, input_shape=(train_x.shape[1:]), return_sequences=True))
model.add(Dropout(0.2))
model.add(BatchNormalization())

# NOTE(review): this last LSTM also uses return_sequences=True, so (per Keras LSTM
# semantics) the model emits one output per timestep instead of one per sequence —
# presumably (batch, 60, 2) for the (N, 60, 8) inputs, while the labels are (N,).
# That mismatch is the likely source of the "required broadcastable shapes" error
# raised from the accuracy metric; confirm by checking model.output_shape.
model.add(LSTM(128, input_shape=(train_x.shape[1:]), return_sequences=True))
model.add(Dropout(0.1))
model.add(BatchNormalization())

model.add(Dense(32, activation="tanh"))
model.add(Dropout(0.2))

model.add(Dense(2, activation='softmax'))  # two units: one per class (0 / 1)

# set optimizers
opt = tf.keras.optimizers.Adam(lr=0.001, decay=1e-6)

# compile model
# sparse_categorical_crossentropy takes integer class labels (not one-hot vectors)
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=opt,
    metrics=['accuracy']
)


# Train Model


# Callbacks: TensorBoard logging plus checkpointing of the best model so far
tensorboard = TensorBoard(log_dir="logs/{}".format(NAME))

# Unique file name that includes the epoch and the validation accuracy for that epoch.
# The model is compiled with metrics=['accuracy'], which TF 2.x Keras logs as
# 'val_accuracy' (the older 'val_acc' key would fail when the name is formatted).
filepath = "RNN_Final-{epoch:02d}-{val_accuracy:.3f}"
# monitor/verbose/save_best_only/mode are ModelCheckpoint arguments. They used to sit
# inside str.format(), which silently ignores unused keyword arguments.
checkpoint = ModelCheckpoint(
    "models/{}.model".format(filepath),
    monitor='val_accuracy',
    verbose=1,
    save_best_only=True,
    mode='max',
)  # saves only the best ones

history = model.fit(
    train_x, train_y,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=(validation_x, validation_y),
    callbacks=[tensorboard, checkpoint],  # the callbacks only run if passed to fit
)

CodePudding user response：

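Since each label is a scalar, change the last layer to output a scalar (and set return_sequences=False on the final LSTM so the model produces one prediction per sequence rather than one per timestep):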

model.add(Dense(1, activation='sigmoid'))

and change the loss function:

loss='binary_crossentropy'

Your model will then be:

# Stacked-LSTM binary classifier; only the final LSTM collapses the sequence
model = Sequential([
    LSTM(128, input_shape=(train_x.shape[1:]), return_sequences=True),
    Dropout(0.2),
    BatchNormalization(),

    LSTM(128, return_sequences=True),
    Dropout(0.2),
    BatchNormalization(),

    LSTM(128, return_sequences=False),  # emit a single vector per input sequence
    Dropout(0.1),
    BatchNormalization(),

    Dense(32, activation="tanh"),
    Dropout(0.2),

    Dense(1, activation='sigmoid'),  # one probability, matching the scalar 0/1 label
])

CodePudding user response：

I found the following, which resolved the issue. My labels were not formatted correctly:

"If you're doing binary cross-entropy, then your dataset probably has 2 classes and the error is coming because your label vectors (both in testing and training) have the form [0,1,0,1,1,1,0,0,1,...]. To one-hot encode binary labels, the following function can be used: Labels = tf.one_hot(Labels, depth=2)"

I used this on both my train and validation labels and it's at least running. Not sure if the model is performing well or not haha!

Thanks for the input from everyone on this :)