The core of my problem is the fact that my features come from NumPy files (.npy).
Therefore I need the following class in my code
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
from torch.utils.data import Dataset, DataLoader
from torchvision.models import resnet50
import time
import copy
class MyDataSet(torch.utils.data.Dataset):
def __init__(self, x, y, transform=None):
super(MyDataSet, self).__init__()
# store the raw tensors
self._x = np.load(x)
self._y = np.load(y)
self.transform = transform
def __len__(self):
# a DataSet must know it size
return self._x.shape[0]
def __getitem__(self, index):
x = self._x[index, :]
y = self._y[index, :]
return x, y
To convert my NumPy files to DataLoaders I do the following. The code below seems to work (at least, no errors are returned)
#Transform dataset
transform = transforms.Compose([transforms.ToTensor()])
dataset = MyDataSet("train1-features.npy","train1-classes.npy",transform=transform)
dataloader = DataLoader(dataset, batch_size=32)
I am trying to fine-tune a RESNET-50 network in these data with 12 classes. Here is what I do
def set_parameter_requires_grad(model, feature_extracting):
if feature_extracting:
for param in model.parameters():
param.requires_grad = False
feature_extract = True
batch_size = 8
num_epochs = 15
num_classes=12
model_ft = resnet50(pretrained=True)
set_parameter_requires_grad(model_ft, feature_extract)
num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Linear(num_ftrs, num_classes)
input_size = 224
if torch.cuda.is_available():
model_ft.cuda()
params_to_update = model_ft.parameters()
print("Params to learn:")
if feature_extract:
params_to_update = []
for name,param in model_ft.named_parameters():
if param.requires_grad == True:
params_to_update.append(param)
print("\t",name)
else:
for name,param in model_ft.named_parameters():
if param.requires_grad == True:
print("\t",name)
# Observe that all parameters are being optimized
optimizer_ft = optim.SGD(params_to_update, lr=0.001, momentum=0.9)
# Setup the loss fxn
criterion = nn.CrossEntropyLoss()
Finally, here is the problematic training function
for epoch in range(num_epochs): # loop over the dataset multiple times
running_loss = 0.0
for i, data in enumerate(dataloader, 0):
# get the inputs; data is a list of [inputs, labels]
inputs, labels = data
#transfer labels and inputs to cuda()
inputs,labels=inputs.cuda(), labels.cuda()
# zero the parameter gradients
optimizer.zero_grad()
# forward backward optimize
outputs = model_ft(inputs)
loss = loss_func(outputs, labels)
loss.backward()
optimizer.step()
# print statistics
running_loss = loss.item()
if i % 2000 == 1999: # print every 2000 mini-batches
print(f'[{epoch 1}, {i 1:5d}] loss: {running_loss / 2000:.3f}')
running_loss = 0.0
This returns me the following error once I execute the code:
Traceback (most recent call last):
File "train_my_data_example.py", line 89, in <module>
for i, data in enumerate(dataloader, 0):
File "/usr/local/lib/python3.8/dist-packages/torch/utils/data/dataloader.py", line 517, in __next__
data = self._next_data()
File "/usr/local/lib/python3.8/dist-packages/torch/utils/data/dataloader.py", line 557, in _next_data
data = self._dataset_fetcher.fetch(index) # may raise StopIteration
File "/usr/local/lib/python3.8/dist-packages/torch/utils/data/_utils/fetch.py", line 44, in fetch
data = [self.dataset[idx] for idx in possibly_batched_index]
File "/usr/local/lib/python3.8/dist-packages/torch/utils/data/_utils/fetch.py", line 44, in <listcomp>
data = [self.dataset[idx] for idx in possibly_batched_index]
File "train_my_data_example.py", line 29, in __getitem__
y = self._y[index, :]
IndexError: too many indices for array: array is 1-dimensional, but 2 were indexed
The error is clearly the dataloader variable, so is this creation ok? I mean, I am loading NumPy data and transforming it to a data loader as below:
transform = transforms.Compose([transforms.ToTensor()])
dataset = MyDataSet("train1-features.npy","train1-classes.npy",transform=transform)
dataloader = DataLoader(dataset, batch_size=32)
Is there any error in my data loader or is the problem the training loop of Pytorch?
P.s: you can reproduce my code by downloading the classes and features here
CodePudding user response:
You are trying to index the second axis of an array which only has a single dimension. Simply replace y = self._y[index, :] with y = self._y[index].
Actually when positioned last, : is not required as all dimensions are selected by default.
