I have a simple model that I'm training on a classification task.
Here are the model and its hyperparameters:
# Training hyperparameters.
num_epochs, batch_size = 10, 64
learning_rate = 1e-3

# Network dimensions: input width, hidden-layer width, number of classes.
input_size, hidden_size, num_classes = 44, 500, 2

# Use the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
class Net(nn.Module):
    """Two-layer fully connected network: Linear -> ReLU -> Linear.

    Args:
        input_size: Number of input features expected by the first layer.
        hidden_size: Number of units in the hidden layer.
        num_classes: Number of output units (one score per class).
    """

    def __init__(self, input_size: int, hidden_size: int, num_classes: int):
        # Zero-argument super() avoids naming the class explicitly. The
        # previous call named `NeuralNet`, which is not this class, and
        # raised NameError as soon as Net(...) was instantiated.
        super().__init__()
        self.input_size = input_size
        self.l1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.l2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return the output of the final linear layer for input ``x``.

        No softmax is applied here; the values are unnormalized scores.
        """
        out = self.l1(x)
        out = self.relu(out)
        out = self.l2(out)
        return out
The error occurs in the training loop when execution reaches `loss.backward()`:
    # Run num_epochs passes over train_loader, taking one optimizer step
    # per batch.
    for epoch in range(num_epochs):
        for i, (features, labels) in enumerate(train_loader):
            # Reshape the batch to rows of 44 values and put it on the same
            # device as the labels, so inputs and targets never end up on
            # different devices.
            features = features.view(-1, 44).to(device)
            # as_tensor converts dtype/device without the copy-construct
            # UserWarning that torch.tensor(existing_tensor) emits, and still
            # accepts lists or arrays if the loader yields those.
            labels = torch.as_tensor(labels, dtype=torch.long, device=device)

            outputs = model(features)
            print(outputs.shape)
            print(labels.shape)
            # NOTE(review): if criterion expects class indices (e.g. a
            # cross-entropy loss), every label must lie in
            # [0, outputs.size(1)). Out-of-range targets on CUDA can surface
            # as unrelated-looking errors later on; re-running on CPU usually
            # gives a clearer message. Confirm against the dataset.
            loss = criterion(outputs, labels)
            optimizer.zero_grad()
            loss.backward()  # Error happens
            optimizer.step()
The shape of `outputs` is (64, 2).
The shape of `labels` is (64,).
The error is:
 File "dl.py", line 218, in cnn_setup
    loss.backward()
  File "F:\Python3.7.7\lib\site-packages\torch\tensor.py", line 185, in backward
    torch.autograd.backward(self, gradient, retain_graph, create_graph)
  File "F:\Python3.7.7\lib\site-packages\torch\autograd\__init__.py", line 127, in backward
    allow_unreachable=True)  # allow_unreachable flag
RuntimeError: CUDA error: CUBLAS_STATUS_ALLOC_FAILED when calling `cublasCreate(handle)`
Exception raised from createCublasHandle at ..\aten\src\ATen\cuda\CublasHandlePool.cpp:8 (most recent call first):
00007FF8DB5E75A200007FF8DB5E7540 c10.dll!c10::Error::Error [<unknown file> @ <unknown line number>]
00007FF863A5AEA800007FF863A59E70 torch_cuda.dll!at::cuda::getCurrentCUDASparseHandle [<unknown file> @ <unknown line number>]
00007FF863A5A7D800007FF863A59E70 torch_cuda.dll!at::cuda::getCurrentCUDASparseHandle [<unknown file> @ <unknown line number>]
00007FF863A5B66700007FF863A5B1A0 torch_cuda.dll!at::cuda::getCurrentCUDABlasHandle [<unknown file> @ <unknown line number>]
00007FF863A5B24700007FF863A5B1A0 torch_cuda.dll!at::cuda::getCurrentCUDABlasHandle [<unknown file> @ <unknown line number>]
...
I found a similar question, but it doesn't seem to apply to my situation.
What are possible solutions to this problem?
 
    