ResourceExhaustedError while training a deep learning model on an NVIDIA GeForce RTX 3050 Ti Laptop GPU with TensorFlow (memory_limit: 1721342363)
I am training on 2,870 images. Training works fine on my CPU, but on the GPU it seems to be restricted by a memory limit. Have I enabled some memory limit on my GPU, or do I have no option but to use my CPU? One run took 70 minutes on the CPU, which is why I want to train on the GPU instead. But while training with
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy']) 
history = model.fit(training_set, validation_data=test_set, epochs=20, batch_size=32)
I got this error:
Epoch 1/20
 1/45 [..............................] - ETA: 9:46 - loss: 1.8638 - accuracy: 0.1667
---------------------------------------------------------------------------
ResourceExhaustedError                    Traceback (most recent call last)
Cell In [4], line 3
      1 #Compile the model 
      2 model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy']) 
----> 3 history = model.fit(training_set, validation_data=test_set, epochs=20, batch_size=32)
File ~\anaconda3\envs\tf-gpu1\lib\site-packages\keras\engine\training.py:1184, in Model.fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing)
   1177 with tf.profiler.experimental.Trace(
   1178     'train',
   1179     epoch_num=epoch,
   1180     step_num=step,
   1181     batch_size=batch_size,
   1182     _r=1):
   1183   callbacks.on_train_batch_begin(step)
-> 1184   tmp_logs = self.train_function(iterator)
   1185   if data_handler.should_sync:
   1186     context.async_wait()
File ~\anaconda3\envs\tf-gpu1\lib\site-packages\tensorflow\python\eager\def_function.py:885, in Function.__call__(self, *args, **kwds)
    882 compiler = "xla" if self._jit_compile else "nonXla"
    884 with OptionalXlaContext(self._jit_compile):
--> 885   result = self._call(*args, **kwds)
    887 new_tracing_count = self.experimental_get_tracing_count()
    888 without_tracing = (tracing_count == new_tracing_count)
File ~\anaconda3\envs\tf-gpu1\lib\site-packages\tensorflow\python\eager\def_function.py:917, in Function._call(self, *args, **kwds)
    914   self._lock.release()
    915   # In this case we have created variables on the first call, so we run the
    916   # defunned version which is guaranteed to never create variables.
--> 917   return self._stateless_fn(*args, **kwds)  # pylint: disable=not-callable
    918 elif self._stateful_fn is not None:
    919   # Release the lock early so that multiple threads can perform the call
    920   # in parallel.
    921   self._lock.release()
File ~\anaconda3\envs\tf-gpu1\lib\site-packages\tensorflow\python\eager\function.py:3039, in Function.__call__(self, *args, **kwargs)
   3036 with self._lock:
   3037   (graph_function,
   3038    filtered_flat_args) = self._maybe_define_function(args, kwargs)
-> 3039 return graph_function._call_flat(
   3040     filtered_flat_args, captured_inputs=graph_function.captured_inputs)
File ~\anaconda3\envs\tf-gpu1\lib\site-packages\tensorflow\python\eager\function.py:1963, in ConcreteFunction._call_flat(self, args, captured_inputs, cancellation_manager)
   1959 possible_gradient_type = gradients_util.PossibleTapeGradientTypes(args)
   1960 if (possible_gradient_type == gradients_util.POSSIBLE_GRADIENT_TYPES_NONE
   1961     and executing_eagerly):
   1962   # No tape is watching; skip to running the function.
-> 1963   return self._build_call_outputs(self._inference_function.call(
   1964       ctx, args, cancellation_manager=cancellation_manager))
   1965 forward_backward = self._select_forward_and_backward_functions(
   1966     args,
   1967     possible_gradient_type,
   1968     executing_eagerly)
   1969 forward_function, args_with_tangents = forward_backward.forward()
File ~\anaconda3\envs\tf-gpu1\lib\site-packages\tensorflow\python\eager\function.py:591, in _EagerDefinedFunction.call(self, ctx, args, cancellation_manager)
    589 with _InterpolateFunctionError(self):
    590   if cancellation_manager is None:
--> 591     outputs = execute.execute(
    592         str(self.signature.name),
    593         num_outputs=self._num_outputs,
    594         inputs=args,
    595         attrs=attrs,
    596         ctx=ctx)
    597   else:
    598     outputs = execute.execute_with_cancellation(
    599         str(self.signature.name),
    600         num_outputs=self._num_outputs,
   (...)
    603         ctx=ctx,
    604         cancellation_manager=cancellation_manager)
File ~\anaconda3\envs\tf-gpu1\lib\site-packages\tensorflow\python\eager\execute.py:59, in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
     57 try:
     58   ctx.ensure_initialized()
---> 59   tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
     60                                       inputs, attrs, num_outputs)
     61 except core._NotOkStatusException as e:
     62   if name is not None:
ResourceExhaustedError:  OOM when allocating tensor with shape[64,64,224,224] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
     [[node model/block1_conv2/Relu (defined at \AppData\Local\Temp\ipykernel_11956\3538519329.py:3) ]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info. This isn't available when running in Eager mode.
 [Op:__inference_train_function_1205]
Function call stack:
train_function
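
The OOM tensor shape [64,64,224,224] looks like a batch of 64 images passing through a 64-filter conv layer at 224x224 resolution, which is roughly 800 MB of float32 activations for that one layer, while TensorFlow only reports a memory_limit of about 1.7 GB on this GPU. Would something like the following help, or is the model simply too large for this card? This is only a sketch of what I was planning to try (assuming TensorFlow 2.x; the 3072 MB cap and using the first listed GPU are just placeholders), and I understand it has to run before the model is built:

import tensorflow as tf

# Must run BEFORE any model/ops touch the GPU, otherwise it raises RuntimeError.
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        # Option A: let TensorFlow grow its GPU allocation on demand
        # instead of reserving a fixed pool up front.
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)

        # Option B (alternative): cap the per-process GPU memory explicitly.
        # The 3072 MB value here is just a placeholder I picked.
        # tf.config.set_logical_device_configuration(
        #     gpus[0],
        #     [tf.config.LogicalDeviceConfiguration(memory_limit=3072)])
    except RuntimeError as e:
        # Happens if the GPU was already initialized in this session.
        print(e)

I also suspect the batch_size=32 I pass to model.fit is ignored because training_set is a generator/dataset (the OOM tensor has a batch dimension of 64), so reducing the batch size where training_set itself is created might be the more direct fix.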