I was reading this article that explains unified memory, and I went through the code provided, but the one thing I cannot find an answer to is whether, in order to use unified memory, I must always allocate objects on the heap through `new` — otherwise I get runtime errors.
Am I missing some CUDA configuration step? I'm working on a GTX 760 with CUDA 8.0 installed.
// Example type whose instances must live in CUDA managed (unified) memory so
// that both host and device code can access them.
//
// Inheriting PUBLICLY from `Managed` (declared elsewhere; presumably it
// overloads operator new/delete to use cudaMallocManaged/cudaFree — TODO
// confirm against its definition) means only HEAP allocations (`new Object`)
// receive managed storage.  A stack instance (`Object o;`) never calls
// operator new, so it lives in ordinary host memory — which is exactly why
// passing it to a kernel faults.
//
// Fixes vs. the original:
//  * `}` was swallowed by the `//` comments (`{//do something}`), leaving the
//    braces unbalanced — bodies now use /* */ placeholders.
//  * class members default to private; made them public so `main`'s
//    `Object o;`, `o.foo()` and `p->foo()` compile.
//  * inheritance defaults to private for `class`; made it public so the
//    inherited managed operator new/delete are usable by clients.
class Object : public Managed
{
public:
  Object() { /* do something */ }
  void foo() { /* do something else */ }
};
// Kernel that operates on a single Object passed by reference.  A C++
// reference is lowered to a pointer at the ABI level, so `obj` must reside at
// an address the device can dereference (e.g. unified/managed memory obtained
// via `new` on a Managed-derived type).  Passing a reference to a host
// stack-allocated object hands the device a host address and faults at run
// time — this is the difference between the two launches in main().
// NOTE(review): the visible body places no constraints on the grid/block
// layout; any launch configuration is accepted.
__global__ void aKernel(Object& obj)
{
  //do something in parallel with the object
}
// Demonstrates why only heap-allocated (managed) Objects are usable from a
// kernel: stack allocation bypasses Managed::operator new, so the object sits
// in plain host memory that the device cannot touch.
//
// Fixes vs. the original: `b`/`t` were undefined; `p.foo()` used `.` on a
// pointer (must be `p->foo()`); `p` was leaked; missing return value.
int main()
{
  // Launch configuration; any valid sizes work for this illustration.
  const int b = 1;   // blocks
  const int t = 32;  // threads per block

  // Stack instance: Managed::operator new is never invoked, so `o` lives in
  // ordinary host memory.  The kernel receives a host address and faults.
  Object o;
  aKernel<<<b, t>>>(o);
  cudaDeviceSynchronize();   // the in-kernel fault surfaces here
  o.foo();                   // ERROR

  // Heap instance: `new` routes through the inherited managed operator new,
  // so the same address is valid on both host and device.
  Object* p = new Object;
  aKernel<<<b, t>>>(*p);
  cudaDeviceSynchronize();
  p->foo();                  // GOOD

  delete p;                  // release the managed allocation
  return 0;
}
 
     
    