[cuda] What is the canonical way to check for errors using the CUDA runtime API?

talonmies' answer above is a fine way to abort an application in an assert-style manner.

Occasionally we may wish to report and recover from an error condition in a C++ context as part of a larger application.

Here's a reasonably terse way to do that by throwing a C++ exception derived from std::runtime_error using thrust::system_error:

#include <thrust/system_error.h>
#include <thrust/system/cuda/error.h>
#include <sstream>

void throw_on_cuda_error(cudaError_t code, const char *file, int line)
{
  if(code != cudaSuccess)
  {
    std::stringstream ss;
    ss << file << "(" << line << ")";
    std::string file_and_line;
    ss >> file_and_line;
    throw thrust::system_error(code, thrust::cuda_category(), file_and_line);
  }
}

This will incorporate the filename, line number, and an English language description of the cudaError_t into the thrown exception's .what() member:

#include <iostream>

int main()
{
  try
  {
    // do something crazy
    throw_on_cuda_error(cudaSetDevice(-1), __FILE__, __LINE__);
  }
  catch(thrust::system_error &e)
  {
    std::cerr << "CUDA error after cudaSetDevice: " << e.what() << std::endl;

    // oops, recover
    cudaSetDevice(0);
  }

  return 0;
}

The output:

$ nvcc exception.cu -run
CUDA error after cudaSetDevice: exception.cu(23): invalid device ordinal

A client of some_function can distinguish CUDA errors from other kinds of errors if desired:

try
{
  // call some_function which may throw something
  some_function();
}
catch(thrust::system_error &e)
{
  std::cerr << "CUDA error during some_function: " << e.what() << std::endl;
}
catch(std::bad_alloc &e)
{
  std::cerr << "Bad memory allocation during some_function: " << e.what() << std::endl;
}
catch(std::runtime_error &e)
{
  std::cerr << "Runtime error during some_function: " << e.what() << std::endl;
}
catch(...)
{
  std::cerr << "Some other kind of error during some_function" << std::endl;

  // no idea what to do, so just rethrow the exception
  throw;
}

Because thrust::system_error is a std::runtime_error, we can alternatively handle it in the same manner of a broad class of errors if we don't require the precision of the previous example:

try
{
  // call some_function which may throw something
  some_function();
}
catch(std::runtime_error &e)
{
  std::cerr << "Runtime error during some_function: " << e.what() << std::endl;
}