Problem with CUDA backend

Dear PyFR Developers,

I am trying to run a simulation with the CUDA backend on 12 GPUs. However, I get the following error:

Traceback (most recent call last):
File "/rwthfs/rz/cluster/home/am073751/PyFR/ENV4/lib/python3.7/site-packages/pyfr/util.py", line 33, in __call__
res = cache[key]
KeyError: (<function CUDAKernelProvider.build_kernel at 0x2b76ca4d9268>, b’\x80\x03X\x08\x00\x00\x00mpicfluxq\x00X3&\x00\x00\n\n// AoSoA macros\n#define SOA_SZ 32\n#define SOA_IX(a, v, nv) ((((a) / SOA_SZ)(nv) + (v))SOA_SZ + (a) % SOA_SZ)\n\n// Typedefs\ntypedef double fpdtype_t;\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n__global_ void mpicflux(int _nx, fpdtype_t restrict gradul_v, const int restrict gradul_vix, const int* restrict gradul_vrstri, const fpdtype_t* restrict magnl_v, const fpdtype_t* restrict nl_v, int ldnl, fpdtype_t* restrict ul_v, const int* restrict ul_vix, fpdtype_t* restrict ur_v)\n {\n int x = blockIdx.x*blockDim.x + threadIdx.x;\n #define X_IDX (x)\n #define X_IDX_AOSOA(v, nv) SOA_IX(X_IDX, v, nv)\n if (x < nx)\n {\n \n // Perform the Riemann solve\n fpdtype_t ficomm[5], fvcomm;\n {\n\n // Compute the left and right fluxes + velocities and pressures\n fpdtype_t fl[3][5], fr[3][5];\n fpdtype_t vl[3], vr[3];\n fpdtype_t pl_, pr_;\n\n {\n\n fpdtype_t invrho__ = 1.0/ul_v[ul_vix[X_IDX] + SOA_SZ*(0)], E__ = ul_v[ul_vix[X_IDX] + SOA_SZ*(4)];\n\n // Compute the velocities\n fpdtype_t rhov__[3];\n rhov__[0] = ul_v[ul_vix[X_IDX] + SOA_SZ*(1)];\n vl_[0] = invrho__rhov__[0];\n rhov__[1] = ul_v[ul_vix[X_IDX] + SOA_SZ(2)];\n vl_[1] = invrho__rhov__[1];\n rhov__[2] = ul_v[ul_vix[X_IDX] + SOA_SZ(3)];\n vl_[2] = invrho__rhov__[2];\n\n // Compute the pressure\n pl_ = 0.3999999999999999(E__ - 0.5invrho__((rhov__[0])(rhov__[0]) + (rhov__[1])(rhov__[1]) + (rhov__[2])(rhov__[2])));\n\n // Density and energy fluxes\n fl_[0][0] = rhov__[0];\n fl_[0][4] = (E__ + pl_)vl_[0];\n fl_[1][0] = rhov__[1];\n fl_[1][4] = (E__ + pl_)vl_[1];\n fl_[2][0] = rhov__[2];\n fl_[2][4] = (E__ + pl_)vl_[2];\n\n // Momentum fluxes\n fl_[0][1] = rhov__[0]vl_[0] + pl_;\n fl_[0][2] = rhov__[0]vl_[1];\n fl_[0][3] = rhov__[0]vl_[2];\n fl_[1][1] = rhov__[1]vl_[0];\n fl_[1][2] = rhov__[1]vl_[1] + pl_;\n fl_[1][3] = rhov__[1]vl_[2];\n fl_[2][1] = rhov__[2]vl_[0];\n fl_[2][2] = 
rhov__[2]vl_[1];\n fl_[2][3] = rhov__[2]vl_[2] + pl_;\n\n};\n {\n\n fpdtype_t invrho__ = 1.0/ur_v[_nx(0) + X_IDX], E__ = ur_v[_nx(4) + X_IDX];\n\n // Compute the velocities\n fpdtype_t rhov__[3];\n rhov__[0] = ur_v[_nx(1) + X_IDX];\n vr_[0] = invrho__rhov__[0];\n rhov__[1] = ur_v[_nx(2) + X_IDX];\n vr_[1] = invrho__rhov__[1];\n rhov__[2] = ur_v[_nx(3) + X_IDX];\n vr_[2] = invrho__rhov__[2];\n\n // Compute the pressure\n pr_ = 0.3999999999999999(E__ - 0.5invrho__((rhov__[0])(rhov__[0]) + (rhov__[1])(rhov__[1]) + (rhov__[2])(rhov__[2])));\n\n // Density and energy fluxes\n fr_[0][0] = rhov__[0];\n fr_[0][4] = (E__ + pr_)vr_[0];\n fr_[1][0] = rhov__[1];\n fr_[1][4] = (E__ + pr_)vr_[1];\n fr_[2][0] = rhov__[2];\n fr_[2][4] = (E__ + pr_)vr_[2];\n\n // Momentum fluxes\n fr_[0][1] = rhov__[0]vr_[0] + pr_;\n fr_[0][2] = rhov__[0]vr_[1];\n fr_[0][3] = rhov__[0]vr_[2];\n fr_[1][1] = rhov__[1]vr_[0];\n fr_[1][2] = rhov__[1]vr_[1] + pr_;\n fr_[1][3] = rhov__[1]vr_[2];\n fr_[2][1] = rhov__[2]vr_[0];\n fr_[2][2] = rhov__[2]vr_[1];\n fr_[2][3] = rhov__[2]vr_[2] + pr_;\n\n};\n\n // Sum the left and right velocities and take the normal\n fpdtype_t nv_ = ((nl_v[ldnl(0) + X_IDX])(vl_[0] + vr_[0]) + (nl_v[ldnl(1) + X_IDX])(vl_[1] + vr_[1]) + (nl_v[ldnl(2) + X_IDX])(vl_[2] + vr_[2]));\n\n // Estimate the maximum wave speed / 2\n fpdtype_t a_ = sqrt(0.35(pl_ + pr_)/(ul_v[ul_vix[X_IDX] + SOA_SZ(0)] + ur_v[_nx(0) + X_IDX]))\n + 0.25fabs(nv_);\n\n // Output\n ficomm[0] = 0.5(nl_v[ldnl(0) + X_IDX](fl_[0][0] + fr_[0][0]) + nl_v[ldnl(1) + X_IDX](fl_[1][0] + fr_[1][0]) + nl_v[ldnl(2) + X_IDX](fl_[2][0] + fr_[2][0]))\n + a_(ul_v[ul_vix[X_IDX] + SOA_SZ(0)] - ur_v[nx*(0) + X_IDX]);\n ficomm[1] = 0.5*(nl_v[ldnl*(0) + X_IDX]*(fl[0][1] + fr_[0][1]) + nl_v[ldnl*(1) + X_IDX](fl_[1][1] + fr_[1][1]) + nl_v[ldnl(2) + X_IDX](fl_[2][1] + fr_[2][1]))\n + a_(ul_v[ul_vix[X_IDX] + SOA_SZ*(1)] - ur_v[nx*(1) + X_IDX]);\n ficomm[2] = 0.5*(nl_v[ldnl*(0) + X_IDX]*(fl[0][2] + fr_[0][2]) + nl_v[ldnl*(1) + 
X_IDX](fl_[1][2] + fr_[1][2]) + nl_v[ldnl(2) + X_IDX](fl_[2][2] + fr_[2][2]))\n + a_(ul_v[ul_vix[X_IDX] + SOA_SZ*(2)] - ur_v[nx*(2) + X_IDX]);\n ficomm[3] = 0.5*(nl_v[ldnl*(0) + X_IDX]*(fl[0][3] + fr_[0][3]) + nl_v[ldnl*(1) + X_IDX](fl_[1][3] + fr_[1][3]) + nl_v[ldnl(2) + X_IDX](fl_[2][3] + fr_[2][3]))\n + a_(ul_v[ul_vix[X_IDX] + SOA_SZ*(3)] - ur_v[nx*(3) + X_IDX]);\n ficomm[4] = 0.5*(nl_v[ldnl*(0) + X_IDX]*(fl[0][4] + fr_[0][4]) + nl_v[ldnl*(1) + X_IDX](fl_[1][4] + fr_[1][4]) + nl_v[ldnl(2) + X_IDX](fl_[2][4] + fr_[2][4]))\n + a_(ul_v[ul_vix[X_IDX] + SOA_SZ*(4)] - ur_v[nx*(4) + X_IDX]);\n\n};\n\n fpdtype_t fvl[3][5] = {{0}};\n {\n\n fpdtype_t rho = ul_v[ul_vix[X_IDX] + SOA_SZ*(0)];\n fpdtype_t rhou_ = ul_v[ul_vix[X_IDX] + SOA_SZ*(1)], rhov_ = ul_v[ul_vix[X_IDX] + SOA_SZ*(2)], rhow_ = ul_v[ul_vix[X_IDX] + SOA_SZ*(3)];\n fpdtype_t E_ = ul_v[ul_vix[X_IDX] + SOA_SZ*(4)];\n\n fpdtype_t rcprho_ = 1.0/rho_;\n fpdtype_t u_ = rcprho_rhou_, v_ = rcprho_rhov_, w_ = rcprho_rhow_;\n\n fpdtype_t rho_x_ = gradul_v[gradul_vix[X_IDX] + gradul_vrstri[X_IDX](0) + SOA_SZ(0)];\n fpdtype_t rho_y_ = gradul_v[gradul_vix[X_IDX] + gradul_vrstri[X_IDX](1) + SOA_SZ*(0)];\n fpdtype_t rho_z_ = gradul_v[gradul_vix[X_IDX] + gradul_vrstri[X_IDX](2) + SOA_SZ(0)];\n\n // Velocity derivatives (rho_grad[u_,v_,w_])\n fpdtype_t u_x_ = gradul_v[gradul_vix[X_IDX] + gradul_vrstri[X_IDX](0) + SOA_SZ*(1)] - u_rho_x_;\n fpdtype_t u_y_ = gradul_v[gradul_vix[X_IDX] + gradul_vrstri[X_IDX](1) + SOA_SZ*(1)] - u_rho_y_;\n fpdtype_t u_z_ = gradul_v[gradul_vix[X_IDX] + gradul_vrstri[X_IDX](2) + SOA_SZ*(1)] - u_rho_z_;\n fpdtype_t v_x_ = gradul_v[gradul_vix[X_IDX] + gradul_vrstri[X_IDX](0) + SOA_SZ*(2)] - v_rho_x_;\n fpdtype_t v_y_ = gradul_v[gradul_vix[X_IDX] + gradul_vrstri[X_IDX](1) + SOA_SZ*(2)] - v_rho_y_;\n fpdtype_t v_z_ = gradul_v[gradul_vix[X_IDX] + gradul_vrstri[X_IDX](2) + SOA_SZ*(2)] - v_rho_z_;\n fpdtype_t w_x_ = gradul_v[gradul_vix[X_IDX] + gradul_vrstri[X_IDX](0) + SOA_SZ*(3)] - w_rho_x_;\n fpdtype_t 
w_y_ = gradul_v[gradul_vix[X_IDX] + gradul_vrstri[X_IDX](1) + SOA_SZ*(3)] - w_rho_y_;\n fpdtype_t w_z_ = gradul_v[gradul_vix[X_IDX] + gradul_vrstri[X_IDX](2) + SOA_SZ*(3)] - w_rho_z_;\n\n fpdtype_t E_x_ = gradul_v[gradul_vix[X_IDX] + gradul_vrstri[X_IDX](0) + SOA_SZ*(4)];\n fpdtype_t E_y_ = gradul_v[gradul_vix[X_IDX] + gradul_vrstri[X_IDX](1) + SOA_SZ(4)];\n fpdtype_t E_z_ = gradul_v[gradul_vix[X_IDX] + gradul_vrstri[X_IDX](2) + SOA_SZ(4)];\n\n // Compute the temperature and viscosity\n fpdtype_t cpT_ = 1.4*(rcprho_E_ - 0.5(u_u_ + v_v_ + w_w_));\n fpdtype_t Trat_ = 0.4cpT_;\n fpdtype_t mu_c_ = 4.59985915432e-05Trat_sqrt(Trat_)\n / (cpT_ + 1.270376356);\n\n // Compute temperature derivatives (c_vdT/d[x,y,z])\n fpdtype_t T_x_ = rcprho_(E_x_ - (rcprho_rho_x_E_ + u_u_x_ + v_v_x_ + w_w_x_));\n fpdtype_t T_y_ = rcprho_(E_y_ - (rcprho_rho_y_E_ + u_u_y_ + v_v_y_ + w_w_y_));\n fpdtype_t T_z_ = rcprho_(E_z_ - (rcprho_rho_z_E_ + u_u_z_ + v_v_z_ + w_w_z_));\n\n // Negated stress tensor elements\n fpdtype_t t_xx_ = -2mu_c_rcprho_(u_x_ - 0.3333333333333333(u_x_ + v_y_ + w_z_));\n fpdtype_t t_yy_ = -2mu_c_rcprho_(v_y_ - 0.3333333333333333(u_x_ + v_y_ + w_z_));\n fpdtype_t t_zz_ = -2mu_c_rcprho_(w_z_ - 0.3333333333333333(u_x_ + v_y_ + w_z_));\n fpdtype_t t_xy_ = -mu_c_rcprho_(v_x_ + u_y_);\n fpdtype_t t_xz_ = -mu_c_rcprho_(u_z_ + w_x_);\n fpdtype_t t_yz_ = -mu_c_rcprho_(w_y_ + v_z_);\n\n fvl[0][1] += t_xx_; fvl[1][1] += t_xy_; fvl[2][1] += t_xz_;\n fvl[0][2] += t_xy_; fvl[1][2] += t_yy_; fvl[2][2] += t_yz_;\n fvl[0][3] += t_xz_; fvl[1][3] += t_yz_; fvl[2][3] += t_zz_;\n\n fvl[0][4] += u_t_xx_ + v_t_xy_ + w_t_xz_ + -mu_c_1.9444444444444444T_x_;\n fvl[1][4] += u_t_xy_ + v_t_yy_ + w_t_yz_ + -mu_c_1.9444444444444444T_y_;\n fvl[2][4] += u_t_xz_ + v_t_yz_ + w_t_zz_ + -mu_c_1.9444444444444444T_z_;\n\n};\n {\n\n\n};\n\n\n fvcomm = nl_v[ldnl(0) + X_IDX]fvl[0][0] + nl_v[ldnl(1) + X_IDX]fvl[1][0] + nl_v[ldnl(2) + X_IDX]fvl[2][0];\n fvcomm += 0.1(ul_v[ul_vix[X_IDX] + SOA_SZ(0)] - ur_v[_nx(0) 
+ X_IDX]);\n\n ul_v[ul_vix[X_IDX] + SOA_SZ(0)] = magnl_v[X_IDX](ficomm[0] + fvcomm);\n fvcomm = nl_v[ldnl(0) + X_IDX]fvl[0][1] + nl_v[ldnl(1) + X_IDX]fvl[1][1] + nl_v[ldnl(2) + X_IDX]fvl[2][1];\n fvcomm += 0.1(ul_v[ul_vix[X_IDX] + SOA_SZ(1)] - ur_v[_nx(1) + X_IDX]);\n\n ul_v[ul_vix[X_IDX] + SOA_SZ(1)] = magnl_v[X_IDX](ficomm[1] + fvcomm);\n fvcomm = nl_v[ldnl(0) + X_IDX]fvl[0][2] + nl_v[ldnl(1) + X_IDX]fvl[1][2] + nl_v[ldnl(2) + X_IDX]fvl[2][2];\n fvcomm += 0.1(ul_v[ul_vix[X_IDX] + SOA_SZ(2)] - ur_v[_nx(2) + X_IDX]);\n\n ul_v[ul_vix[X_IDX] + SOA_SZ(2)] = magnl_v[X_IDX](ficomm[2] + fvcomm);\n fvcomm = nl_v[ldnl(0) + X_IDX]fvl[0][3] + nl_v[ldnl(1) + X_IDX]fvl[1][3] + nl_v[ldnl(2) + X_IDX]fvl[2][3];\n fvcomm += 0.1(ul_v[ul_vix[X_IDX] + SOA_SZ*(3)] - ur_v[_nx*(3) + X_IDX]);\n\n ul_v[ul_vix[X_IDX] + SOA_SZ*(3)] = magnl_v[X_IDX](ficomm[3] + fvcomm);\n fvcomm = nl_v[ldnl(0) + X_IDX]fvl[0][4] + nl_v[ldnl(1) + X_IDX]fvl[1][4] + nl_v[ldnl(2) + X_IDX]fvl[2][4];\n fvcomm += 0.1(ul_v[ul_vix[X_IDX] + SOA_SZ*(4)] - ur_v[_nx*(4) + X_IDX]);\n\n ul_v[ul_vix[X_IDX] + SOA_SZ*(4)] = magnl_v[X_IDX]*(ficomm[4] + fvcomm);\n\n }\n #undef X_IDX\n #undef X_IDX_AOSOA\n }\n\nq\x01]q\x02(cnumpy\nint32\nq\x03cnumpy\nint64\nq\x04h\x04h\x04h\x04h\x04h\x03h\x04h\x04h\x04e\x87q\x05.’, b’\x80\x03}q\x00.’)

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
File "/rwthfs/rz/cluster/home/am073751/PyFR/ENV4/bin/pyfr", line 11, in <module>
sys.exit(main())
File "/rwthfs/rz/cluster/home/am073751/PyFR/ENV4/lib/python3.7/site-packages/pyfr/__main__.py", line 112, in main
args.process(args)
File "/rwthfs/rz/cluster/home/am073751/PyFR/ENV4/lib/python3.7/site-packages/pyfr/__main__.py", line 263, in process_restart
_process_common(args, mesh, soln, cfg)
File "/rwthfs/rz/cluster/home/am073751/PyFR/ENV4/lib/python3.7/site-packages/pyfr/__main__.py", line 226, in _process_common
solver = get_solver(backend, rallocs, mesh, soln, cfg)
File "/rwthfs/rz/cluster/home/am073751/PyFR/ENV4/lib/python3.7/site-packages/pyfr/solvers/__init__.py", line 16, in get_solver
return get_integrator(backend, systemcls, rallocs, mesh, initsoln, cfg)
File "/rwthfs/rz/cluster/home/am073751/PyFR/ENV4/lib/python3.7/site-packages/pyfr/integrators/__init__.py", line 36, in get_integrator
return integrator(backend, systemcls, rallocs, mesh, initsoln, cfg)
File "/rwthfs/rz/cluster/home/am073751/PyFR/ENV4/lib/python3.7/site-packages/pyfr/integrators/std/controllers.py", line 80, in __init__
super().__init__(*args, **kwargs)
File "/rwthfs/rz/cluster/home/am073751/PyFR/ENV4/lib/python3.7/site-packages/pyfr/integrators/std/controllers.py", line 14, in __init__
super().__init__(*args, **kwargs)
File "/rwthfs/rz/cluster/home/am073751/PyFR/ENV4/lib/python3.7/site-packages/pyfr/integrators/std/steppers.py", line 159, in __init__
super().__init__(*args, **kwargs)
File "/rwthfs/rz/cluster/home/am073751/PyFR/ENV4/lib/python3.7/site-packages/pyfr/integrators/std/base.py", line 19, in __init__
nregs=self.nregs, cfg=cfg)
File "/rwthfs/rz/cluster/home/am073751/PyFR/ENV4/lib/python3.7/site-packages/pyfr/solvers/base/system.py", line 64, in __init__
self._gen_kernels(eles, int_inters, mpi_inters, bc_inters)
File "/rwthfs/rz/cluster/home/am073751/PyFR/ENV4/lib/python3.7/site-packages/pyfr/solvers/base/system.py", line 193, in _gen_kernels
kernels[pn, kn].append(kgetter())
File "/rwthfs/rz/cluster/home/am073751/PyFR/ENV4/lib/python3.7/site-packages/pyfr/solvers/navstokes/inters.py", line 69, in <lambda>
magnl=self._mag_pnorm_lhs, nl=self._norm_pnorm_lhs
File "/rwthfs/rz/cluster/home/am073751/PyFR/ENV4/lib/python3.7/site-packages/pyfr/backends/base/backend.py", line 163, in kernel
return kern(*args, **kwargs)
File "/rwthfs/rz/cluster/home/am073751/PyFR/ENV4/lib/python3.7/site-packages/pyfr/backends/base/kernels.py", line 162, in kernel_meth
fun = self._build_kernel(name, src, list(it.chain(*argt)))
File "/rwthfs/rz/cluster/home/am073751/PyFR/ENV4/lib/python3.7/site-packages/pyfr/util.py", line 35, in __call__
res = cache[key] = self.func(*args, **kwargs)
File "/rwthfs/rz/cluster/home/am073751/PyFR/ENV4/lib/python3.7/site-packages/pyfr/backends/cuda/provider.py", line 20, in _build_kernel
fun = compiler.SourceModule(src).get_function(name)
File "/rwthfs/rz/cluster/home/am073751/PyFR/ENV4/lib/python3.7/site-packages/pycuda/compiler.py", line 294, in __init__
self.module = module_from_buffer(cubin)
pycuda._driver.LogicError: cuModuleLoadDataEx failed: device kernel image is invalid - error : Binary format for key='0', ident='' is not recognized
application called MPI_Abort(MPI_COMM_WORLD, 1) - process 7

The same case works just fine with the OpenMP backend. I was wondering if you could help me with this error.

Regards
Amir

Hi,

Hi,

thanks for your kind reply.
It worked once I changed the version of CUDA.

Regards
Amir