Hi Nigel,
I was trying to compare the performance of the different backends, CUDA and OpenMP. The compilation with the CUDA backend went well, following the approach you described in your reply. However, when I ran the simulation with the OpenMP backend, the following error came up:
Traceback (most recent call last):
File "e:\pyfr_test\pyfr\util.py", line 33, in __call__
res = cache[key]
KeyError: (<function OpenMPKernelProvider._build_kernel at 0x0000020D34FE7EE0>, b'\x80\x04\x955\x03\x00\x00\x00\x00\x00\x00\x8c\nbatch_gemm\x94X\xf3\x02\x00\x00\n\n#include <omp.h>\n#include <stdlib.h
>\n#include <tgmath.h>\n\n#define SOA_SZ 8\n#define BLK_SZ 8\n\n#define min(a, b) ((a) < (b) ? (a) : (b))\n#define max(a, b) ((a) > (b) ? (a) : (b))\n\n// Typedefs\ntypedef double fpdtype_t;\n\n\n\n//
libxsmm prototype\ntypedef void (*libxsmm_xfsspmdm_execute)(void *, const fpdtype_t *,\n fpdtype_t *);\n\n// gimmik prototype\ntypedef void (*gimmik_execute)(i
nt, const fpdtype_t *, int, fpdtype_t *, int);\n\nvoid\nbatch_gemm(gimmik_execute exec, int bldim,\n int nblocks,\n const fpdtype_t *b, int bblocksz, fpdtype_t *c, int cblocksz)\n{
\n #pragma omp parallel for\n for (int ib = 0; ib < nblocks; ib++)\n exec(bldim, b + ib*bblocksz, bldim, c + ib*cblocksz, bldim);\n}\n\n\x94]\x94(\x8c\x05numpy\x94\x8c\x05int64\x94\x93\x9
4h\x03\x8c\x05int32\x94\x93\x94h\x07h\x05h\x07h\x05h\x07e\x87\x94.', b'\x80\x04}\x94.')
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "E:\Anaconda\envs\pyfr_tf\Scripts\pyfr-script.py", line 33, in <module>
sys.exit(load_entry_point('pyfr', 'console_scripts', 'pyfr')())
File "e:\pyfr_test\pyfr\__main__.py", line 117, in main
args.process(args)
File "e:\pyfr_test\pyfr\__main__.py", line 250, in process_run
_process_common(
File "e:\pyfr_test\pyfr\__main__.py", line 232, in _process_common
solver = get_solver(backend, rallocs, mesh, soln, cfg)
File "e:\pyfr_test\pyfr\solvers\__init__.py", line 16, in get_solver
return get_integrator(backend, systemcls, rallocs, mesh, initsoln, cfg)
File "e:\pyfr_test\pyfr\integrators\__init__.py", line 36, in get_integrator
return integrator(backend, systemcls, rallocs, mesh, initsoln, cfg)
File "e:\pyfr_test\pyfr\integrators\std\controllers.py", line 13, in __init__
super().__init__(*args, **kwargs)
File "e:\pyfr_test\pyfr\integrators\std\base.py", line 27, in __init__
self.system = systemcls(backend, rallocs, mesh, initsoln,
File "e:\pyfr_test\pyfr\solvers\base\system.py", line 68, in __init__
self._gen_kernels(eles, int_inters, mpi_inters, bc_inters)
File "e:\pyfr_test\pyfr\solvers\base\system.py", line 187, in _gen_kernels
kernels[pn, kn].append(kgetter())
File "e:\pyfr_test\pyfr\solvers\baseadvec\elements.py", line 45, in <lambda>
kernels['disu'] = lambda: self._be.kernel(
File "e:\pyfr_test\pyfr\backends\base\backend.py", line 163, in kernel
return kern(*args, **kwargs)
File "e:\pyfr_test\pyfr\backends\openmp\gimmik.py", line 48, in mul
batch_gemm = self._build_kernel('batch_gemm', src, argt)
File "e:\pyfr_test\pyfr\util.py", line 35, in __call__
res = cache[key] = self.func(*args, **kwargs)
File "e:\pyfr_test\pyfr\backends\openmp\provider.py", line 13, in _build_kernel
mod = SourceModule(src, self.backend.cfg)
File "e:\pyfr_test\pyfr\backends\openmp\compiler.py", line 65, in __init__
self.mod = self._cache_set_and_loadlib(lpath)
File "e:\pyfr_test\pyfr\backends\openmp\compiler.py", line 130, in _cache_set_and_loadlib
return CDLL(clpath)
File "E:\Anaconda\envs\pyfr_tf\lib\ctypes\__init__.py", line 373, in __init__
self._handle = _dlopen(self._name, mode)
FileNotFoundError: Could not find module 'C:\Users\Thatcher\AppData\Local\pyfr\pyfr\Cache\c8188542ba03ceef8f44731c24cb91566c3cecd078454acb4da2577a34372d9f.dll' (or one of its dependencies). Try using the
full path with constructor syntax.
Actually, I searched this forum and found several questions relating to a similar problem, but all of them seem to concern older versions. After years of updates, I believe some of those questions or bugs may have been resolved. For the OpenMP setup, I installed MS-MPI, GCC (via MinGW) and mpi4py, and all of them have been added to the environment. (Ref. Installation Steps)
I have to admit that libxsmm.dll
is absent, and I still have some problems making it available. But to me, the above error seems to have nothing to do with libxsmm, since it is only used for certain dense and sparse matrix operations, which clearly does not apply to this situation. So I was wondering whether I am missing something, or whether there is anything I should be careful about in these steps?
PS. I think it would be better for me to rename this topic, since the problem now extends to how to compile and run PyFR on Windows 10 with both CUDA and OpenMP.