Thanks for working through it. Here is an updated patch.
diff --git a/pyfr/mpiutil.py b/pyfr/mpiutil.py
index 64e56843..305d5fd3 100644
--- a/pyfr/mpiutil.py
+++ b/pyfr/mpiutil.py
@@ -320,12 +320,17 @@ class SparseScatterer(AlltoallMixin):
class Sorter(AlltoallMixin):
- def __init__(self, comm, keys, dtype=int):
+ typemap = {
+ np.int8: np.uint8, np.int16: np.uint16,
+ np.int32: np.uint32, np.int64: np.uint64
+ }
+
+ def __init__(self, comm, keys):
self.comm = comm
# Locally sort our outbound keys
self.sidx = np.argsort(keys)
- skeys = keys[self.sidx].astype(dtype)
+ skeys = keys[self.sidx]
# Determine the total size of the array
size = scal_coll(comm.Allreduce, len(keys))
@@ -346,16 +351,30 @@ class Sorter(AlltoallMixin):
self.ridx = np.argsort(rkeys)
self.keys = rkeys[self.ridx]
+ def _transform_keys(self, skeys):
+ dtype = skeys.dtype
+
+ if np.issubdtype(dtype, np.unsignedinteger):
+ return skeys
+ elif np.issubdtype(dtype, np.signedinteger):
+ tdtype = self.typemap[dtype]
+ return skeys.view(tdtype) ^ tdtype(np.iinfo(dtype).max + 1)
+ else:
+ raise ValueError('Unsupported dtype')
+
def _splitters(self, skeys, r):
+ # Transform the keys so they're unsigned integers
+ skeys = self._transform_keys(skeys)
+
# Determine the minimum and maximum values in the array
- kmin = scal_coll(self.comm.Allreduce, skeys[0], op=mpi.MIN)
- kmax = scal_coll(self.comm.Allreduce, skeys[-1], op=mpi.MAX)
+ kmin = self.comm.allreduce(int(skeys[0]), op=mpi.MIN)
+ kmax = self.comm.allreduce(int(skeys[-1]), op=mpi.MAX)
# Compute the number of bits in the key space
- W = math.ceil(np.log2(kmax - kmin + 1))
+ W = math.ceil(math.log2(kmax - kmin + 1))
e, rt = 0, 0
- q = np.empty(self.comm.size, dtype=int)
+ q = np.empty(self.comm.size, dtype=skeys.dtype)
for i in range(W - 1, -1, -1):
# Compute and gather the probes
@@ -384,7 +403,7 @@ class Sorter(AlltoallMixin):
q[self.comm.rank] = r - rt
self.comm.Allgather(mpi.IN_PLACE, q)
- return lbnd + np.maximum(0, np.minimum(ld, q - gd))
+ return lbnd + np.maximum(0, np.minimum(ld, q.astype(int) - gd))
def __call__(self, svals):
# Locally sort our data
diff --git a/pyfr/nputil.py b/pyfr/nputil.py
index 14af796d..d26140b2 100644
--- a/pyfr/nputil.py
+++ b/pyfr/nputil.py
@@ -56,14 +56,14 @@ def clean(origfn=None, tol=1e-10, ckwarg='clean'):
return cleanfn(origfn) if origfn else cleanfn
-def morton_encode(ipts, imax, dtype=int):
+def morton_encode(ipts, imax, dtype=np.uint64):
# Allocate the codes
codes = np.zeros(len(ipts), dtype=dtype)
# Determine how many bits to use for each input dimension
ndims = ipts.shape[1]
obits = 8*codes.dtype.itemsize
- ibits = obits // ndims
+ ibits = dtype(obits // ndims)
ishift = np.array([max(int(p).bit_length() - ibits, 0) for p in imax])
# Compute the masks and shifts
diff --git a/pyfr/resamplers.py b/pyfr/resamplers.py
index 456a20d6..b7591373 100644
--- a/pyfr/resamplers.py
+++ b/pyfr/resamplers.py
@@ -105,7 +105,7 @@ class BaseCloudResampler(AlltoallMixin):
# Normalise our points and compute their Morton codes
fact = 2**21 if len(pmin) == 3 else 2**32
- ipts = (fact*((pts - pmin) / (pmax - pmin).max())).astype(int)
+ ipts = (fact*((pts - pmin) / (pmax - pmin).max())).astype(np.uint64)
keys = morton_encode(ipts, [fact]*len(pmin))
# Use these codes to sort our points
@@ -130,7 +130,7 @@ class BaseCloudResampler(AlltoallMixin):
bl = int(keys.max()).bit_length()
# With this break our points up into their top-level orthants
- sidx = np.searchsorted(keys, np.arange(2**nb) << (bl - nb))
+ sidx = np.searchsorted(keys, np.arange(1, 2**nb) << (bl - nb))
# Allocate global bounding box array
bboxes = np.full((comm.size, 2**nb, 2, self.ndims), np.nan)
Regards, Freddie.