13 #include <dolfinx/common/log.h>
34 static hid_t
open_file(MPI_Comm comm,
const std::filesystem::path& filename,
35 const std::string& mode,
const bool use_mpi_io);
61 static void write_dataset(
const hid_t handle,
const std::string& dataset_path,
63 const std::array<std::int64_t, 2>& range,
64 const std::vector<std::int64_t>& global_size,
65 bool use_mpi_io,
bool use_chunking);
77 const std::string& dataset_path,
78 const std::array<std::int64_t, 2>& range);
84 static bool has_dataset(
const hid_t handle,
const std::string& dataset_path);
90 static std::vector<std::int64_t>
111 static void add_group(
const hid_t handle,
const std::string& dataset_path);
114 template <
typename T>
115 static hid_t hdf5_type()
117 throw std::runtime_error(
"Cannot get HDF5 primitive data type. "
118 "No specialised function for this data type");
124 inline hid_t HDF5Interface::hdf5_type<float>()
126 return H5T_NATIVE_FLOAT;
130 inline hid_t HDF5Interface::hdf5_type<double>()
132 return H5T_NATIVE_DOUBLE;
136 inline hid_t HDF5Interface::hdf5_type<int>()
138 return H5T_NATIVE_INT;
142 inline hid_t HDF5Interface::hdf5_type<std::int64_t>()
144 return H5T_NATIVE_INT64;
148 inline hid_t HDF5Interface::hdf5_type<std::size_t>()
150 if (
sizeof(std::size_t) ==
sizeof(
unsigned long))
151 return H5T_NATIVE_ULONG;
152 else if (
sizeof(std::size_t) ==
sizeof(
unsigned int))
153 return H5T_NATIVE_UINT;
155 throw std::runtime_error(
"Cannot determine size of std::size_t. "
156 "std::size_t is not the same size as long or int");
159 template <
typename T>
161 const hid_t file_handle,
const std::string& dataset_path,
const T* data,
162 const std::array<std::int64_t, 2>& range,
163 const std::vector<int64_t>& global_size,
bool use_mpi_io,
bool use_chunking)
166 const std::size_t
rank = global_size.size();
171 throw std::runtime_error(
"Cannot write dataset to HDF5 file"
172 "Only rank 1 and rank 2 dataset are supported");
176 const hid_t h5type = hdf5_type<T>();
179 std::vector<hsize_t> count(global_size.begin(), global_size.end());
180 count[0] = range[1] - range[0];
183 std::vector<hsize_t> offset(rank, 0);
184 offset[0] = range[0];
187 const std::vector<hsize_t> dimsf(global_size.begin(), global_size.end());
193 const hid_t filespace0 = H5Screate_simple(rank, dimsf.data(),
nullptr);
194 assert(filespace0 != HDF5_FAIL);
197 hid_t chunking_properties;
201 hsize_t chunk_size = dimsf[0] / 2;
202 if (chunk_size > 1048576)
203 chunk_size = 1048576;
204 if (chunk_size < 1024)
207 hsize_t chunk_dims[2] = {chunk_size, dimsf[1]};
208 chunking_properties = H5Pcreate(H5P_DATASET_CREATE);
209 H5Pset_chunk(chunking_properties, rank, chunk_dims);
212 chunking_properties = H5P_DEFAULT;
215 const std::string group_name(dataset_path, 0, dataset_path.rfind(
'/'));
216 add_group(file_handle, group_name);
220 = H5Dcreate2(file_handle, dataset_path.c_str(), h5type, filespace0,
221 H5P_DEFAULT, chunking_properties, H5P_DEFAULT);
222 assert(dset_id != HDF5_FAIL);
225 status = H5Sclose(filespace0);
226 assert(status != HDF5_FAIL);
229 const hid_t memspace = H5Screate_simple(rank, count.data(),
nullptr);
230 assert(memspace != HDF5_FAIL);
233 const hid_t filespace1 = H5Dget_space(dset_id);
234 status = H5Sselect_hyperslab(filespace1, H5S_SELECT_SET, offset.data(),
235 nullptr, count.data(),
nullptr);
236 assert(status != HDF5_FAIL);
239 const hid_t plist_id = H5Pcreate(H5P_DATASET_XFER);
242 #ifdef H5_HAVE_PARALLEL
243 status = H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE);
244 assert(status != HDF5_FAIL);
246 throw std::runtime_error(
"HDF5 library has not been configured with MPI");
251 status = H5Dwrite(dset_id, h5type, memspace, filespace1, plist_id, data);
252 assert(status != HDF5_FAIL);
257 status = H5Pclose(chunking_properties);
258 assert(status != HDF5_FAIL);
262 status = H5Dclose(dset_id);
263 assert(status != HDF5_FAIL);
266 status = H5Sclose(filespace1);
267 assert(status != HDF5_FAIL);
270 status = H5Sclose(memspace);
271 assert(status != HDF5_FAIL);
274 status = H5Pclose(plist_id);
275 assert(status != HDF5_FAIL);
278 template <
typename T>
279 inline std::vector<T>
281 const std::string& dataset_path,
282 const std::array<std::int64_t, 2>& range)
284 auto timer_start = std::chrono::system_clock::now();
288 = H5Dopen2(file_handle, dataset_path.c_str(), H5P_DEFAULT);
289 assert(dset_id != HDF5_FAIL);
292 const hid_t dataspace = H5Dget_space(dset_id);
293 assert(dataspace != HDF5_FAIL);
296 const int rank = H5Sget_simple_extent_ndims(dataspace);
300 LOG(WARNING) <<
"HDF5Interface::read_dataset untested for rank > 2.";
303 std::vector<hsize_t> shape(rank);
306 const int ndims = H5Sget_simple_extent_dims(dataspace, shape.data(),
nullptr);
307 assert(ndims == rank);
310 std::vector<hsize_t> offset(rank, 0);
311 std::vector<hsize_t> count = shape;
312 if (range[0] != -1 and range[1] != -1)
314 offset[0] = range[0];
315 count[0] = range[1] - range[0];
322 herr_t status = H5Sselect_hyperslab(dataspace, H5S_SELECT_SET, offset.data(),
323 nullptr, count.data(),
nullptr);
324 assert(status != HDF5_FAIL);
327 const hid_t memspace = H5Screate_simple(rank, count.data(),
nullptr);
328 assert(memspace != HDF5_FAIL);
331 std::size_t data_size = 1;
332 for (std::size_t i = 0; i < count.size(); ++i)
333 data_size *= count[i];
334 std::vector<T> data(data_size);
337 const hid_t h5type = hdf5_type<T>();
339 = H5Dread(dset_id, h5type, memspace, dataspace, H5P_DEFAULT, data.data());
340 assert(status != HDF5_FAIL);
343 status = H5Sclose(dataspace);
344 assert(status != HDF5_FAIL);
347 status = H5Sclose(memspace);
348 assert(status != HDF5_FAIL);
351 status = H5Dclose(dset_id);
352 assert(status != HDF5_FAIL);
354 auto timer_end = std::chrono::system_clock::now();
355 std::chrono::duration<double> dt = (timer_end - timer_start);
356 double data_rate = data.size() *
sizeof(T) / (1e6 * dt.count());
358 LOG(INFO) <<
"HDF5 Read data rate: " << data_rate <<
"MB/s";
This class provides an interface to some HDF5 functionality.
Definition: HDF5Interface.h:26
static std::vector< T > read_dataset(const hid_t handle, const std::string &dataset_path, const std::array< std::int64_t, 2 > &range)
Read data from a HDF5 dataset "dataset_path" as defined by range blocks on each process.
static bool has_dataset(const hid_t handle, const std::string &dataset_path)
Check for existence of dataset in HDF5 file.
Definition: HDF5Interface.cpp:149
static void set_mpi_atomicity(const hid_t handle, const bool atomic)
Set MPI atomicity. See https://support.hdfgroup.org/HDF5/doc/RM/RM_H5F.html#File-SetMpiAtomicity and ...
Definition: HDF5Interface.cpp:232
static void close_file(const hid_t handle)
Close HDF5 file.
Definition: HDF5Interface.cpp:120
static hid_t open_file(MPI_Comm comm, const std::filesystem::path &filename, const std::string &mode, const bool use_mpi_io)
Open HDF5 and return file descriptor.
Definition: HDF5Interface.cpp:58
static std::vector< std::int64_t > get_dataset_shape(const hid_t handle, const std::string &dataset_path)
Get dataset shape (size of each dimension)
Definition: HDF5Interface.cpp:199
static void flush_file(const hid_t handle)
Flush data to file to improve data integrity after interruption.
Definition: HDF5Interface.cpp:126
static std::filesystem::path get_filename(hid_t handle)
Get filename.
Definition: HDF5Interface.cpp:132
static bool get_mpi_atomicity(const hid_t handle)
Get MPI atomicity. See https://support.hdfgroup.org/HDF5/doc/RM/RM_H5F.html#File-GetMpiAtomicity and ...
Definition: HDF5Interface.cpp:240
static void write_dataset(const hid_t handle, const std::string &dataset_path, const T *data, const std::array< std::int64_t, 2 > &range, const std::vector< std::int64_t > &global_size, bool use_mpi_io, bool use_chunking)
Write data to existing HDF file as defined by range blocks on each process.
int rank(MPI_Comm comm)
Return process rank for the communicator.
Definition: MPI.cpp:74
Support for file IO.
Definition: cells.h:22