Skip to content

Commit b00e872

Browse files
committed
rebase & clean up HDF5DataLayer Prefetch
Adapt HDF5DataLayer Prefetch to BVLC#2836
1 parent 87b27d1 commit b00e872

File tree

6 files changed

+129
-165
lines changed

6 files changed

+129
-165
lines changed

include/caffe/util/hdf5.hpp

Lines changed: 29 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,21 +10,43 @@
1010

1111
namespace caffe {
1212

13+
/**
14+
* @brief Shapes a Blob to read "num" rows of HDF5 data. If num == -1, take
15+
* the num of the HDF5 dataset.
16+
*
17+
* @param file_id the HDF5 file handle
18+
* @param dataset_name the name of the HDF5 dataset to read
19+
* @param num the number of rows to read: either num >= 0,
20+
* or num == -1 for the number of rows in the HDF5 dataset
21+
* @param blob the Blob to shape
22+
*
23+
* The HDF5 dataset could be N(>=1) dimensions as long as N doesn't exceed
24+
* Blob's maximum dimension.
25+
*/
1326
template <typename Dtype>
14-
void hdf5_load_nd_dataset_helper(
15-
hid_t file_id, const char* dataset_name_, int min_dim, int max_dim,
27+
void HDF5PrepareBlob(hid_t file_id, const char* dataset_name, int num,
1628
Blob<Dtype>* blob);
1729

30+
/**
31+
* @brief Reads rows [offset, offset + data->num() - 1] into Blob* data, which
32+
* must have been pre-shaped using HDF5PrepareBlob (or otherwise).
33+
*/
1834
template <typename Dtype>
19-
void hdf5_load_nd_dataset(
20-
hid_t file_id, const char* dataset_name_, int min_dim, int max_dim,
21-
Blob<Dtype>* blob);
35+
int HDF5ReadRowsToBlob(hid_t file_id, const char* dataset_name, int h5_offset,
36+
int blob_offset, Blob<Dtype>* blob);
37+
38+
template <typename Dtype>
39+
void hdf5_load_nd_dataset_helper(hid_t file_id, const char* dataset_name_,
40+
int min_dim, int max_dim, Blob<Dtype>* blob);
41+
42+
template <typename Dtype>
43+
void hdf5_load_nd_dataset(hid_t file_id, const char* dataset_name_, int min_dim,
44+
int max_dim, Blob<Dtype>* blob);
2245

2346
template <typename Dtype>
2447
void hdf5_save_nd_dataset(
25-
const hid_t file_id, const string& dataset_name, const Blob<Dtype>& blob,
48+
const hid_t file_id, const string& dataset_name, const Blob<Dtype>& blob,
2649
bool write_diff = false);
27-
2850
int hdf5_load_int(hid_t loc_id, const string& dataset_name);
2951
void hdf5_save_int(hid_t loc_id, const string& dataset_name, int i);
3052
string hdf5_load_string(hid_t loc_id, const string& dataset_name);

include/caffe/util/io.hpp

Lines changed: 0 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -136,34 +136,6 @@ cv::Mat DecodeDatumToCVMat(const Datum& datum, bool is_color);
136136

137137
void CVMatToDatum(const cv::Mat& cv_img, Datum* datum);
138138

139-
/**
140-
* @brief Shapes a Blob to read "num" rows of HDF5 data. If num == -1, take
141-
* the num of the HDF5 dataset.
142-
*
143-
* @param file_id the HDF5 file handle
144-
* @param dataset_name the name of the HDF5 dataset to read
145-
* @param num the number of rows to read: either num >= 0,
146-
* or num == -1 for the number of rows in the HDF5 dataset
147-
* @param blob the Blob to shape
148-
*
149-
* The HDF5 dataset could be N(>=1) dimensions as long as N doesn't exceed Blob's maximum dimension.
150-
*/
151-
template <typename Dtype>
152-
void HDF5PrepareBlob(hid_t file_id, const char* dataset_name, int num,
153-
Blob<Dtype>* blob);
154-
155-
/**
156-
* @brief Reads rows [offset, offset + data->num() - 1] into Blob* data, which
157-
* must have been pre-shaped using HDF5PrepareBlob (or otherwise).
158-
*/
159-
template <typename Dtype>
160-
int HDF5ReadRowsToBlob(hid_t file_id, const char* dataset_name,
161-
int h5_offset, int blob_offset, Blob<Dtype>* blob);
162-
163-
template <typename Dtype>
164-
void hdf5_save_nd_dataset(
165-
const hid_t file_id, const string& dataset_name, const Blob<Dtype>& blob);
166-
167139
} // namespace caffe
168140

169141
#endif // CAFFE_UTIL_IO_H_

src/caffe/layers/hdf5_data_layer.cpp

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,6 @@
88
#include <string>
99
#include <vector>
1010

11-
#include "hdf5.h"
12-
#include "hdf5_hl.h"
1311
#include "stdint.h"
1412

1513
#include "caffe/data_layers.hpp"
@@ -77,7 +75,7 @@ void HDF5DataLayer<Dtype>::FillHDF5FileData() {
7775

7876
template <typename Dtype>
7977
void HDF5DataLayer<Dtype>::DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
80-
const vector<Blob<Dtype>*>& top) {
78+
const vector<Blob<Dtype>*>& top) {
8179
// Refuse transformation parameters since HDF5 is totally generic.
8280
CHECK(!this->layer_param_.has_transform_param()) <<
8381
this->type() << " does not transform data.";
@@ -151,14 +149,12 @@ void HDF5DataLayer<Dtype>::InternalThreadEntry() {
151149

152150
template <typename Dtype>
153151
void HDF5DataLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
154-
const vector<Blob<Dtype>*>& top) {
152+
const vector<Blob<Dtype>*>& top) {
155153
this->JoinPrefetchThread();
156-
157154
for (int i = 0; i < top.size(); ++i) {
158155
const int count = top[i]->count();
159156
caffe_copy(count, hdf_blobs_[i]->cpu_data(), top[i]->mutable_cpu_data());
160157
}
161-
162158
this->CreatePrefetchThread();
163159
}
164160

src/caffe/test/test_hdf5data_layer.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ class HDF5DataLayerTest : public MultiDeviceTest<TypeParam> {
117117
}
118118
}
119119
}
120-
};
120+
};
121121

122122
TYPED_TEST_CASE(HDF5DataLayerTest, TestDtypesAndDevices);
123123

src/caffe/util/hdf5.cpp

Lines changed: 97 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,103 @@
11
#include "caffe/util/hdf5.hpp"
22

3+
#include <algorithm>
34
#include <string>
45
#include <vector>
56

67
namespace caffe {
78

9+
// Verifies format of data stored in HDF5 file and reshapes blob accordingly.
10+
template <typename Dtype>
11+
void HDF5PrepareBlob(hid_t file_id, const char* dataset_name, int num,
12+
Blob<Dtype>* blob) {
13+
// Verify that the dataset exists.
14+
CHECK(H5LTfind_dataset(file_id, dataset_name))
15+
<< "Failed to find HDF5 dataset " << dataset_name;
16+
herr_t status;
17+
int ndims;
18+
CHECK_LE(0, H5LTget_dataset_ndims(file_id, dataset_name, &ndims))
19+
<< "Failed to get dataset ndims for " << dataset_name;
20+
CHECK_GE(ndims, 1) << "HDF5 dataset must have at least 1 dimension.";
21+
CHECK_LE(ndims, kMaxBlobAxes)
22+
<< "HDF5 dataset must have at most "
23+
<< kMaxBlobAxes << " dimensions, to fit in a Blob.";
24+
25+
// Verify that the data format is what we expect: float or double.
26+
std::vector<hsize_t> dims(ndims);
27+
H5T_class_t h5_class;
28+
status = H5LTget_dataset_info(
29+
file_id, dataset_name, dims.data(), &h5_class, NULL);
30+
CHECK_GE(status, 0) << "Failed to get dataset info for " << dataset_name;
31+
CHECK_EQ(h5_class, H5T_FLOAT) << "Expected float or double data";
32+
CHECK_GE(num, -1) << "num must be -1 (to indicate the number of rows"
33+
"in the dataset) or non-negative.";
34+
35+
vector<int> blob_dims(dims.size());
36+
blob_dims[0] = (num == -1) ? dims[0] : num;
37+
for (int i = 1; i < dims.size(); ++i) {
38+
blob_dims[i] = dims[i];
39+
}
40+
blob->Reshape(blob_dims);
41+
}
42+
43+
template
44+
void HDF5PrepareBlob<float>(hid_t file_id, const char* dataset_name, int num,
45+
Blob<float>* blob);
46+
47+
template
48+
void HDF5PrepareBlob<double>(hid_t file_id, const char* dataset_name, int num,
49+
Blob<double>* blob);
50+
51+
template <typename Dtype>
52+
int HDF5ReadRowsToBlob(hid_t file_id, const char* dataset_name,
53+
int h5_offset, int blob_offset, Blob<Dtype>* blob) {
54+
int ndims;
55+
CHECK_LE(0, H5LTget_dataset_ndims(file_id, dataset_name, &ndims))
56+
<< "Failed to get dataset ndims for " << dataset_name;
57+
std::vector<hsize_t> dims(ndims);
58+
H5T_class_t h5_class;
59+
herr_t status = H5LTget_dataset_info(
60+
file_id, dataset_name, dims.data(), &h5_class, NULL);
61+
CHECK_GE(status, 0) << "Failed to get dataset info for " << dataset_name;
62+
CHECK_EQ(h5_class, H5T_FLOAT) << "Expected float or double data";
63+
hid_t dataset = H5Dopen2(file_id, dataset_name, H5P_DEFAULT);
64+
hid_t dataspace = H5Dget_space(dataset);
65+
vector<hsize_t> slab_start(ndims, 0);
66+
slab_start[0] = h5_offset;
67+
const int num_rows_available = dims[0] - h5_offset;
68+
const int num_rows = std::min(blob->num() - blob_offset, num_rows_available);
69+
if (num_rows <= 0) {
70+
return 0;
71+
}
72+
vector<hsize_t> slab_count(ndims, num_rows);
73+
for (int i = 1; i < ndims; ++i) {
74+
slab_count[i] = dims[i];
75+
}
76+
status = H5Sselect_hyperslab(dataspace, H5S_SELECT_SET,
77+
slab_start.data(), NULL, slab_count.data(), NULL);
78+
CHECK_GE(status, 0) << "Failed to select slab.";
79+
hid_t memspace = H5Screate_simple(ndims, slab_count.data(), NULL);
80+
const int data_size = blob->count() / blob->num();
81+
// separate multiplication to avoid a possible overflow
82+
const int blob_offset_size = blob_offset * data_size;
83+
hid_t type = (sizeof(Dtype) == 4) ? H5T_NATIVE_FLOAT : H5T_NATIVE_DOUBLE;
84+
status = H5Dread(dataset, type, memspace, dataspace, H5P_DEFAULT,
85+
blob->mutable_cpu_data() + blob_offset_size);
86+
CHECK_GE(status, 0) << "Failed to read dataset " << dataset_name;
87+
H5Dclose(dataset);
88+
H5Sclose(dataspace);
89+
H5Sclose(memspace);
90+
return num_rows;
91+
}
92+
93+
template
94+
int HDF5ReadRowsToBlob<float>(hid_t file_id, const char* dataset_name,
95+
int h5_offset, int blob_offset, Blob<float>* data);
96+
97+
template
98+
int HDF5ReadRowsToBlob<double>(hid_t file_id, const char* dataset_name,
99+
int h5_offset, int blob_offset, Blob<double>* data);
100+
8101
// Verifies format of data stored in HDF5 file and reshapes blob accordingly.
9102
template <typename Dtype>
10103
void hdf5_load_nd_dataset_helper(
@@ -59,7 +152,7 @@ void hdf5_save_nd_dataset<float>(
59152
const hid_t file_id, const string& dataset_name, const Blob<float>& blob,
60153
bool write_diff) {
61154
int num_axes = blob.num_axes();
62-
hsize_t *dims = new hsize_t[num_axes];
155+
std::vector<hsize_t> dims(num_axes);
63156
for (int i = 0; i < num_axes; ++i) {
64157
dims[i] = blob.shape(i);
65158
}
@@ -70,17 +163,16 @@ void hdf5_save_nd_dataset<float>(
70163
data = blob.cpu_data();
71164
}
72165
herr_t status = H5LTmake_dataset_float(
73-
file_id, dataset_name.c_str(), num_axes, dims, data);
166+
file_id, dataset_name.c_str(), num_axes, dims.data(), data);
74167
CHECK_GE(status, 0) << "Failed to make float dataset " << dataset_name;
75-
delete[] dims;
76168
}
77169

78170
template <>
79171
void hdf5_save_nd_dataset<double>(
80172
hid_t file_id, const string& dataset_name, const Blob<double>& blob,
81173
bool write_diff) {
82174
int num_axes = blob.num_axes();
83-
hsize_t *dims = new hsize_t[num_axes];
175+
std::vector<hsize_t> dims(num_axes);
84176
for (int i = 0; i < num_axes; ++i) {
85177
dims[i] = blob.shape(i);
86178
}
@@ -91,9 +183,8 @@ void hdf5_save_nd_dataset<double>(
91183
data = blob.cpu_data();
92184
}
93185
herr_t status = H5LTmake_dataset_double(
94-
file_id, dataset_name.c_str(), num_axes, dims, data);
186+
file_id, dataset_name.c_str(), num_axes, dims.data(), data);
95187
CHECK_GE(status, 0) << "Failed to make double dataset " << dataset_name;
96-
delete[] dims;
97188
}
98189

99190
string hdf5_load_string(hid_t loc_id, const string& dataset_name) {

src/caffe/util/io.cpp

Lines changed: 0 additions & 117 deletions
Original file line numberDiff line numberDiff line change
@@ -228,122 +228,5 @@ void CVMatToDatum(const cv::Mat& cv_img, Datum* datum) {
228228
datum->set_data(buffer);
229229
}
230230

231-
// Verifies format of data stored in HDF5 file and reshapes blob accordingly.
232-
template <typename Dtype>
233-
void HDF5PrepareBlob(hid_t file_id, const char* dataset_name, int num,
234-
Blob<Dtype>* blob) {
235-
// Verify that the dataset exists.
236-
CHECK(H5LTfind_dataset(file_id, dataset_name))
237-
<< "Failed to find HDF5 dataset " << dataset_name;
238-
herr_t status;
239-
int ndims;
240-
CHECK_LE(0, H5LTget_dataset_ndims(file_id, dataset_name, &ndims))
241-
<< "Failed to get dataset ndims for " << dataset_name;
242-
CHECK_GE(ndims, 1) << "HDF5 dataset must have at least 1 dimension.";
243-
CHECK_LE(ndims, kMaxBlobAxes)
244-
<< "HDF5 dataset must have at most "
245-
<< kMaxBlobAxes << " dimensions, to fit in a Blob.";
246-
247-
// Verify that the data format is what we expect: float or double.
248-
std::vector<hsize_t> dims(ndims);
249-
H5T_class_t h5_class;
250-
status = H5LTget_dataset_info(
251-
file_id, dataset_name, dims.data(), &h5_class, NULL);
252-
CHECK_GE(status, 0) << "Failed to get dataset info for " << dataset_name;
253-
CHECK_EQ(h5_class, H5T_FLOAT) << "Expected float or double data";
254-
CHECK_GE(num, -1) << "num must be -1 (to indicate the number of rows"
255-
"in the dataset) or non-negative.";
256-
257-
vector<int> blob_dims(dims.size());
258-
blob_dims[0] = (num == -1) ? dims[0] : num;
259-
for (int i = 1; i < dims.size(); ++i) {
260-
blob_dims[i] = dims[i];
261-
}
262-
blob->Reshape(blob_dims);
263-
}
264-
265-
template
266-
void HDF5PrepareBlob<float>(hid_t file_id, const char* dataset_name, int num,
267-
Blob<float>* blob);
268-
269-
template
270-
void HDF5PrepareBlob<double>(hid_t file_id, const char* dataset_name, int num,
271-
Blob<double>* blob);
272-
273-
template <typename Dtype>
274-
int HDF5ReadRowsToBlob(hid_t file_id, const char* dataset_name,
275-
int h5_offset, int blob_offset, Blob<Dtype>* blob) {
276-
int ndims;
277-
CHECK_LE(0, H5LTget_dataset_ndims(file_id, dataset_name, &ndims))
278-
<< "Failed to get dataset ndims for " << dataset_name;
279-
std::vector<hsize_t> dims(ndims);
280-
H5T_class_t h5_class;
281-
herr_t status = H5LTget_dataset_info(
282-
file_id, dataset_name, dims.data(), &h5_class, NULL);
283-
CHECK_GE(status, 0) << "Failed to get dataset info for " << dataset_name;
284-
CHECK_EQ(h5_class, H5T_FLOAT) << "Expected float or double data";
285-
hid_t dataset = H5Dopen2(file_id, dataset_name, H5P_DEFAULT);
286-
hid_t dataspace = H5Dget_space(dataset);
287-
vector<hsize_t> slab_start(ndims, 0);
288-
slab_start[0] = h5_offset;
289-
const int num_rows_available = dims[0] - h5_offset;
290-
const int num_rows = std::min(blob->num() - blob_offset, num_rows_available);
291-
if (num_rows <= 0) {
292-
return 0;
293-
}
294-
vector<hsize_t> slab_count(ndims, num_rows);
295-
for (int i = 1; i < ndims; ++i) {
296-
slab_count[i] = dims[i];
297-
}
298-
status = H5Sselect_hyperslab(dataspace, H5S_SELECT_SET,
299-
slab_start.data(), NULL, slab_count.data(), NULL);
300-
CHECK_GE(status, 0) << "Failed to select slab.";
301-
hid_t memspace = H5Screate_simple(ndims, slab_count.data(), NULL);
302-
const int data_size = blob->count() / blob->num();
303-
// separate multiplication to avoid a possible overflow
304-
const int blob_offset_size = blob_offset * data_size;
305-
hid_t type = (sizeof(Dtype) == 4) ? H5T_NATIVE_FLOAT : H5T_NATIVE_DOUBLE;
306-
status = H5Dread(dataset, type, memspace, dataspace, H5P_DEFAULT,
307-
blob->mutable_cpu_data() + blob_offset_size);
308-
CHECK_GE(status, 0) << "Failed to read dataset " << dataset_name;
309-
H5Dclose(dataset);
310-
H5Sclose(dataspace);
311-
H5Sclose(memspace);
312-
return num_rows;
313-
}
314-
315-
template
316-
int HDF5ReadRowsToBlob<float>(hid_t file_id, const char* dataset_name,
317-
int h5_offset, int blob_offset, Blob<float>* data);
318-
319-
template
320-
int HDF5ReadRowsToBlob<double>(hid_t file_id, const char* dataset_name,
321-
int h5_offset, int blob_offset, Blob<double>* data);
322-
323-
template <>
324-
void hdf5_save_nd_dataset<float>(
325-
const hid_t file_id, const string& dataset_name, const Blob<float>& blob) {
326-
hsize_t dims[HDF5_NUM_DIMS];
327-
dims[0] = blob.num();
328-
dims[1] = blob.channels();
329-
dims[2] = blob.height();
330-
dims[3] = blob.width();
331-
herr_t status = H5LTmake_dataset_float(
332-
file_id, dataset_name.c_str(), HDF5_NUM_DIMS, dims, blob.cpu_data());
333-
CHECK_GE(status, 0) << "Failed to make float dataset " << dataset_name;
334-
}
335-
336-
template <>
337-
void hdf5_save_nd_dataset<double>(
338-
const hid_t file_id, const string& dataset_name, const Blob<double>& blob) {
339-
hsize_t dims[HDF5_NUM_DIMS];
340-
dims[0] = blob.num();
341-
dims[1] = blob.channels();
342-
dims[2] = blob.height();
343-
dims[3] = blob.width();
344-
herr_t status = H5LTmake_dataset_double(
345-
file_id, dataset_name.c_str(), HDF5_NUM_DIMS, dims, blob.cpu_data());
346-
CHECK_GE(status, 0) << "Failed to make double dataset " << dataset_name;
347-
}
348231

349232
} // namespace caffe

0 commit comments

Comments
 (0)