1
1
#include " caffe/util/hdf5.hpp"
2
2
3
+ #include < algorithm>
3
4
#include < string>
4
5
#include < vector>
5
6
6
7
namespace caffe {
7
8
9
+ // Verifies format of data stored in HDF5 file and reshapes blob accordingly.
10
+ template <typename Dtype>
11
+ void HDF5PrepareBlob (hid_t file_id, const char * dataset_name, int num,
12
+ Blob<Dtype>* blob) {
13
+ // Verify that the dataset exists.
14
+ CHECK (H5LTfind_dataset (file_id, dataset_name))
15
+ << " Failed to find HDF5 dataset " << dataset_name;
16
+ herr_t status;
17
+ int ndims;
18
+ CHECK_LE (0 , H5LTget_dataset_ndims (file_id, dataset_name, &ndims))
19
+ << " Failed to get dataset ndims for " << dataset_name;
20
+ CHECK_GE (ndims, 1 ) << " HDF5 dataset must have at least 1 dimension." ;
21
+ CHECK_LE (ndims, kMaxBlobAxes )
22
+ << " HDF5 dataset must have at most "
23
+ << kMaxBlobAxes << " dimensions, to fit in a Blob." ;
24
+
25
+ // Verify that the data format is what we expect: float or double.
26
+ std::vector<hsize_t > dims (ndims);
27
+ H5T_class_t h5_class;
28
+ status = H5LTget_dataset_info (
29
+ file_id, dataset_name, dims.data (), &h5_class, NULL );
30
+ CHECK_GE (status, 0 ) << " Failed to get dataset info for " << dataset_name;
31
+ CHECK_EQ (h5_class, H5T_FLOAT) << " Expected float or double data" ;
32
+ CHECK_GE (num, -1 ) << " num must be -1 (to indicate the number of rows"
33
+ " in the dataset) or non-negative." ;
34
+
35
+ vector<int > blob_dims (dims.size ());
36
+ blob_dims[0 ] = (num == -1 ) ? dims[0 ] : num;
37
+ for (int i = 1 ; i < dims.size (); ++i) {
38
+ blob_dims[i] = dims[i];
39
+ }
40
+ blob->Reshape (blob_dims);
41
+ }
42
+
43
+ template
44
+ void HDF5PrepareBlob<float >(hid_t file_id, const char * dataset_name, int num,
45
+ Blob<float >* blob);
46
+
47
+ template
48
+ void HDF5PrepareBlob<double >(hid_t file_id, const char * dataset_name, int num,
49
+ Blob<double >* blob);
50
+
51
+ template <typename Dtype>
52
+ int HDF5ReadRowsToBlob (hid_t file_id, const char * dataset_name,
53
+ int h5_offset, int blob_offset, Blob<Dtype>* blob) {
54
+ int ndims;
55
+ CHECK_LE (0 , H5LTget_dataset_ndims (file_id, dataset_name, &ndims))
56
+ << " Failed to get dataset ndims for " << dataset_name;
57
+ std::vector<hsize_t > dims (ndims);
58
+ H5T_class_t h5_class;
59
+ herr_t status = H5LTget_dataset_info (
60
+ file_id, dataset_name, dims.data (), &h5_class, NULL );
61
+ CHECK_GE (status, 0 ) << " Failed to get dataset info for " << dataset_name;
62
+ CHECK_EQ (h5_class, H5T_FLOAT) << " Expected float or double data" ;
63
+ hid_t dataset = H5Dopen2 (file_id, dataset_name, H5P_DEFAULT);
64
+ hid_t dataspace = H5Dget_space (dataset);
65
+ vector<hsize_t > slab_start (ndims, 0 );
66
+ slab_start[0 ] = h5_offset;
67
+ const int num_rows_available = dims[0 ] - h5_offset;
68
+ const int num_rows = std::min (blob->num () - blob_offset, num_rows_available);
69
+ if (num_rows <= 0 ) {
70
+ return 0 ;
71
+ }
72
+ vector<hsize_t > slab_count (ndims, num_rows);
73
+ for (int i = 1 ; i < ndims; ++i) {
74
+ slab_count[i] = dims[i];
75
+ }
76
+ status = H5Sselect_hyperslab (dataspace, H5S_SELECT_SET,
77
+ slab_start.data (), NULL , slab_count.data (), NULL );
78
+ CHECK_GE (status, 0 ) << " Failed to select slab." ;
79
+ hid_t memspace = H5Screate_simple (ndims, slab_count.data (), NULL );
80
+ const int data_size = blob->count () / blob->num ();
81
+ // separate multiplication to avoid a possible overflow
82
+ const int blob_offset_size = blob_offset * data_size;
83
+ hid_t type = (sizeof (Dtype) == 4 ) ? H5T_NATIVE_FLOAT : H5T_NATIVE_DOUBLE;
84
+ status = H5Dread (dataset, type, memspace, dataspace, H5P_DEFAULT,
85
+ blob->mutable_cpu_data () + blob_offset_size);
86
+ CHECK_GE (status, 0 ) << " Failed to read dataset " << dataset_name;
87
+ H5Dclose (dataset);
88
+ H5Sclose (dataspace);
89
+ H5Sclose (memspace);
90
+ return num_rows;
91
+ }
92
+
93
+ template
94
+ int HDF5ReadRowsToBlob<float >(hid_t file_id, const char * dataset_name,
95
+ int h5_offset, int blob_offset, Blob<float >* data);
96
+
97
+ template
98
+ int HDF5ReadRowsToBlob<double >(hid_t file_id, const char * dataset_name,
99
+ int h5_offset, int blob_offset, Blob<double >* data);
100
+
8
101
// Verifies format of data stored in HDF5 file and reshapes blob accordingly.
9
102
template <typename Dtype>
10
103
void hdf5_load_nd_dataset_helper (
@@ -59,7 +152,7 @@ void hdf5_save_nd_dataset<float>(
59
152
const hid_t file_id, const string& dataset_name, const Blob<float >& blob,
60
153
bool write_diff) {
61
154
int num_axes = blob.num_axes ();
62
- hsize_t * dims = new hsize_t [ num_axes] ;
155
+ std::vector< hsize_t > dims ( num_axes) ;
63
156
for (int i = 0 ; i < num_axes; ++i) {
64
157
dims[i] = blob.shape (i);
65
158
}
@@ -70,17 +163,16 @@ void hdf5_save_nd_dataset<float>(
70
163
data = blob.cpu_data ();
71
164
}
72
165
herr_t status = H5LTmake_dataset_float (
73
- file_id, dataset_name.c_str (), num_axes, dims, data);
166
+ file_id, dataset_name.c_str (), num_axes, dims. data () , data);
74
167
CHECK_GE (status, 0 ) << " Failed to make float dataset " << dataset_name;
75
- delete[] dims;
76
168
}
77
169
78
170
template <>
79
171
void hdf5_save_nd_dataset<double >(
80
172
hid_t file_id, const string& dataset_name, const Blob<double >& blob,
81
173
bool write_diff) {
82
174
int num_axes = blob.num_axes ();
83
- hsize_t * dims = new hsize_t [ num_axes] ;
175
+ std::vector< hsize_t > dims ( num_axes) ;
84
176
for (int i = 0 ; i < num_axes; ++i) {
85
177
dims[i] = blob.shape (i);
86
178
}
@@ -91,9 +183,8 @@ void hdf5_save_nd_dataset<double>(
91
183
data = blob.cpu_data ();
92
184
}
93
185
herr_t status = H5LTmake_dataset_double (
94
- file_id, dataset_name.c_str (), num_axes, dims, data);
186
+ file_id, dataset_name.c_str (), num_axes, dims. data () , data);
95
187
CHECK_GE (status, 0 ) << " Failed to make double dataset " << dataset_name;
96
- delete[] dims;
97
188
}
98
189
99
190
string hdf5_load_string (hid_t loc_id, const string& dataset_name) {
0 commit comments