combine_hdf5.py
Go to the documentation of this file.
  6  print 'CTRL+C is blocked while the HDF5Printer combine script runs! Signal received, but ignored.'
 16  chunksize = 1000
 22  print " Usage: python combine_hdf5.py <path-to-target-hdf5-file> <root group in hdf5 files> <tmp file 1> <tmp file 2> ..."
 24  print " Attempts to combine the data in a group of hdf5 files produced by HDF5Printer in separate processes during a GAMBIT run."
 26  print " Use --runchecks flag to run some extra validity checks on the input and output data (warning: may be slow for large datasets)"
 47  outfname = sys.argv[1]
 48  group = sys.argv[2]
 49  tmp_files = sys.argv[3:]
 50  N = len(tmp_files)
 61  files = {}
 68  fnames = tmp_files
 78  datasets = []
 79  tmp_dset_metadata = {}
 80  tmp_RA_dset_metadata = {}
 98  total_sync_length = sum(sync_lengths)
129  gout = fout.create_group(group)
137  existing_dsets = {}
138  dsetlengths = {}
155  print " Extending existing dset '{0}' to length {1}+{2}={3} to accommodate new data...".format(dsetname,init_output_length,total_sync_length,init_output_length+total_sync_length)
159  target_dsets = {}
160  all_sync_dsets = set([]).union(*sync_dsets)
161  all_RA_dsets = set([]).union(*RA_dsets)
165  target_dsets[dsetname] = gout.create_dataset(dsetname, (init_output_length+total_sync_length,), chunks=(chunksize,), dtype=dt, maxshape=(None,))
172  target_dsets[dsetname] = gout.create_dataset(dsetname, (init_output_length+total_sync_length,), chunks=(chunksize,), dtype=dt, maxshape=(None,))
177  nextempty=init_output_length
183  fin = files[fname]
192  item = fin[group][itemname]
220  nchunks = np.ceil(dset_length / (1.*max_ppidpairs))
222  imin = i*max_ppidpairs
223  imax = np.min([(i+1)*max_ppidpairs, dset_length])
230  mask_in = (pointIDs_isvalid_in & mpiranks_isvalid_in)
258  mask_out = (pointIDs_isvalid_out & mpiranks_isvalid_out)
268  raise ValueError("Error while computing cantor pairing for RA to SYNC matching! Integer overflow detected, so matching will fail! Please increase the size of the integer type used!")
272  ids = IDs_out
273  pid = pointIDs_out[mask_out]
274  rank = mpiranks_out[mask_out]
276  Nmatches = np.sum(ID==ids)
279  Match = np.sum((p==pid) & (r==rank))
284  target_mask_small = np.in1d(IDs_out,IDs_in)
287  # (TODO: is there a more efficient way to do this? "target_mask[mask_out][target_mask_small] = True" does not work.)
289  alltargetindices = np.arange(target_length)
290  maskindices = alltargetindices[mask_out][target_mask_small]
292  target_mask = np.zeros(target_length, dtype=bool)
296  print " Double-checking that all selected input dset entries have matching targets in the output dsets..."
300  indexid = np.where( (np.array(IDs_out)==ID) )
301  index = np.where( (np.array(pointIDs_out[mask_out])==pid) &
314  # Number of "true" elements in target mask should match number of elements in 'in' arrays (after masking)
316  ntargets = np.sum(target_mask)
317  nsources = np.sum(mask_in)
319  raise ValueError("Error while computing targets for RA writes! Number of target matches for writes in the output dataset ({0}) does not match the number of elements scheduled for copying {1}!".format(ntargets,nsources))
368  xsort = np.argsort(IDs_in)
369  yindex = np.searchsorted(IDs_in[xsort], IDs_out[target_mask_small])
370  fancyindices = xsort[yindex]
377  print " Warning! Mismatch found. At position {0}, input ID ({1}) != output ID ({2})".format(k,i,j)
391  indset = item[imin:imax]
393  outdset = fout[group][itemname]
396  raise ValueError("Type mismatch detected between dataset {0} in file {1} (dtype={2}), and matching dataset in output file {3} (dtype={3})".format(itemname,fname,indset.dtype,outfname,outdset.dtype))
397  #can't do the fancy list-indexing directly on the hdf5 dataset (the boolean assignment should be ok though)
def copy_dset_whole(indset, outdset, nextempty, basemsg="")
    Definition: hdf5tools.py:59
auto zip(const T &... containers) -> boost::iterator_range< boost::zip_iterator< decltype(boost::make_tuple(std::begin(containers)...))>>
    Use to combine containers in a range loop: for (auto &&x : zip(a, b)){...}.
    Definition: scanner_utils.hpp:125