GAMBIT v1.5.0-252-gf9a3f78
a Global And Modular BSM Inference Tool
hdf5tools.py
#!/usr/bin/env python

"""Tools for reading/writing data from HDF5 files"""

import h5py
import math
import sys
import numpy as np

# Buffer variables
chunksize = 1000
bufferlength = 100              # Must match setting in hdf5printer.hpp
max_ppidpairs = 10*bufferlength # Must match setting in hdf5printer.hpp

def get_dset_lengths(d,group,dsets):
    """Accumulate the lengths of all datasets in 'group' into the dict 'd',
       and record their (name,dtype) pairs in the set 'dsets'."""
    for itemname in group:
        item = group[itemname]
        if isinstance(item,h5py.Dataset):
            #print itemname,"is a Dataset"
            dsets.add((itemname,item.dtype))
            if itemname in d:
                d[itemname] += item.shape[0]
            else:
                d[itemname] = item.shape[0]
        if isinstance(item,h5py.Group):
            #print itemname,"is a Group"
            pass

def check_lengths(d):
    """Check that all dataset lengths recorded in 'd' agree, and return the common length."""
    length = None
    for key,value in d.items():
        if length is None:
            length = value
        elif length != value:
            raise ValueError("Length of dataset '{0}' is inconsistent with the others in the target group! (length was {1}; previous dataset had length={2})".format(key,value,length))
    if length is None:
        # No datasets found; manually set length to zero
        length = 0
    return length

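# Illustrative usage sketch (not part of the original file): the file and
# group names here are hypothetical placeholders. Tally the dataset lengths
# in a group and verify that they are mutually consistent:
def _example_get_lengths(fname="samples.hdf5", groupname="data"):
    lengths = {}   # dataset name -> accumulated length
    dsets = set()  # (name, dtype) pairs seen while walking the group
    f = h5py.File(fname, "r")
    get_dset_lengths(lengths, f[groupname], dsets)
    f.close()
    return check_lengths(lengths)  # raises ValueError on a mismatch
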
def copy_dset(indset,outdset,nextempty,basemsg=""):
    """Copy 'indset' into 'outdset' starting at row 'nextempty', in chunks of 'chunksize' rows."""
    lengthtocopy = indset.shape[0]
    chunksleft = int(math.ceil(lengthtocopy/float(chunksize)))
    remainder = lengthtocopy % chunksize
    start = 0
    stride = chunksize
    Nchunks = chunksleft
    while chunksleft != 0:
        # Report progress after each chunk
        sys.stdout.write("{0}: {1}% \r".format(basemsg,int(100*(Nchunks-chunksleft)/Nchunks)))
        sys.stdout.flush()
        if remainder != 0 and chunksleft == 1:
            stride = remainder  # final (partial) chunk
        outdset[nextempty+start:nextempty+start+stride] = indset[start:start+stride]
        start += stride
        chunksleft -= 1

# As above, but copies the whole input dataset into memory before writing it out.
# This is mainly for performance comparison purposes.
def copy_dset_whole(indset,outdset,nextempty,basemsg=""):
    lengthtocopy = indset.shape[0]
    start = 0
    sys.stdout.write("{0} \r".format(basemsg))
    sys.stdout.flush()
    outdset[nextempty:nextempty+lengthtocopy] = indset

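# Illustrative usage sketch (not part of the original file): all file and
# dataset names are hypothetical, and the output dataset is assumed to have
# been created with enough space to receive the copied rows.
def _example_copy(infile="in.hdf5", outfile="out.hdf5"):
    fin = h5py.File(infile, "r")
    fout = h5py.File(outfile, "r+")
    nextempty = 0  # next free row in the output dataset
    copy_dset(fin["data/LogLike"], fout["data/LogLike"], nextempty,
              basemsg="Copying data/LogLike")
    fin.close()
    fout.close()
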
# Combine two non-negative integers into one integer with a unique mapping
def cantor_pairing(x,y):
    return (x+y)*(x+y+1)//2 + y

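# Worked example (not part of the original file): the pairing is a bijection
# on non-negative integers, so distinct (x,y) pairs always give distinct IDs:
#   cantor_pairing(2,3) = (2+3)*(2+3+1)//2 + 3 = 18
#   cantor_pairing(3,2) = (3+2)*(3+2+1)//2 + 2 = 17
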
# Check for duplicate entries in datasets
def check_for_duplicates(fout,group):
    pointIDs_out = fout[group]["pointID"]
    mpiranks_out = fout[group]["MPIrank"]
    pointIDs_isvalid_out = np.array(fout[group]["pointID_isvalid"][:],dtype=bool)
    mpiranks_isvalid_out = np.array(fout[group]["MPIrank_isvalid"][:],dtype=bool)
    mask_out = (pointIDs_isvalid_out & mpiranks_isvalid_out)
    # Convert (MPIrank,pointID) pairs to single values to facilitate fast comparison
    IDs_out = cantor_pairing(
               np.array(pointIDs_out[mask_out],dtype=np.longlong),
               np.array(mpiranks_out[mask_out],dtype=np.longlong)
              )
    ids  = IDs_out
    pid  = pointIDs_out[mask_out]
    rank = mpiranks_out[mask_out]
    error = False
    for ID,p,r in zip(ids,pid,rank):
        if p==1 and r==0:
            print " Spotted first entry ({0},{1})".format(r,p)
        Nmatches = np.sum(ID==ids)
        if Nmatches>1:
            print " Error! ID", ID, "is duplicated {0} times!".format(Nmatches)
            error = True
            matches = (p==pid) & (r==rank)
            Nmatches2 = np.sum(matches)
            if Nmatches2>1:
                print " ...MPIrank/pointID ({0},{1}) duplicate count: {2}".format(r,p,Nmatches2)
                dup_locs = np.where(matches)
                print " Indices of duplicates are:", dup_locs
            else:
                print " ...No duplicate pid and rank pairs detected! This seems to indicate that something is screwed up in the Cantor pairing"

    if error:
        raise ValueError("Duplicates detected in output dataset!")

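# Illustrative usage sketch (not part of the original file): the file and
# group names are hypothetical placeholders.
def _example_check_duplicates(fname="combined.hdf5", groupname="data"):
    fout = h5py.File(fname, "r")
    check_for_duplicates(fout, groupname)  # raises ValueError on duplicates
    fout.close()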