gambit is hosted by Hepforge, IPPP Durham
GAMBIT  v1.5.0-252-gf9a3f78
A Global And Modular BSM Inference Tool
hdf5tools Namespace Reference

Functions

def get_dset_lengths (d, group, dsets)
 
def check_lengths (d)
 
def copy_dset (indset, outdset, nextempty, basemsg="")
 
def copy_dset_whole (indset, outdset, nextempty, basemsg="")
 
def cantor_pairing (x, y)
 
def check_for_duplicates (fout, group)
 

Variables

int chunksize = 1000
 
int bufferlength = 100
 
int max_ppidpairs = 10*bufferlength
 

Detailed Description

Tools for reading/writing data from HDF5 files

Function Documentation

◆ cantor_pairing()

def hdf5tools.cantor_pairing (   x,
  y 
)

Definition at line 67 of file hdf5tools.py.

Referenced by check_for_duplicates().

67 def cantor_pairing(x,y):
68  return (x+y)*(x+y+1)//2 + y
69 
70 # Check for duplicate entries in datasets
def cantor_pairing(x, y)
Definition: hdf5tools.py:67
Here is the caller graph for this function:

◆ check_for_duplicates()

def hdf5tools.check_for_duplicates (   fout,
  group 
)

Definition at line 71 of file hdf5tools.py.

References cantor_pairing(), and Gambit::Scanner.zip().

Referenced by Gambit::DescriptionDatabase.loadFile().

71 def check_for_duplicates(fout,group):
72  pointIDs_out = fout[group]["pointID"]
73  mpiranks_out = fout[group]["MPIrank"]
74  pointIDs_isvalid_out = np.array(fout[group]["pointID_isvalid"][:],dtype=np.bool)
75  mpiranks_isvalid_out = np.array(fout[group]["MPIrank_isvalid"][:],dtype=np.bool)
76  mask_out = (pointIDs_isvalid_out & mpiranks_isvalid_out)
77  # convert entries to single values to facilitate fast comparison
78  IDs_out = cantor_pairing(
79  np.array(pointIDs_out[mask_out],dtype=np.longlong),
80  np.array(mpiranks_out[mask_out],dtype=np.longlong)
81  )
82  ids = IDs_out
83  pid = pointIDs_out[mask_out]
84  rank = mpiranks_out[mask_out]
85  error = False
86  for ID,p,r in zip(ids,pid,rank):
87  if(p==1 and r==0):
88  print " Spotted first entry ({0},{1})".format(r,p)
89  Nmatches = np.sum(ID==ids)
90  if Nmatches>1:
91  print " Error! ID", ID, "is duplicated {0} times!".format(Nmatches)
92  error = True
93  matches = (p==pid) & (r==rank)
94  Nmatches2 = np.sum(matches)
95  if Nmatches2>1:
96  print " ...MPIrank/pointID ({0},{1}) duplicate count: {2}".format(r,p,Nmatches2)
97  dup_locs = np.where(matches)
98  print " Indices of duplicates are:", dup_locs
99  else:
100  print " ...No duplicate pid and rank pairs detected! This seems to indicate that something is screwed up in the Cantor pairing"
101 
102  if error==True:
103  raise ValueError("Duplicates detected in output dataset!")
104 
105 
106 
107 
def check_for_duplicates(fout, group)
Definition: hdf5tools.py:71
auto zip(const T &... containers) -> boost::iterator_range< boost::zip_iterator< decltype(boost::make_tuple(std::begin(containers)...))>>
Use for combine container in a range loop: for (auto &&x : zip(a, b)){...}.
def cantor_pairing(x, y)
Definition: hdf5tools.py:67
Here is the call graph for this function:
Here is the caller graph for this function:

◆ check_lengths()

def hdf5tools.check_lengths (   d)

Definition at line 28 of file hdf5tools.py.

28 def check_lengths(d):
29  length = None
30  for key,value in d.items():
31  if length==None:
32  length=value
33  elif length!=value:
34  raise ValueError("Length of dataset '{0}' is inconsistent with the others in the target group! (length was {1}; previous dataset had length={2})".format(key,value,length))
35  if length==None:
36  # No datasets found; manually set length to zero
37  length = 0
38  return length
39 
def check_lengths(d)
Definition: hdf5tools.py:28

◆ copy_dset()

def hdf5tools.copy_dset (   indset,
  outdset,
  nextempty,
  basemsg = "" 
)

Definition at line 40 of file hdf5tools.py.

References int.

40 def copy_dset(indset,outdset,nextempty,basemsg=""):
41  lengthtocopy = indset.shape[0]
42  chunksleft = math.ceil(lengthtocopy/float(chunksize))
43  remainder = lengthtocopy % chunksize
44  start = 0
45  stride = chunksize
46  Nchunks=chunksleft
47  while(chunksleft!=0):
48  if(chunksleft % 1 == 0):
49  sys.stdout.write("{0}: {1}% \r".format(basemsg,int(100*(Nchunks-chunksleft)/Nchunks)))
50  sys.stdout.flush()
51  if(remainder!=0 and chunksleft==1):
52  stride=remainder
53  outdset[nextempty+start:nextempty+start+stride] = indset[start:start+stride]
54  start+=stride
55  chunksleft-=1
56 
57 # As above, but copies the whole input dataset into memory before writing it out
58 # This is mainly for performance comparison purposes.
def copy_dset(indset, outdset, nextempty, basemsg="")
Definition: hdf5tools.py:40
DS5_MSPCTM DS_INTDOF int

◆ copy_dset_whole()

def hdf5tools.copy_dset_whole (   indset,
  outdset,
  nextempty,
  basemsg = "" 
)

Definition at line 59 of file hdf5tools.py.

59 def copy_dset_whole(indset,outdset,nextempty,basemsg=""):
60  lengthtocopy = indset.shape[0]
61  start = 0
62  sys.stdout.write("{0} \r".format(basemsg))
63  sys.stdout.flush()
64  outdset[nextempty:nextempty+lengthtocopy] = indset
65 
66 # Combine two integers into one integer with unique mapping
def copy_dset_whole(indset, outdset, nextempty, basemsg="")
Definition: hdf5tools.py:59

◆ get_dset_lengths()

def hdf5tools.get_dset_lengths (   d,
  group,
  dsets 
)

Definition at line 14 of file hdf5tools.py.

14 def get_dset_lengths(d,group,dsets):
15  for itemname in group:
16  item = group[itemname]
17  if isinstance(item,h5py.Dataset):
18  #print itemname,"is a Dataset"
19  dsets.add((itemname,item.dtype))
20  if itemname in d:
21  d[itemname] += item.shape[0]
22  else:
23  d[itemname] = item.shape[0]
24  if isinstance(item,h5py.Group):
25  #print itemname,"is a Group"
26  pass
27 
def get_dset_lengths(d, group, dsets)
Definition: hdf5tools.py:14

Variable Documentation

◆ bufferlength

int hdf5tools.bufferlength = 100

Definition at line 11 of file hdf5tools.py.

◆ chunksize

int hdf5tools.chunksize = 1000

Definition at line 10 of file hdf5tools.py.

◆ max_ppidpairs

int hdf5tools.max_ppidpairs = 10*bufferlength

Definition at line 12 of file hdf5tools.py.