GAMBIT is hosted by Hepforge, IPPP Durham
GAMBIT  v1.5.0-252-gf9a3f78
a Global And Modular BSM Inference Tool
combine_hdf5 Namespace Reference

Functions

def sigint_handler (signum, frame)
 
def usage ()
 

Variables

int chunksize = 1000
 
int bufferlength = 100
 
int max_ppidpairs = 10*bufferlength
 
bool runchecks = True
 
bool delete_tmp = True
 
 outfname = sys.argv[1]
 
 group = sys.argv[2]
 
 tmp_files = sys.argv[3:]
 
 N = len(tmp_files)
 
string RA_group = group + "/RA"
 
dictionary files = {}
 
list sync_dsets = [set([]) for i in range(N)]
 
list RA_dsets = [set([]) for i in range(N)]
 
 RA_dsets_exclude = set(["RA_pointID","RA_pointID_isvalid","RA_MPIrank","RA_MPIrank_isvalid"])
 
list sync_lengths = [0 for i in range(N)]
 
list RA_lengths = [0 for i in range(N)]
 
 fnames = tmp_files
 
 f = h5py.File(fname,'r')
 
list datasets = []
 
dictionary tmp_dset_metadata = {}
 
dictionary tmp_RA_dset_metadata = {}
 
 total_sync_length = sum(sync_lengths)
 
 fout = h5py.File(outfname,'a')
 
 gout = fout.create_group(group)
 
dictionary existing_dsets = {}
 
dictionary dsetlengths = {}
 
 init_output_length = check_lengths(dsetlengths)
 
dictionary target_dsets = {}
 
 all_sync_dsets = set([]).union(*sync_dsets)
 
 all_RA_dsets = set([]).union(*RA_dsets)
 
 chunks
 
 dtype
 
 dt
 
 maxshape
 
 nextempty = init_output_length
 
dictionary fin = files[fname]
 
 dset_length = None
 
dictionary item = fin[group][itemname]
 
 nchunks = np.ceil(dset_length / (1.*max_ppidpairs))
 
int imin = i*max_ppidpairs
 
 imax = np.min([(i+1)*max_ppidpairs, dset_length])
 
dictionary pointIDs_in = fin[RA_group]["RA_pointID"][imin:imax]
 
dictionary mpiranks_in = fin[RA_group]["RA_MPIrank"][imin:imax]
 
 pointIDs_isvalid_in = np.array(fin[RA_group]["RA_pointID_isvalid"][imin:imax],dtype=np.bool)
 
 mpiranks_isvalid_in = np.array(fin[RA_group]["RA_MPIrank_isvalid"][imin:imax],dtype=np.bool)
 
tuple mask_in = (pointIDs_isvalid_in & mpiranks_isvalid_in)
 
 IDs_in = cantor_pairing(pointIDs_in[mask_in],mpiranks_in[mask_in])
 
 pointIDs_out = fout[group]["pointID"]
 
 mpiranks_out = fout[group]["MPIrank"]
 
 pointIDs_isvalid_out = np.array(fout[group]["pointID_isvalid"][:],dtype=np.bool)
 
 mpiranks_isvalid_out = np.array(fout[group]["MPIrank_isvalid"][:],dtype=np.bool)
 
tuple mask_out = (pointIDs_isvalid_out & mpiranks_isvalid_out)
 
 IDs_out
 
 ids = IDs_out
 
 pid = pointIDs_out[mask_out]
 
 rank = mpiranks_out[mask_out]
 
 Nmatches = np.sum(ID==ids)
 
 Match = np.sum((p==pid) & (r==rank))
 
 target_mask_small = np.in1d(IDs_out,IDs_in)
 
 target_length = fout[group]["pointID"].shape[0]
 
 alltargetindices = np.arange(target_length)
 
 maskindices = alltargetindices[mask_out][target_mask_small]
 
 target_mask = np.zeros(target_length, dtype=bool)
 
 indexid = np.where( (np.array(IDs_out)==ID) )
 
 index
 
 ntargets = np.sum(target_mask)
 
 nsources = np.sum(mask_in)
 
 xsort = np.argsort(IDs_in)
 Just some test code which I decided to keep around since it is helpful for understanding how the rearrangement of the input data to match the output selection works. More...
 
 yindex = np.searchsorted(IDs_in[xsort], IDs_out[target_mask_small])
 
 fancyindices = xsort[yindex]
 
dictionary indset = item[imin:imax]
 
 outdset = fout[group][itemname]
 

Detailed Description

Combine data from several hdf5 files created by HDF5Printer into a single file

Function Documentation

◆ sigint_handler()

def combine_hdf5.sigint_handler (   signum,
  frame 
)

Definition at line 5 of file combine_hdf5.py.

5 def sigint_handler(signum, frame):
6  print 'CTRL+C is blocked while the HDF5Printer combine script runs! Signal received, but ignored.'
7 signal.signal(signal.SIGINT, sigint_handler)
8 
def sigint_handler(signum, frame)
Definition: combine_hdf5.py:5

◆ usage()

def combine_hdf5.usage ( )

Definition at line 21 of file combine_hdf5.py.

21 def usage():
22  print " Usage: python combine_hdf5.py <path-to-target-hdf5-file> <root group in hdf5 files> <tmp file 1> <tmp file 2> ..."
23  print
24  print " Attempts to combine the data in a group of hdf5 files produced by HDF5Printer in separate processes during a GAMBIT run."
25  print " Use --delete_tmp flag to delete input files upon successful combination."
26  print " Use --runchecks flag to run some extra validity checks on the input and output data (warning: may be slow for large datasets)"
27  exit(1)
28 
29 #====Begin "main"=================================
30 
31 #if len(sys.argv)!=6 and len(sys.argv)!=7: usage()
32 #
33 runchecks=False
34 delete_tmp=False
void usage()

Variable Documentation

◆ all_RA_dsets

combine_hdf5.all_RA_dsets = set([]).union(*RA_dsets)

Definition at line 161 of file combine_hdf5.py.

◆ all_sync_dsets

combine_hdf5.all_sync_dsets = set([]).union(*sync_dsets)

Definition at line 160 of file combine_hdf5.py.

◆ alltargetindices

combine_hdf5.alltargetindices = np.arange(target_length)

Definition at line 289 of file combine_hdf5.py.

◆ bufferlength

◆ chunks

combine_hdf5.chunks

Definition at line 165 of file combine_hdf5.py.

◆ chunksize

int combine_hdf5.chunksize = 1000

Definition at line 16 of file combine_hdf5.py.

Referenced by Gambit::PostProcessor.get_done_points().

◆ datasets

list combine_hdf5.datasets = []

◆ delete_tmp

bool combine_hdf5.delete_tmp = True

Definition at line 44 of file combine_hdf5.py.

◆ dset_length

◆ dsetlengths

dictionary combine_hdf5.dsetlengths = {}

Definition at line 138 of file combine_hdf5.py.

◆ dt

combine_hdf5.dt

Definition at line 165 of file combine_hdf5.py.

◆ dtype

◆ existing_dsets

dictionary combine_hdf5.existing_dsets = {}

Definition at line 137 of file combine_hdf5.py.

◆ f

◆ fancyindices

combine_hdf5.fancyindices = xsort[yindex]

Definition at line 370 of file combine_hdf5.py.

◆ files

dictionary combine_hdf5.files = {}

Definition at line 61 of file combine_hdf5.py.

◆ fin

◆ fnames

combine_hdf5.fnames = tmp_files

Definition at line 68 of file combine_hdf5.py.

◆ fout

combine_hdf5.fout = h5py.File(outfname,'a')

◆ gout

combine_hdf5.gout = fout.create_group(group)

Definition at line 129 of file combine_hdf5.py.

◆ group

◆ ids

combine_hdf5.ids = IDs_out

Definition at line 272 of file combine_hdf5.py.

Referenced by Gambit::Scanner.TWalk().

◆ IDs_in

combine_hdf5.IDs_in = cantor_pairing(pointIDs_in[mask_in],mpiranks_in[mask_in])

Definition at line 234 of file combine_hdf5.py.

◆ IDs_out

combine_hdf5.IDs_out
Initial value:
2  np.array(pointIDs_out[mask_out],dtype=np.longlong),
3  np.array(mpiranks_out[mask_out],dtype=np.longlong)
4  )
def cantor_pairing(x, y)
Definition: hdf5tools.py:67

Definition at line 262 of file combine_hdf5.py.

◆ imax

combine_hdf5.imax = np.min([(i+1)*max_ppidpairs, dset_length])

Definition at line 223 of file combine_hdf5.py.

Referenced by Gambit::DarkBit::SimpleHist.addBox().

◆ imin

int combine_hdf5.imin = i*max_ppidpairs

Definition at line 222 of file combine_hdf5.py.

Referenced by Gambit::DarkBit::SimpleHist.addBox().

◆ index

◆ indexid

combine_hdf5.indexid = np.where( (np.array(IDs_out)==ID) )

Definition at line 300 of file combine_hdf5.py.

◆ indset

dictionary combine_hdf5.indset = item[imin:imax]

Definition at line 391 of file combine_hdf5.py.

◆ init_output_length

int combine_hdf5.init_output_length = check_lengths(dsetlengths)

Definition at line 147 of file combine_hdf5.py.

◆ item

dictionary combine_hdf5.item = fin[group][itemname]

Definition at line 192 of file combine_hdf5.py.

Referenced by Gambit::Printers::asciiPrinter.dump_buffer().

◆ mask_in

tuple combine_hdf5.mask_in = (pointIDs_isvalid_in & mpiranks_isvalid_in)

Definition at line 230 of file combine_hdf5.py.

◆ mask_out

tuple combine_hdf5.mask_out = (pointIDs_isvalid_out & mpiranks_isvalid_out)

Definition at line 258 of file combine_hdf5.py.

◆ maskindices

combine_hdf5.maskindices = alltargetindices[mask_out][target_mask_small]

Definition at line 290 of file combine_hdf5.py.

◆ Match

combine_hdf5.Match = np.sum((p==pid) & (r==rank))

Definition at line 279 of file combine_hdf5.py.

◆ max_ppidpairs

int combine_hdf5.max_ppidpairs = 10*bufferlength

Definition at line 19 of file combine_hdf5.py.

◆ maxshape

combine_hdf5.maxshape

Definition at line 165 of file combine_hdf5.py.

◆ mpiranks_in

dictionary combine_hdf5.mpiranks_in = fin[RA_group]["RA_MPIrank"][imin:imax]

Definition at line 226 of file combine_hdf5.py.

◆ mpiranks_isvalid_in

combine_hdf5.mpiranks_isvalid_in = np.array(fin[RA_group]["RA_MPIrank_isvalid"][imin:imax],dtype=np.bool)

Definition at line 228 of file combine_hdf5.py.

◆ mpiranks_isvalid_out

combine_hdf5.mpiranks_isvalid_out = np.array(fout[group]["MPIrank_isvalid"][:],dtype=np.bool)

Definition at line 256 of file combine_hdf5.py.

◆ mpiranks_out

combine_hdf5.mpiranks_out = fout[group]["MPIrank"]

Definition at line 254 of file combine_hdf5.py.

◆ N

◆ nchunks

combine_hdf5.nchunks = np.ceil(dset_length / (1.*max_ppidpairs))

Definition at line 220 of file combine_hdf5.py.

◆ nextempty

combine_hdf5.nextempty = init_output_length

Definition at line 177 of file combine_hdf5.py.

◆ Nmatches

combine_hdf5.Nmatches = np.sum(ID==ids)

Definition at line 276 of file combine_hdf5.py.

◆ nsources

combine_hdf5.nsources = np.sum(mask_in)

Definition at line 317 of file combine_hdf5.py.

◆ ntargets

combine_hdf5.ntargets = np.sum(target_mask)

Definition at line 316 of file combine_hdf5.py.

◆ outdset

combine_hdf5.outdset = fout[group][itemname]

Definition at line 393 of file combine_hdf5.py.

◆ outfname

combine_hdf5.outfname = sys.argv[1]

Definition at line 47 of file combine_hdf5.py.

◆ pid

◆ pointIDs_in

dictionary combine_hdf5.pointIDs_in = fin[RA_group]["RA_pointID"][imin:imax]

Definition at line 225 of file combine_hdf5.py.

◆ pointIDs_isvalid_in

combine_hdf5.pointIDs_isvalid_in = np.array(fin[RA_group]["RA_pointID_isvalid"][imin:imax],dtype=np.bool)

Definition at line 227 of file combine_hdf5.py.

◆ pointIDs_isvalid_out

combine_hdf5.pointIDs_isvalid_out = np.array(fout[group]["pointID_isvalid"][:],dtype=np.bool)

Definition at line 255 of file combine_hdf5.py.

◆ pointIDs_out

combine_hdf5.pointIDs_out = fout[group]["pointID"]

Definition at line 253 of file combine_hdf5.py.

◆ RA_dsets

list combine_hdf5.RA_dsets = [set([]) for i in range(N)]

Definition at line 64 of file combine_hdf5.py.

◆ RA_dsets_exclude

combine_hdf5.RA_dsets_exclude = set(["RA_pointID","RA_pointID_isvalid","RA_MPIrank","RA_MPIrank_isvalid"])

Definition at line 65 of file combine_hdf5.py.

◆ RA_group

string combine_hdf5.RA_group = group + "/RA"

Definition at line 51 of file combine_hdf5.py.

◆ RA_lengths

list combine_hdf5.RA_lengths = [0 for i in range(N)]

Definition at line 67 of file combine_hdf5.py.

◆ rank

◆ runchecks

bool combine_hdf5.runchecks = True

Definition at line 40 of file combine_hdf5.py.

◆ sync_dsets

list combine_hdf5.sync_dsets = [set([]) for i in range(N)]

Definition at line 63 of file combine_hdf5.py.

◆ sync_lengths

list combine_hdf5.sync_lengths = [0 for i in range(N)]

Definition at line 66 of file combine_hdf5.py.

◆ target_dsets

dictionary combine_hdf5.target_dsets = {}

Definition at line 159 of file combine_hdf5.py.

◆ target_length

combine_hdf5.target_length = fout[group]["pointID"].shape[0]

Definition at line 288 of file combine_hdf5.py.

◆ target_mask

combine_hdf5.target_mask = np.zeros(target_length, dtype=bool)

Definition at line 292 of file combine_hdf5.py.

◆ target_mask_small

combine_hdf5.target_mask_small = np.in1d(IDs_out,IDs_in)

Definition at line 284 of file combine_hdf5.py.

◆ tmp_dset_metadata

dictionary combine_hdf5.tmp_dset_metadata = {}

Definition at line 79 of file combine_hdf5.py.

◆ tmp_files

◆ tmp_RA_dset_metadata

dictionary combine_hdf5.tmp_RA_dset_metadata = {}

Definition at line 80 of file combine_hdf5.py.

◆ total_sync_length

combine_hdf5.total_sync_length = sum(sync_lengths)

Definition at line 98 of file combine_hdf5.py.

◆ xsort

combine_hdf5.xsort = np.argsort(IDs_in)

Just some test code which I decided to keep around since it is helpful for understanding how the rearrangement of the input data to match the output selection works.

# Compute sorting index array for rearranging the source entries to match the target locations
# The way this works is a bit trippy, but it is fast.
#y = target (IDs_out)
#x = sources (IDs_in)
#result = array of length(y), containing positions of
#e.g.
#x = np.array([3,5,7,1 ,9 ,8,6,6])
#y = np.array([2,1,5,10,100,6])
# out = [ - 3 1 - - 6 ]
# i.e. "1" in y, is in position index 3 of x.
x1 = np.array([0,1,2,3,4,5,6,7,8])
y1 = np.array([4,3,5,0,1,2,6,8,7])
# where in x is each element of y?
# should give back y, since e.g. 4 is at index 4 in x
# then using ypos as indices on x[sort1], should again return y.
xsort1 = np.argsort(x1)
ypos1 = np.searchsorted(x1[xsort1], y1)
print "verifying..."
print y1
print ypos1
print xsort1[ypos1]
# less trivial test:
x2 = np.array([0,1,7,2,7,8,6,4])
y2 = np.array([4,0,1,2,6,8,7])
# indices array should be:
# [8,1,2,3,7,5,2]
# (but in our case we don't want duplicates in either the target or input arrays!)
xsort2 = np.argsort(x2)
ypos2 = np.searchsorted(x2[xsort2], y2)
indices = xsort2[ypos2]
print "verifying..."
print x2
print xsort2
print x2[xsort2]
print "y2"
print y2
print ypos2
print indices
print x2[indices]

Definition at line 368 of file combine_hdf5.py.

◆ yindex

combine_hdf5.yindex = np.searchsorted(IDs_in[xsort], IDs_out[target_mask_small])

Definition at line 369 of file combine_hdf5.py.