CUDA/HEMI kernels supporting NLL calculation. More...
#include <cuda.h>
#include <hemi/hemi.h>
#include <curand_kernel.h>
#include <hemi/array.h>
Go to the source code of this file.
Typedefs | |
typedef curandStateXORWOW | RNGState |
Defines RNG for CURAND, ignored in CPU mode. | |
Functions | |
__global__ void | init_device_rngs (int nthreads, unsigned long long seed, curandState *state) |
HEMI_KERNEL() | pick_new_vector (int nthreads, RNGState *rng, float sigma, const float *current_vector, float *proposed_vector) |
HEMI_KERNEL() | jump_decider (RNGState *rng, double *nll_current, const double *nll_proposed, float *v_current, const float *v_proposed, unsigned ns, int *counter, float *jump_buffer) |
HEMI_KERNEL() | nll_event_chunks (const float *lut, const float *pars, const size_t ne, const size_t ns, double *sums) |
HEMI_KERNEL() | nll_event_reduce (const size_t nthreads, const double *sums, double *total_sum) |
HEMI_KERNEL() | nll_total (const size_t ns, const float *pars, const float *expectations, const float *constraints, const double *events_total, double *nll) |
CUDA/HEMI kernels supporting NLL calculation.
__global__ void init_device_rngs | ( | int | nthreads, | |
unsigned long long | seed, | |||
curandState * | state | |||
) |
Initialize device-side RNGs.
Generators all have the same seed but a different offset in the sequence.
nthreads | Number of threads (same as the number of states) | |
seed | Random seed shared by all generators | |
state | Array of CUDA RNG states |
HEMI_KERNEL() jump_decider | ( | RNGState * | rng, | |
double * | nll_current, | |||
const double * | nll_proposed, | |||
float * | v_current, | |||
const float * | v_proposed, | |||
unsigned | ns, | |||
int * | counter, | |||
float * | jump_buffer | |||
) |
Decide whether to accept a random MCMC step.
Compare likelihoods of current and proposed parameter vectors. If the step is accepted, store it in a buffer which can be flushed periodically, minimizing transfer overhead.
The step buffer is a matrix with Nsteps rows and (Nsignals + 1) columns, one row per accepted step, where the last column contains the likelihood value.
rng | Random-number generator states, used in GPU mode only | |
nll_current | The NLL of the current parameters | |
nll_proposed | The NLL of the proposed parameters | |
v_current | The current parameters | |
v_proposed | The proposed parameters | |
ns | The number of signals | |
counter | The number of steps in the buffer | |
jump_buffer | The step buffer |
HEMI_KERNEL() nll_event_chunks | ( | const float * | lut, | |
const float * | pars, | |||
const size_t | ne, | |||
const size_t | ns, | |||
double * | sums | |||
) |
NLL Part 1
Calculate the -sum_i(log(sum_j(Nj * Pj(xi)))) contribution to the NLL, where i runs over the events and j over the signals.
lut | Pj(xi) lookup table | |
pars | Event rates (normalizations) for each signal | |
ne | Number of events in the data | |
ns | Number of signals | |
sums | Output sums for subsets of events |
HEMI_KERNEL() nll_event_reduce | ( | const size_t | nthreads, | |
const double * | sums, | |||
double * | total_sum | |||
) |
NLL Part 2
Total up the partial sums from Part 1.
nthreads | Number of threads == number of sums to total | |
sums | The partial sums | |
total_sum | Output: the total sum |
HEMI_KERNEL() nll_total | ( | const size_t | ns, | |
const float * | pars, | |||
const float * | expectations, | |||
const float * | constraints, | |||
const double * | events_total, | |||
double * | nll | |||
) |
NLL Part 3
Calculate overall normalization and constraints contributions to NLL, add in the event term to get the total.
ns | Number of signals | |
pars | Event rates (normalizations) for each signal | |
expectations | Expected rates for each signal | |
constraints | Fractional constraints for each signal | |
events_total | Sum of event term contribution | |
nll | The total NLL |
HEMI_KERNEL() pick_new_vector | ( | int | nthreads, | |
RNGState * | rng, | |||
float | sigma, | |||
const float * | current_vector, | |||
float * | proposed_vector | |||
) |
Pick a new position distributed around the given one.
Uses the CURAND XORWOW generator on the GPU, or ROOT's gRandom on the CPU.
nthreads | Number of threads == length of vectors | |
rng | CUDA RNG states, ignored on CPU | |
sigma | Standard deviation of the distribution to sample from | |
current_vector | Vector of current parameters | |
proposed_vector | Output vector of proposed parameters |