Module pmt_analysis.analysis.model_independent
Expand source code
import numpy as np
import pandas as pd
from scipy.signal import savgol_filter
from tqdm import tqdm
from itertools import count
import warnings
from typing import Tuple, Union, Optional
class GainModelIndependent:
"""Class for model independent occupancy and gain calculation.
Calculation of the gain, i.e. the current amplification factor of the PMT, following the statistical
method proposed in `Saldanha, R., et al. 'Model independent approach to the single photoelectron
calibration of photomultiplier tubes.' Nuclear Instruments and Methods in Physics Research Section A:
Accelerators, Spectrometers, Detectors and Associated Equipment 863 (2017): 35-46`.
Uses the peak area spectrum from pulsed low-intensity LED light illumination and LED triggered
data taking. The fixed window analysis takes two data sets, one with LED 'off'
(practically pulsed at low enough voltage amplitude to ensure no light emission from the LED)
and one with LED 'on', usually at a target occupancy (i.e. mean number photo electrons released
per LED trigger) of ca. 2 for uncertainty minimization.
Attributes:
areas_led_on: Array with 'LED on' data pulse areas.
areas_led_off: Array with 'LED off' data pulse areas.
verbose: Verbosity of the output.
outliers_thresholds: Range of area values used for the time dependent gain calculation.
"""
def __init__(self, areas_led_on: np.ndarray, areas_led_off: np.ndarray, verbose: bool = False,
trim_outliers_bool: bool = True):
"""Init of the GainModelIndependent class.
Cleans and prepares the pulse area data inputs.
Args:
areas_led_on: Array with 'LED on' data pulse areas.
areas_led_off: Array with 'LED off' data pulse areas.
verbose: Verbosity of the output.
trim_outliers_bool: Remove outliers from input data using the `get_outlier_bounds` method.
"""
self.verbose = verbose
# Bring pulse area data inputs to numpy.ndarray format
self.areas_led_on = self.inputs_to_numpy(areas_led_on)
self.areas_led_off = self.inputs_to_numpy(areas_led_off)
# Remove outlier values
self.outliers_thresholds = (None, None)
if trim_outliers_bool:
if verbose:
print('Calculating range of area values to be used for the model independent gain calculation.')
outlier_bound_lower, outlier_bound_upper = self.get_outlier_bounds(self.areas_led_on, self.verbose)
self.outliers_thresholds = (outlier_bound_lower, outlier_bound_upper)
if verbose:
print('Trimming area outliers with limits [{}, {}].'.format(outlier_bound_lower, outlier_bound_upper))
self.areas_led_off = self.areas_led_off[(self.areas_led_off >= outlier_bound_lower)
& (self.areas_led_off <= outlier_bound_upper)]
self.areas_led_on = self.areas_led_on[(self.areas_led_on >= outlier_bound_lower)
& (self.areas_led_on <= outlier_bound_upper)]
@staticmethod
def inputs_to_numpy(input_data: Union[np.ndarray, list, dict, pd.DataFrame, pd.Series]) -> np.ndarray:
"""Bring pulse area data inputs to numpy.ndarray format.
Args:
input_data: Input pulse area data.
Allowed types: `numpy.ndarray, list, dict, pandas.DataFrame, pandas.Series`
Returns:
input_data: Input data converted to numpy.ndarray.
"""
if type(input_data) == np.ndarray:
pass
elif type(input_data) == list:
input_data = np.asarray(input_data)
elif type(input_data) == dict:
if 'area' not in input_data.keys():
raise KeyError('No key name `area` found in provided dictionary `input_data`.')
elif type(input_data['area']) == np.ndarray:
input_data = input_data['area']
elif type(input_data['area']) == list:
input_data = np.asarray(input_data['area'])
else:
raise TypeError('Unsupported type for `input_data`.')
elif type(input_data) == pd.DataFrame:
if 'area' not in input_data.columns:
raise KeyError('No column name `area` found in provided data frame `input_data`.')
else:
input_data = input_data['area'].to_numpy()
elif type(input_data) == pd.Series:
input_data = input_data.to_numpy()
else:
raise TypeError('Unsupported type for `input_data`.')
return input_data
@staticmethod
def get_outlier_bounds(input_data: np.ndarray, verbose: bool = False) -> tuple:
"""Calculate boundaries for outlier rejection.
Outliers may bias the model independent gain calculation due to their large lever and
should hence be removed.
Determined are the values at which the entries in a window of size based on square-root
binning falls below a certain threshold.
Args:
input_data: Input pulse area data.
verbose: Verbosity of the output.
Returns:
Tuple with upper and lower bound to be used for outlier rejection.
"""
window_width = np.ceil((np.percentile(input_data, 99.99) - np.percentile(input_data, 0.01))
/ np.sqrt(input_data.shape[0]))
window_width = 5*window_width
window_counts_threshold = 1
for i in tqdm(count(0), disable=not bool(verbose)):
if len(input_data[(input_data >= i) & (input_data < i + window_width)]) <= window_counts_threshold:
break
outlier_bound_upper = i + window_width
for i in tqdm(count(0), disable=not bool(verbose)):
if len(input_data[(input_data <= -i) & (input_data > -(i + window_width))]) <= window_counts_threshold:
break
outlier_bound_lower = -(i + window_width)
return outlier_bound_lower, outlier_bound_upper
@staticmethod
def get_area_histogram(input_data: np.ndarray, bin_width: int = 1,
limits: Optional[Tuple[int, int]] = None) -> tuple:
"""Function to generate area histogram with given bin width.
Args:
input_data: Input pulse area data.
bin_width: Bin width.
limits: Range of histogram bins.
Returns:
tuple(bins_centers, cnts): Tuple with bins centers and corresponding counts values.
"""
try:
bin_width = int(bin_width)
except Exception:
raise TypeError('bin_width must be of type int.')
if limits is not None:
if len(limits) != 2:
raise ValueError('Limits must be a tuple of length 2.')
try:
limits_lower = int(limits[0])
limits_upper = int(limits[1])
except Exception:
raise TypeError('Entries of limits must be of type int.')
bins_edges = np.arange(limits_lower - 0.5, limits_upper + 1.5, bin_width)
else:
bins_edges = np.arange(np.min(input_data) - 0.5, np.max(input_data) + 1.5, bin_width)
bins_centers = (bins_edges[1:] + bins_edges[:-1]) / 2
counts, _ = np.histogram(input_data, bins=bins_edges)
return bins_centers, counts
@staticmethod
def get_moments(input_data: np.ndarray) -> dict:
"""Calculate moments relevant for model independent gains calculation.
Args:
input_data: Input pulse area data.
Returns:
Dictionary with moments relevant for model independent gains calculation.
"""
dict_moments = dict()
dict_moments['mean'] = np.mean(input_data)
dict_moments['variance'] = np.var(input_data)
return dict_moments
@staticmethod
def sav_gol_smoothing(input_data: np.ndarray) -> np.ndarray:
"""Smooth `input_data` with Savitzky–Golay filter.
Args:
input_data: Input threshold dependent occupancy values.
Returns:
output_data: Savitzky–Golay filter smoothed input data.
"""
# Set length of the filter window
num = len(input_data) - 1
if num % 2 == 0:
num = num - 1
# Set order of the polynomial used to fit the samples
order = 10
order = min(order, num - 1)
# Apply filter
output_data = savgol_filter(input_data, num, order)
return output_data
def get_occupancy_model_independent(self, areas_led_on: np.ndarray, areas_led_off: np.ndarray) -> dict:
"""Calculate the occupancy for the model independent gain calculation.
For a range of thresholds within the 0PE peak areas, calculate the number of entries
below threshold for 'LED off' (`integral_b`) and 'LED on' (`integral_s`) data areas.
The occupancy can be estimated from the number of 'LED on' sample triggers with
zero LED-induced photoelectrons, as can be assumed for a sufficiently low threshold
for `integral_s`, and the total number of sample triggers, as can be by proportion estimated
through `integral_b`. As the number of photoelectrons produced follows a Poisson distribution,
we can use the expression `-ln(integral_s/integral_b)` as an estimator for the occupancy for the
selected threshold. This threshold is supposed to be sufficiently low such that `integral_s` is
not contaminated by a significant 1PE contribution. We therefore try to find the lowest
threshold for which sufficient data points are available below threshold to achieve a
relative occupancy error of below 1%. Ideally this is located in a local and global
maximum plateau around / slightly below zero areas.
Args:
areas_led_on: Array with 'LED on' data pulse areas.
areas_led_off: Array with 'LED off' data pulse areas.
Returns:
occupancy_estimator: Dictionary with the following keys:
{'occupancy': final occupancy estimate;
'occupancy_err': uncertainty final occupancy estimate;
'threshold_occupancy_determination': threshold in 0PE peak area calculations
for final occupancy estimate;
'thr_occ_det_integral_fraction': fraction of entries in 'LED off' data below threshold;
'tot_entries_b': total number of waveforms in 'LED off' data,
'iterations': dictionary with the tested thresholds and corresponding
occupancy, occupancy uncertainty, and smoothed occupancy values}
"""
# Total number of waveforms in 'LED off' data
tot_entries_b = areas_led_off.shape[0]
# Define list of thresholds to probe in iterative occupancy determination
if self.outliers_thresholds[0] is None:
thr_it_occ_det = np.abs(self.get_outlier_bounds(areas_led_on)[0])
else:
thr_it_occ_det = np.abs(self.outliers_thresholds[0])
lower_thr_it_occ_det = - thr_it_occ_det / 2
upper_thr_it_occ_det = thr_it_occ_det / 2
list_thr_it_occ_det = np.arange(lower_thr_it_occ_det, upper_thr_it_occ_det + 1)
# Initialize lists for resulting occupancies as a function of threshold.
# Use lists with append as it showed to be faster than np.arrays in this application.
occupancy_list = list()
occupancy_err_list = list()
thresholds_list = list()
f_list = list()
# Loop over thresholds and calculate corresponding occupancy and error
for threshold in list_thr_it_occ_det:
# Calculate number of entries below peak area threshold for 'LED off' (integral_b) and
# 'LED on' (integral_s) data. Correction factor on integral_b (and later also f) should
# be close to one and only correct for differences due to outlier removal.
# If significantly different cardinalities of LED on and off data sets are used,
# the error calculation below may become incorrect.
integral_b = np.sum(areas_led_off < threshold) * len(areas_led_on) / len(areas_led_off)
integral_s = np.sum(areas_led_on < threshold)
# Perform occupancy calculations only for positive number of entries below threshold
if integral_b > 0 and integral_s > 0:
# Fraction of entries in 'LED off' data below threshold
f = integral_b / (tot_entries_b * len(areas_led_on) / len(areas_led_off))
# The occupancy can be estimated from the number of 'LED on' sample triggers with zero LED-induced
# photoelectrons, as can be assumed for a sufficiently low threshold for integral_s, and the
# total number of sample triggers, as can be by proportion estimated through integral_b.
# As the number of photoelectrons produced follows a Poisson distribution, we can use the following
# expression as an estimator for the occupancy for the selected threshold (which, if sufficiently low
# to not be contaminated by a significant 1PE contribution,
# should not change the obtained occupancy value).
l_val = -np.log(integral_s / integral_b)
l_err = np.sqrt((np.exp(l_val) + 1. - 2. * f) / integral_b)
# Only consider occupancy values with relative uncertainties below 5%.
if l_err / l_val <= 0.05:
occupancy_list.append(l_val)
occupancy_err_list.append(l_err)
thresholds_list.append(threshold)
f_list.append(f)
if len(occupancy_list) == 0:
raise ValueError('No occupancy values determined in threshold iterations.')
# Convert to numpy arrays
occupancy_list = np.asarray(occupancy_list)
occupancy_err_list = np.asarray(occupancy_err_list)
thresholds_list = np.asarray(thresholds_list)
f_list = np.asarray(f_list)
# Smooth threshold-dependent occupancies with Savitzky–Golay filter
occupancy_list_smooth = self.sav_gol_smoothing(occupancy_list)
# Sort indices in smoothed occupancy list by their element value in descending order
occupancy_list_smooth_argsort = occupancy_list_smooth.argsort()[::-1]
# Find threshold and corresponding occupancy for highest smoothed occupancy value
# for which the relative occupancy error is below 1%.
# Ideally this is located in a local and global maximum plateau around / slightly below zero areas
# where 1PE contributions are negligible but on the other hand sufficient data points are available
# below the threshold to bring the relative uncertainty down to the required level.
# If no threshold value fulfils this criterion, return NaN occupancy.
occupancy_estimate = np.nan
occupancy_estimate_err = np.nan
threshold_occ_det = np.nan
thr_occ_det_integral_fraction = np.nan
for idx in occupancy_list_smooth_argsort:
if occupancy_err_list[idx] / occupancy_list[idx] < 0.01:
occupancy_estimate = occupancy_list[idx]
occupancy_estimate_err = occupancy_err_list[idx]
threshold_occ_det = thresholds_list[idx]
thr_occ_det_integral_fraction = f_list[idx]
break
if np.isnan(occupancy_estimate):
warnings.warn('Failed to estimate occupancy with the required precision. Returned NaN.')
elif occupancy_estimate <= 0:
warnings.warn('Warning: Estimated occupancy ({:.3f} ± {:.3f}) seems to be '
'less than zero.'.format(occupancy_estimate, occupancy_estimate_err))
elif self.verbose:
print('Threshold for occupancy estimation: {}'.format(threshold_occ_det))
print('Estimated occupancy: {:.3f} ± {:.3f}'.format(occupancy_estimate, occupancy_estimate_err))
occupancy_estimator = {'occupancy': occupancy_estimate,
'occupancy_err': occupancy_estimate_err,
'threshold_occupancy_determination': threshold_occ_det,
'thr_occ_det_integral_fraction': thr_occ_det_integral_fraction,
'tot_entries_b': tot_entries_b,
'iterations': {'threshold': thresholds_list,
'occupancy': occupancy_list,
'occupancy_err': occupancy_err_list,
'occupancy_smoothed': occupancy_list_smooth},
}
return occupancy_estimator
def get_gain_model_independent(self, areas_led_on: np.ndarray, areas_led_off: np.ndarray,
occupancy_estimator: dict, calc_spe_resolution: bool = True) -> dict:
"""Calculate model independent gain value.
Args:
areas_led_on: Array with 'LED on' data pulse areas.
areas_led_off: Array with 'LED off' data pulse areas.
occupancy_estimator: Output dictionary from `get_occupancy_model_independent` method.
Must contain at least the following keys: `occupancy`, `occupancy_err`,
`thr_occ_det_integral_fraction`, `tot_entries_b`.
calc_spe_resolution: If true, calculate the SPE resolution from mean and variance
of the single photoelectron response.
Returns:
gain_estimator: Dictionary with the following keys:
{'moments_s': dict with first two moments of 'LED on' area distribution,
'moments_b': dict with first two moments of 'LED off' area distribution,
'mean_psi': mean of the single photoelectron response (unconverted gain),
'var_psi': variance of the single photoelectron response,
'mean_psi_stat_err': statistical error of mean_psi,
'mean_psi_sys_err': systematic error of mean_psi,
'mean_psi_err': total error of mean_psi,
'spe_resolution': SPE resolution (if `calc_spe_resolution` argument true),
'spe_resolution_stat_err': statistical error of spe_resolution (if `calc_spe_resolution` argument true),
'spe_resolution_sys_err': statistical error of spe_resolution (if `calc_spe_resolution` argument true),
'spe_resolution_err': total error of spe_resolution (if `calc_spe_resolution` argument true)}
"""
# Get moments of area distributions
moments_s = self.get_moments(areas_led_on)
mean_s = moments_s['mean']
var_s = moments_s['variance']
moments_b = self.get_moments(areas_led_off)
mean_b = moments_b['mean']
var_b = moments_b['variance']
# Get occupancy related values
occupancy = occupancy_estimator['occupancy']
occupancy_err = occupancy_estimator['occupancy_err']
f_b = occupancy_estimator['thr_occ_det_integral_fraction']
tot_b = occupancy_estimator['tot_entries_b']
# Calculate first two central moments of the single photoelectron response
# and the uncertainties of the mean
mean_psi = (mean_s - mean_b) / occupancy
var_psi = (var_s - var_b) / occupancy - mean_psi**2
mean_psi_stat_err = np.sqrt((occupancy * (mean_psi**2 + var_psi) + 2 * var_b) / (tot_b * occupancy**2) + (
mean_psi * mean_psi * (np.exp(occupancy) + 1 - 2 * f_b)) / (f_b * tot_b * occupancy**2))
mean_psi_sys_err = (mean_s - mean_b) * occupancy_err / (occupancy**2)
mean_psi_err = np.sqrt(mean_psi_stat_err**2 + mean_psi_sys_err**2)
gain_estimator = {'moments_s': moments_s, 'moments_b': moments_b,
'mean_psi': mean_psi, 'variance_psi': var_psi,
'mean_psi_stat_err': mean_psi_stat_err, 'mean_psi_sys_err': mean_psi_sys_err,
'mean_psi_err': mean_psi_err}
# Calculate SPE resolution
if calc_spe_resolution:
spe_resolution = np.sqrt(var_psi) / mean_psi
var_psi_stat_err = np.sqrt(((mean_psi ** 2 - var_psi) ** 2 * (np.exp(occupancy) + 1 - 2 * f_b)) / (
f_b * tot_b * occupancy ** 2))
spe_resolution_stat_err = spe_resolution / 2 * var_psi_stat_err / var_psi
var_psi_sys_err = np.sqrt(
((var_s - var_b) * occupancy_err / (occupancy ** 2)) ** 2 + (2 * mean_psi * mean_psi_sys_err) ** 2)
spe_resolution_sys_err = spe_resolution / 2 * var_psi_sys_err / var_psi
spe_resolution_err = np.sqrt(spe_resolution_stat_err ** 2 + spe_resolution_sys_err ** 2)
gain_estimator.update({'spe_resolution': spe_resolution,
'spe_resolution_stat_err': spe_resolution_stat_err,
'spe_resolution_sys_err': spe_resolution_sys_err,
'spe_resolution_err': spe_resolution_err})
return gain_estimator
def compute(self, areas_led_on: np.ndarray, areas_led_off: np.ndarray, adc_to_e: float = np.nan) -> dict:
"""Perform full model independent gain and occupancy calculation.
Args:
areas_led_on: Array with 'LED on' data pulse areas.
areas_led_off: Array with 'LED off' data pulse areas.
adc_to_e: Conversion factor pulse area in ADC units to charge in units of elementary charge.
Returns:
output_dict: Dictionary with the following keys:
{'occupancy': final occupancy estimate;
'occupancy_err': uncertainty final occupancy estimate;
'threshold_occupancy_determination': threshold in 0PE peak area calculations
for final occupancy estimate;
'thr_occ_det_integral_fraction': fraction of entries in 'LED off' data below threshold;
'tot_entries_b': total number of waveforms in 'LED off' data,
'iterations': dictionary with the tested thresholds and corresponding
occupancy, occupancy uncertainty, and smoothed occupancy values,
'moments_s': dict with first two moments of 'LED on' area distribution,
'moments_b': dict with first two moments of 'LED off' area distribution,
'mean_psi': mean of the single photoelectron response (unconverted gain),
'var_psi': variance of the single photoelectron response,
'mean_psi_stat_err': statistical error of mean_psi,
'mean_psi_sys_err': systematic error of mean_psi,
'mean_psi_err': total error of mean_psi,
'gain': gain values (in units of read out electrons per induced photoelectron),
'gain_stat_err': statistical error gain,
'gain_sys_err': systematic error gain,
'gain_err': total error gain,
'outlier_thresholds': range of area values to be used (excluding outliers),
'histograms:' dictionary with the bin centers and counts for 'LED on' and 'LED off'
area histograms with default bin width of 10
}
"""
# Calculate occupancy and gain
occupancy_estimator = self.get_occupancy_model_independent(areas_led_on, areas_led_off)
gain_estimator = self.get_gain_model_independent(areas_led_on, areas_led_off, occupancy_estimator)
# Convert to gain values (in units of read out electrons per induced photoelectron)
if np.isnan(adc_to_e):
warnings.warn('Unable to convert gain values as no valid input for adc_to_e provided.')
gain_converted = gain_estimator['mean_psi'] * adc_to_e
gain_stat_err_converted = gain_estimator['mean_psi_stat_err'] * adc_to_e
gain_sys_err_converted = gain_estimator['mean_psi_sys_err'] * adc_to_e
gain_err_converted = gain_estimator['mean_psi_err'] * adc_to_e
if self.verbose:
print('Estimated gain [10^6]: {:.3f} ± {:.3f} (stat) ± {:.3f} (syst)'.format(
gain_converted*1e-6, gain_stat_err_converted*1e-6, gain_sys_err_converted*1e-6))
gain_estimator_converted = {'gain': gain_converted, 'gain_stat_err': gain_stat_err_converted,
'gain_sys_err': gain_sys_err_converted, 'gain_err': gain_err_converted}
# Generate histograms for later reference
hist_bin_centers, hist_counts_led_on = self.get_area_histogram(areas_led_on, bin_width=10,
limits=self.outliers_thresholds)
hist_bin_centers, hist_counts_led_off = self.get_area_histogram(areas_led_off, bin_width=10,
limits=self.outliers_thresholds)
histograms = {'histograms': {'bin_centers': hist_bin_centers,
'counts_led_on': hist_counts_led_on,
'counts_led_off': hist_counts_led_off}}
# Save everything in a dictionary
output_dict = dict(**occupancy_estimator, **gain_estimator, **gain_estimator_converted,
**{'outlier_thresholds': self.outliers_thresholds}, **histograms)
return output_dict
Classes
class GainModelIndependent (areas_led_on: numpy.ndarray, areas_led_off: numpy.ndarray, verbose: bool = False, trim_outliers_bool: bool = True)
-
Class for model independent occupancy and gain calculation.
Calculation of the gain, i.e. the current amplification factor of the PMT, following the statistical method proposed in
Saldanha, R., et al. 'Model independent approach to the single photoelectron calibration of photomultiplier tubes.' Nuclear Instruments and Methods in Physics Research Section A: Accelerators, Spectrometers, Detectors and Associated Equipment 863 (2017): 35-46
.Uses the peak area spectrum from pulsed low-intensity LED light illumination and LED triggered data taking. The fixed window analysis takes two data sets, one with LED 'off' (practically pulsed at low enough voltage amplitude to ensure no light emission from the LED) and one with LED 'on', usually at a target occupancy (i.e. mean number photo electrons released per LED trigger) of ca. 2 for uncertainty minimization.
Attributes
areas_led_on
- Array with 'LED on' data pulse areas.
areas_led_off
- Array with 'LED off' data pulse areas.
verbose
- Verbosity of the output.
outliers_thresholds
- Range of area values used for the time dependent gain calculation.
Init of the GainModelIndependent class.
Cleans and prepares the pulse area data inputs.
Args
areas_led_on
- Array with 'LED on' data pulse areas.
areas_led_off
- Array with 'LED off' data pulse areas.
verbose
- Verbosity of the output.
trim_outliers_bool
- Remove outliers from input data using the
get_outlier_bounds
method.
Expand source code
class GainModelIndependent: """Class for model independent occupancy and gain calculation. Calculation of the gain, i.e. the current amplification factor of the PMT, following the statistical method proposed in `Saldanha, R., et al. 'Model independent approach to the single photoelectron calibration of photomultiplier tubes.' Nuclear Instruments and Methods in Physics Research Section A: Accelerators, Spectrometers, Detectors and Associated Equipment 863 (2017): 35-46`. Uses the peak area spectrum from pulsed low-intensity LED light illumination and LED triggered data taking. The fixed window analysis takes two data sets, one with LED 'off' (practically pulsed at low enough voltage amplitude to ensure no light emission from the LED) and one with LED 'on', usually at a target occupancy (i.e. mean number photo electrons released per LED trigger) of ca. 2 for uncertainty minimization. Attributes: areas_led_on: Array with 'LED on' data pulse areas. areas_led_off: Array with 'LED off' data pulse areas. verbose: Verbosity of the output. outliers_thresholds: Range of area values used for the time dependent gain calculation. """ def __init__(self, areas_led_on: np.ndarray, areas_led_off: np.ndarray, verbose: bool = False, trim_outliers_bool: bool = True): """Init of the GainModelIndependent class. Cleans and prepares the pulse area data inputs. Args: areas_led_on: Array with 'LED on' data pulse areas. areas_led_off: Array with 'LED off' data pulse areas. verbose: Verbosity of the output. trim_outliers_bool: Remove outliers from input data using the `get_outlier_bounds` method. """ self.verbose = verbose # Bring pulse area data inputs to numpy.ndarray format self.areas_led_on = self.inputs_to_numpy(areas_led_on) self.areas_led_off = self.inputs_to_numpy(areas_led_off) # Remove outlier values self.outliers_thresholds = (None, None) if trim_outliers_bool: if verbose: print('Calculating range of area values to be used for the model independent gain calculation.') outlier_bound_lower, outlier_bound_upper = self.get_outlier_bounds(self.areas_led_on, self.verbose) self.outliers_thresholds = (outlier_bound_lower, outlier_bound_upper) if verbose: print('Trimming area outliers with limits [{}, {}].'.format(outlier_bound_lower, outlier_bound_upper)) self.areas_led_off = self.areas_led_off[(self.areas_led_off >= outlier_bound_lower) & (self.areas_led_off <= outlier_bound_upper)] self.areas_led_on = self.areas_led_on[(self.areas_led_on >= outlier_bound_lower) & (self.areas_led_on <= outlier_bound_upper)] @staticmethod def inputs_to_numpy(input_data: Union[np.ndarray, list, dict, pd.DataFrame, pd.Series]) -> np.ndarray: """Bring pulse area data inputs to numpy.ndarray format. Args: input_data: Input pulse area data. Allowed types: `numpy.ndarray, list, dict, pandas.DataFrame, pandas.Series` Returns: input_data: Input data converted to numpy.ndarray. """ if type(input_data) == np.ndarray: pass elif type(input_data) == list: input_data = np.asarray(input_data) elif type(input_data) == dict: if 'area' not in input_data.keys(): raise KeyError('No key name `area` found in provided dictionary `input_data`.') elif type(input_data['area']) == np.ndarray: input_data = input_data['area'] elif type(input_data['area']) == list: input_data = np.asarray(input_data['area']) else: raise TypeError('Unsupported type for `input_data`.') elif type(input_data) == pd.DataFrame: if 'area' not in input_data.columns: raise KeyError('No column name `area` found in provided data frame `input_data`.') else: input_data = input_data['area'].to_numpy() elif type(input_data) == pd.Series: input_data = input_data.to_numpy() else: raise TypeError('Unsupported type for `input_data`.') return input_data @staticmethod def get_outlier_bounds(input_data: np.ndarray, verbose: bool = False) -> tuple: """Calculate boundaries for outlier rejection. Outliers may bias the model independent gain calculation due to their large lever and should hence be removed. Determined are the values at which the entries in a window of size based on square-root binning falls below a certain threshold. Args: input_data: Input pulse area data. verbose: Verbosity of the output. Returns: Tuple with upper and lower bound to be used for outlier rejection. """ window_width = np.ceil((np.percentile(input_data, 99.99) - np.percentile(input_data, 0.01)) / np.sqrt(input_data.shape[0])) window_width = 5*window_width window_counts_threshold = 1 for i in tqdm(count(0), disable=not bool(verbose)): if len(input_data[(input_data >= i) & (input_data < i + window_width)]) <= window_counts_threshold: break outlier_bound_upper = i + window_width for i in tqdm(count(0), disable=not bool(verbose)): if len(input_data[(input_data <= -i) & (input_data > -(i + window_width))]) <= window_counts_threshold: break outlier_bound_lower = -(i + window_width) return outlier_bound_lower, outlier_bound_upper @staticmethod def get_area_histogram(input_data: np.ndarray, bin_width: int = 1, limits: Optional[Tuple[int, int]] = None) -> tuple: """Function to generate area histogram with given bin width. Args: input_data: Input pulse area data. bin_width: Bin width. limits: Range of histogram bins. Returns: tuple(bins_centers, cnts): Tuple with bins centers and corresponding counts values. """ try: bin_width = int(bin_width) except Exception: raise TypeError('bin_width must be of type int.') if limits is not None: if len(limits) != 2: raise ValueError('Limits must be a tuple of length 2.') try: limits_lower = int(limits[0]) limits_upper = int(limits[1]) except Exception: raise TypeError('Entries of limits must be of type int.') bins_edges = np.arange(limits_lower - 0.5, limits_upper + 1.5, bin_width) else: bins_edges = np.arange(np.min(input_data) - 0.5, np.max(input_data) + 1.5, bin_width) bins_centers = (bins_edges[1:] + bins_edges[:-1]) / 2 counts, _ = np.histogram(input_data, bins=bins_edges) return bins_centers, counts @staticmethod def get_moments(input_data: np.ndarray) -> dict: """Calculate moments relevant for model independent gains calculation. Args: input_data: Input pulse area data. Returns: Dictionary with moments relevant for model independent gains calculation. """ dict_moments = dict() dict_moments['mean'] = np.mean(input_data) dict_moments['variance'] = np.var(input_data) return dict_moments @staticmethod def sav_gol_smoothing(input_data: np.ndarray) -> np.ndarray: """Smooth `input_data` with Savitzky–Golay filter. Args: input_data: Input threshold dependent occupancy values. Returns: output_data: Savitzky–Golay filter smoothed input data. """ # Set length of the filter window num = len(input_data) - 1 if num % 2 == 0: num = num - 1 # Set order of the polynomial used to fit the samples order = 10 order = min(order, num - 1) # Apply filter output_data = savgol_filter(input_data, num, order) return output_data def get_occupancy_model_independent(self, areas_led_on: np.ndarray, areas_led_off: np.ndarray) -> dict: """Calculate the occupancy for the model independent gain calculation. For a range of thresholds within the 0PE peak areas, calculate the number of entries below threshold for 'LED off' (`integral_b`) and 'LED on' (`integral_s`) data areas. The occupancy can be estimated from the number of 'LED on' sample triggers with zero LED-induced photoelectrons, as can be assumed for a sufficiently low threshold for `integral_s`, and the total number of sample triggers, as can be by proportion estimated through `integral_b`. As the number of photoelectrons produced follows a Poisson distribution, we can use the expression `-ln(integral_s/integral_b)` as an estimator for the occupancy for the selected threshold. This threshold is supposed to be sufficiently low such that `integral_s` is not contaminated by a significant 1PE contribution. We therefore try to find the lowest threshold for which sufficient data points are available below threshold to achieve a relative occupancy error of below 1%. Ideally this is located in a local and global maximum plateau around / slightly below zero areas. Args: areas_led_on: Array with 'LED on' data pulse areas. areas_led_off: Array with 'LED off' data pulse areas. Returns: occupancy_estimator: Dictionary with the following keys: {'occupancy': final occupancy estimate; 'occupancy_err': uncertainty final occupancy estimate; 'threshold_occupancy_determination': threshold in 0PE peak area calculations for final occupancy estimate; 'thr_occ_det_integral_fraction': fraction of entries in 'LED off' data below threshold; 'tot_entries_b': total number of waveforms in 'LED off' data, 'iterations': dictionary with the tested thresholds and corresponding occupancy, occupancy uncertainty, and smoothed occupancy values} """ # Total number of waveforms in 'LED off' data tot_entries_b = areas_led_off.shape[0] # Define list of thresholds to probe in iterative occupancy determination if self.outliers_thresholds[0] is None: thr_it_occ_det = np.abs(self.get_outlier_bounds(areas_led_on)[0]) else: thr_it_occ_det = np.abs(self.outliers_thresholds[0]) lower_thr_it_occ_det = - thr_it_occ_det / 2 upper_thr_it_occ_det = thr_it_occ_det / 2 list_thr_it_occ_det = np.arange(lower_thr_it_occ_det, upper_thr_it_occ_det + 1) # Initialize lists for resulting occupancies as a function of threshold. # Use lists with append as it showed to be faster than np.arrays in this application. occupancy_list = list() occupancy_err_list = list() thresholds_list = list() f_list = list() # Loop over thresholds and calculate corresponding occupancy and error for threshold in list_thr_it_occ_det: # Calculate number of entries below peak area threshold for 'LED off' (integral_b) and # 'LED on' (integral_s) data. Correction factor on integral_b (and later also f) should # be close to one and only correct for differences due to outlier removal. # If significantly different cardinalities of LED on and off data sets are used, # the error calculation below may become incorrect. integral_b = np.sum(areas_led_off < threshold) * len(areas_led_on) / len(areas_led_off) integral_s = np.sum(areas_led_on < threshold) # Perform occupancy calculations only for positive number of entries below threshold if integral_b > 0 and integral_s > 0: # Fraction of entries in 'LED off' data below threshold f = integral_b / (tot_entries_b * len(areas_led_on) / len(areas_led_off)) # The occupancy can be estimated from the number of 'LED on' sample triggers with zero LED-induced # photoelectrons, as can be assumed for a sufficiently low threshold for integral_s, and the # total number of sample triggers, as can be by proportion estimated through integral_b. # As the number of photoelectrons produced follows a Poisson distribution, we can use the following # expression as an estimator for the occupancy for the selected threshold (which, if sufficiently low # to not be contaminated by a significant 1PE contribution, # should not change the obtained occupancy value). l_val = -np.log(integral_s / integral_b) l_err = np.sqrt((np.exp(l_val) + 1. - 2. * f) / integral_b) # Only consider occupancy values with relative uncertainties below 5%. if l_err / l_val <= 0.05: occupancy_list.append(l_val) occupancy_err_list.append(l_err) thresholds_list.append(threshold) f_list.append(f) if len(occupancy_list) == 0: raise ValueError('No occupancy values determined in threshold iterations.') # Convert to numpy arrays occupancy_list = np.asarray(occupancy_list) occupancy_err_list = np.asarray(occupancy_err_list) thresholds_list = np.asarray(thresholds_list) f_list = np.asarray(f_list) # Smooth threshold-dependent occupancies with Savitzky–Golay filter occupancy_list_smooth = self.sav_gol_smoothing(occupancy_list) # Sort indices in smoothed occupancy list by their element value in descending order occupancy_list_smooth_argsort = occupancy_list_smooth.argsort()[::-1] # Find threshold and corresponding occupancy for highest smoothed occupancy value # for which the relative occupancy error is below 1%. # Ideally this is located in a local and global maximum plateau around / slightly below zero areas # where 1PE contributions are negligible but on the other hand sufficient data points are available # below the threshold to bring the relative uncertainty down to the required level. # If no threshold value fulfils this criterion, return NaN occupancy. occupancy_estimate = np.nan occupancy_estimate_err = np.nan threshold_occ_det = np.nan thr_occ_det_integral_fraction = np.nan for idx in occupancy_list_smooth_argsort: if occupancy_err_list[idx] / occupancy_list[idx] < 0.01: occupancy_estimate = occupancy_list[idx] occupancy_estimate_err = occupancy_err_list[idx] threshold_occ_det = thresholds_list[idx] thr_occ_det_integral_fraction = f_list[idx] break if np.isnan(occupancy_estimate): warnings.warn('Failed to estimate occupancy with the required precision. Returned NaN.') elif occupancy_estimate <= 0: warnings.warn('Warning: Estimated occupancy ({:.3f} ± {:.3f}) seems to be ' 'less than zero.'.format(occupancy_estimate, occupancy_estimate_err)) elif self.verbose: print('Threshold for occupancy estimation: {}'.format(threshold_occ_det)) print('Estimated occupancy: {:.3f} ± {:.3f}'.format(occupancy_estimate, occupancy_estimate_err)) occupancy_estimator = {'occupancy': occupancy_estimate, 'occupancy_err': occupancy_estimate_err, 'threshold_occupancy_determination': threshold_occ_det, 'thr_occ_det_integral_fraction': thr_occ_det_integral_fraction, 'tot_entries_b': tot_entries_b, 'iterations': {'threshold': thresholds_list, 'occupancy': occupancy_list, 'occupancy_err': occupancy_err_list, 'occupancy_smoothed': occupancy_list_smooth}, } return occupancy_estimator def get_gain_model_independent(self, areas_led_on: np.ndarray, areas_led_off: np.ndarray, occupancy_estimator: dict, calc_spe_resolution: bool = True) -> dict: """Calculate model independent gain value. Args: areas_led_on: Array with 'LED on' data pulse areas. areas_led_off: Array with 'LED off' data pulse areas. occupancy_estimator: Output dictionary from `get_occupancy_model_independent` method. Must contain at least the following keys: `occupancy`, `occupancy_err`, `thr_occ_det_integral_fraction`, `tot_entries_b`. calc_spe_resolution: If true, calculate the SPE resolution from mean and variance of the single photoelectron response. Returns: gain_estimator: Dictionary with the following keys: {'moments_s': dict with first two moments of 'LED on' area distribution, 'moments_b': dict with first two moments of 'LED off' area distribution, 'mean_psi': mean of the single photoelectron response (unconverted gain), 'var_psi': variance of the single photoelectron response, 'mean_psi_stat_err': statistical error of mean_psi, 'mean_psi_sys_err': systematic error of mean_psi, 'mean_psi_err': total error of mean_psi, 'spe_resolution': SPE resolution (if `calc_spe_resolution` argument true), 'spe_resolution_stat_err': statistical error of spe_resolution (if `calc_spe_resolution` argument true), 'spe_resolution_sys_err': statistical error of spe_resolution (if `calc_spe_resolution` argument true), 'spe_resolution_err': total error of spe_resolution (if `calc_spe_resolution` argument true)} """ # Get moments of area distributions moments_s = self.get_moments(areas_led_on) mean_s = moments_s['mean'] var_s = moments_s['variance'] moments_b = self.get_moments(areas_led_off) mean_b = moments_b['mean'] var_b = moments_b['variance'] # Get occupancy related values occupancy = occupancy_estimator['occupancy'] occupancy_err = occupancy_estimator['occupancy_err'] f_b = occupancy_estimator['thr_occ_det_integral_fraction'] tot_b = occupancy_estimator['tot_entries_b'] # Calculate first two central moments of the single photoelectron response # and the uncertainties of the mean mean_psi = (mean_s - mean_b) / occupancy var_psi = (var_s - var_b) / occupancy - mean_psi**2 mean_psi_stat_err = np.sqrt((occupancy * (mean_psi**2 + var_psi) + 2 * var_b) / (tot_b * occupancy**2) + ( mean_psi * mean_psi * (np.exp(occupancy) + 1 - 2 * f_b)) / (f_b * tot_b * occupancy**2)) mean_psi_sys_err = (mean_s - mean_b) * occupancy_err / (occupancy**2) mean_psi_err = np.sqrt(mean_psi_stat_err**2 + mean_psi_sys_err**2) gain_estimator = {'moments_s': moments_s, 'moments_b': moments_b, 'mean_psi': mean_psi, 'variance_psi': var_psi, 'mean_psi_stat_err': mean_psi_stat_err, 'mean_psi_sys_err': mean_psi_sys_err, 'mean_psi_err': mean_psi_err} # Calculate SPE resolution if calc_spe_resolution: spe_resolution = np.sqrt(var_psi) / mean_psi var_psi_stat_err = np.sqrt(((mean_psi ** 2 - var_psi) ** 2 * (np.exp(occupancy) + 1 - 2 * f_b)) / ( f_b * tot_b * occupancy ** 2)) spe_resolution_stat_err = spe_resolution / 2 * var_psi_stat_err / var_psi var_psi_sys_err = np.sqrt( ((var_s - var_b) * occupancy_err / (occupancy ** 2)) ** 2 + (2 * mean_psi * mean_psi_sys_err) ** 2) spe_resolution_sys_err = spe_resolution / 2 * var_psi_sys_err / var_psi spe_resolution_err = np.sqrt(spe_resolution_stat_err ** 2 + spe_resolution_sys_err ** 2) gain_estimator.update({'spe_resolution': spe_resolution, 'spe_resolution_stat_err': spe_resolution_stat_err, 'spe_resolution_sys_err': spe_resolution_sys_err, 'spe_resolution_err': spe_resolution_err}) return gain_estimator def compute(self, areas_led_on: np.ndarray, areas_led_off: np.ndarray, adc_to_e: float = np.nan) -> dict: """Perform full model independent gain and occupancy calculation. Args: areas_led_on: Array with 'LED on' data pulse areas. areas_led_off: Array with 'LED off' data pulse areas. adc_to_e: Conversion factor pulse area in ADC units to charge in units of elementary charge. Returns: output_dict: Dictionary with the following keys: {'occupancy': final occupancy estimate; 'occupancy_err': uncertainty final occupancy estimate; 'threshold_occupancy_determination': threshold in 0PE peak area calculations for final occupancy estimate; 'thr_occ_det_integral_fraction': fraction of entries in 'LED off' data below threshold; 'tot_entries_b': total number of waveforms in 'LED off' data, 'iterations': dictionary with the tested thresholds and corresponding occupancy, occupancy uncertainty, and smoothed occupancy values, 'moments_s': dict with first two moments of 'LED on' area distribution, 'moments_b': dict with first two moments of 'LED off' area distribution, 'mean_psi': mean of the single photoelectron response (unconverted gain), 'var_psi': variance of the single photoelectron response, 'mean_psi_stat_err': statistical error of mean_psi, 'mean_psi_sys_err': systematic error of mean_psi, 'mean_psi_err': total error of mean_psi, 'gain': gain values (in units of read out electrons per induced photoelectron), 'gain_stat_err': statistical error gain, 'gain_sys_err': systematic error gain, 'gain_err': total error gain, 'outlier_thresholds': range of area values to be used (excluding outliers), 'histograms:' dictionary with the bin centers and counts for 'LED on' and 'LED off' area histograms with default bin width of 10 } """ # Calculate occupancy and gain occupancy_estimator = self.get_occupancy_model_independent(areas_led_on, areas_led_off) gain_estimator = self.get_gain_model_independent(areas_led_on, areas_led_off, occupancy_estimator) # Convert to gain values (in units of read out electrons per induced photoelectron) if np.isnan(adc_to_e): warnings.warn('Unable to convert gain values as no valid input for adc_to_e provided.') gain_converted = gain_estimator['mean_psi'] * adc_to_e gain_stat_err_converted = gain_estimator['mean_psi_stat_err'] * adc_to_e gain_sys_err_converted = gain_estimator['mean_psi_sys_err'] * adc_to_e gain_err_converted = gain_estimator['mean_psi_err'] * adc_to_e if self.verbose: print('Estimated gain [10^6]: {:.3f} ± {:.3f} (stat) ± {:.3f} (syst)'.format( gain_converted*1e-6, gain_stat_err_converted*1e-6, gain_sys_err_converted*1e-6)) gain_estimator_converted = {'gain': gain_converted, 'gain_stat_err': gain_stat_err_converted, 'gain_sys_err': gain_sys_err_converted, 'gain_err': gain_err_converted} # Generate histograms for later reference hist_bin_centers, hist_counts_led_on = self.get_area_histogram(areas_led_on, bin_width=10, limits=self.outliers_thresholds) hist_bin_centers, hist_counts_led_off = self.get_area_histogram(areas_led_off, bin_width=10, limits=self.outliers_thresholds) histograms = {'histograms': {'bin_centers': hist_bin_centers, 'counts_led_on': hist_counts_led_on, 'counts_led_off': hist_counts_led_off}} # Save everything in a dictionary output_dict = dict(**occupancy_estimator, **gain_estimator, **gain_estimator_converted, **{'outlier_thresholds': self.outliers_thresholds}, **histograms) return output_dict
Static methods
def get_area_histogram(input_data: numpy.ndarray, bin_width: int = 1, limits: Optional[Tuple[int, int]] = None) ‑> tuple
-
Function to generate area histogram with given bin width.
Args
input_data
- Input pulse area data.
bin_width
- Bin width.
limits
- Range of histogram bins.
Returns
tuple(bins_centers, cnts): Tuple with bins centers and corresponding counts values.
Expand source code
@staticmethod def get_area_histogram(input_data: np.ndarray, bin_width: int = 1, limits: Optional[Tuple[int, int]] = None) -> tuple: """Function to generate area histogram with given bin width. Args: input_data: Input pulse area data. bin_width: Bin width. limits: Range of histogram bins. Returns: tuple(bins_centers, cnts): Tuple with bins centers and corresponding counts values. """ try: bin_width = int(bin_width) except Exception: raise TypeError('bin_width must be of type int.') if limits is not None: if len(limits) != 2: raise ValueError('Limits must be a tuple of length 2.') try: limits_lower = int(limits[0]) limits_upper = int(limits[1]) except Exception: raise TypeError('Entries of limits must be of type int.') bins_edges = np.arange(limits_lower - 0.5, limits_upper + 1.5, bin_width) else: bins_edges = np.arange(np.min(input_data) - 0.5, np.max(input_data) + 1.5, bin_width) bins_centers = (bins_edges[1:] + bins_edges[:-1]) / 2 counts, _ = np.histogram(input_data, bins=bins_edges) return bins_centers, counts
def get_moments(input_data: numpy.ndarray) ‑> dict
-
Calculate moments relevant for model independent gains calculation.
Args
input_data
- Input pulse area data.
Returns
Dictionary with moments relevant for model independent gains calculation.
Expand source code
@staticmethod def get_moments(input_data: np.ndarray) -> dict: """Calculate moments relevant for model independent gains calculation. Args: input_data: Input pulse area data. Returns: Dictionary with moments relevant for model independent gains calculation. """ dict_moments = dict() dict_moments['mean'] = np.mean(input_data) dict_moments['variance'] = np.var(input_data) return dict_moments
def get_outlier_bounds(input_data: numpy.ndarray, verbose: bool = False) ‑> tuple
-
Calculate boundaries for outlier rejection.
Outliers may bias the model independent gain calculation due to their large lever and should hence be removed. Determined are the values at which the entries in a window of size based on square-root binning falls below a certain threshold.
Args
input_data
- Input pulse area data.
verbose
- Verbosity of the output.
Returns
Tuple with upper and lower bound to be used for outlier rejection.
Expand source code
@staticmethod def get_outlier_bounds(input_data: np.ndarray, verbose: bool = False) -> tuple: """Calculate boundaries for outlier rejection. Outliers may bias the model independent gain calculation due to their large lever and should hence be removed. Determined are the values at which the entries in a window of size based on square-root binning falls below a certain threshold. Args: input_data: Input pulse area data. verbose: Verbosity of the output. Returns: Tuple with upper and lower bound to be used for outlier rejection. """ window_width = np.ceil((np.percentile(input_data, 99.99) - np.percentile(input_data, 0.01)) / np.sqrt(input_data.shape[0])) window_width = 5*window_width window_counts_threshold = 1 for i in tqdm(count(0), disable=not bool(verbose)): if len(input_data[(input_data >= i) & (input_data < i + window_width)]) <= window_counts_threshold: break outlier_bound_upper = i + window_width for i in tqdm(count(0), disable=not bool(verbose)): if len(input_data[(input_data <= -i) & (input_data > -(i + window_width))]) <= window_counts_threshold: break outlier_bound_lower = -(i + window_width) return outlier_bound_lower, outlier_bound_upper
def inputs_to_numpy(input_data: Union[numpy.ndarray, list, dict, pandas.core.frame.DataFrame, pandas.core.series.Series]) ‑> numpy.ndarray
-
Bring pulse area data inputs to numpy.ndarray format.
Args
input_data
- Input pulse area data.
Allowed types:
numpy.ndarray, list, dict, pandas.DataFrame, pandas.Series
Returns
input_data
- Input data converted to numpy.ndarray.
Expand source code
@staticmethod def inputs_to_numpy(input_data: Union[np.ndarray, list, dict, pd.DataFrame, pd.Series]) -> np.ndarray: """Bring pulse area data inputs to numpy.ndarray format. Args: input_data: Input pulse area data. Allowed types: `numpy.ndarray, list, dict, pandas.DataFrame, pandas.Series` Returns: input_data: Input data converted to numpy.ndarray. """ if type(input_data) == np.ndarray: pass elif type(input_data) == list: input_data = np.asarray(input_data) elif type(input_data) == dict: if 'area' not in input_data.keys(): raise KeyError('No key name `area` found in provided dictionary `input_data`.') elif type(input_data['area']) == np.ndarray: input_data = input_data['area'] elif type(input_data['area']) == list: input_data = np.asarray(input_data['area']) else: raise TypeError('Unsupported type for `input_data`.') elif type(input_data) == pd.DataFrame: if 'area' not in input_data.columns: raise KeyError('No column name `area` found in provided data frame `input_data`.') else: input_data = input_data['area'].to_numpy() elif type(input_data) == pd.Series: input_data = input_data.to_numpy() else: raise TypeError('Unsupported type for `input_data`.') return input_data
def sav_gol_smoothing(input_data: numpy.ndarray) ‑> numpy.ndarray
-
Smooth
input_data
with Savitzky–Golay filter.Args
input_data
- Input threshold dependent occupancy values.
Returns
output_data
- Savitzky–Golay filter smoothed input data.
Expand source code
@staticmethod def sav_gol_smoothing(input_data: np.ndarray) -> np.ndarray: """Smooth `input_data` with Savitzky–Golay filter. Args: input_data: Input threshold dependent occupancy values. Returns: output_data: Savitzky–Golay filter smoothed input data. """ # Set length of the filter window num = len(input_data) - 1 if num % 2 == 0: num = num - 1 # Set order of the polynomial used to fit the samples order = 10 order = min(order, num - 1) # Apply filter output_data = savgol_filter(input_data, num, order) return output_data
Methods
def compute(self, areas_led_on: numpy.ndarray, areas_led_off: numpy.ndarray, adc_to_e: float = nan) ‑> dict
-
Perform full model independent gain and occupancy calculation.
Args
areas_led_on
- Array with 'LED on' data pulse areas.
areas_led_off
- Array with 'LED off' data pulse areas.
adc_to_e
- Conversion factor pulse area in ADC units to charge in units of elementary charge.
Returns
output_dict
- Dictionary with the following keys: {'occupancy': final occupancy estimate; 'occupancy_err': uncertainty final occupancy estimate; 'threshold_occupancy_determination': threshold in 0PE peak area calculations for final occupancy estimate; 'thr_occ_det_integral_fraction': fraction of entries in 'LED off' data below threshold; 'tot_entries_b': total number of waveforms in 'LED off' data, 'iterations': dictionary with the tested thresholds and corresponding occupancy, occupancy uncertainty, and smoothed occupancy values, 'moments_s': dict with first two moments of 'LED on' area distribution, 'moments_b': dict with first two moments of 'LED off' area distribution, 'mean_psi': mean of the single photoelectron response (unconverted gain), 'var_psi': variance of the single photoelectron response, 'mean_psi_stat_err': statistical error of mean_psi, 'mean_psi_sys_err': systematic error of mean_psi, 'mean_psi_err': total error of mean_psi, 'gain': gain values (in units of read out electrons per induced photoelectron), 'gain_stat_err': statistical error gain, 'gain_sys_err': systematic error gain, 'gain_err': total error gain, 'outlier_thresholds': range of area values to be used (excluding outliers), 'histograms:' dictionary with the bin centers and counts for 'LED on' and 'LED off' area histograms with default bin width of 10 }
Expand source code
def compute(self, areas_led_on: np.ndarray, areas_led_off: np.ndarray, adc_to_e: float = np.nan) -> dict: """Perform full model independent gain and occupancy calculation. Args: areas_led_on: Array with 'LED on' data pulse areas. areas_led_off: Array with 'LED off' data pulse areas. adc_to_e: Conversion factor pulse area in ADC units to charge in units of elementary charge. Returns: output_dict: Dictionary with the following keys: {'occupancy': final occupancy estimate; 'occupancy_err': uncertainty final occupancy estimate; 'threshold_occupancy_determination': threshold in 0PE peak area calculations for final occupancy estimate; 'thr_occ_det_integral_fraction': fraction of entries in 'LED off' data below threshold; 'tot_entries_b': total number of waveforms in 'LED off' data, 'iterations': dictionary with the tested thresholds and corresponding occupancy, occupancy uncertainty, and smoothed occupancy values, 'moments_s': dict with first two moments of 'LED on' area distribution, 'moments_b': dict with first two moments of 'LED off' area distribution, 'mean_psi': mean of the single photoelectron response (unconverted gain), 'var_psi': variance of the single photoelectron response, 'mean_psi_stat_err': statistical error of mean_psi, 'mean_psi_sys_err': systematic error of mean_psi, 'mean_psi_err': total error of mean_psi, 'gain': gain values (in units of read out electrons per induced photoelectron), 'gain_stat_err': statistical error gain, 'gain_sys_err': systematic error gain, 'gain_err': total error gain, 'outlier_thresholds': range of area values to be used (excluding outliers), 'histograms:' dictionary with the bin centers and counts for 'LED on' and 'LED off' area histograms with default bin width of 10 } """ # Calculate occupancy and gain occupancy_estimator = self.get_occupancy_model_independent(areas_led_on, areas_led_off) gain_estimator = self.get_gain_model_independent(areas_led_on, areas_led_off, occupancy_estimator) # Convert to gain values (in units of read out electrons per induced photoelectron) if np.isnan(adc_to_e): warnings.warn('Unable to convert gain values as no valid input for adc_to_e provided.') gain_converted = gain_estimator['mean_psi'] * adc_to_e gain_stat_err_converted = gain_estimator['mean_psi_stat_err'] * adc_to_e gain_sys_err_converted = gain_estimator['mean_psi_sys_err'] * adc_to_e gain_err_converted = gain_estimator['mean_psi_err'] * adc_to_e if self.verbose: print('Estimated gain [10^6]: {:.3f} ± {:.3f} (stat) ± {:.3f} (syst)'.format( gain_converted*1e-6, gain_stat_err_converted*1e-6, gain_sys_err_converted*1e-6)) gain_estimator_converted = {'gain': gain_converted, 'gain_stat_err': gain_stat_err_converted, 'gain_sys_err': gain_sys_err_converted, 'gain_err': gain_err_converted} # Generate histograms for later reference hist_bin_centers, hist_counts_led_on = self.get_area_histogram(areas_led_on, bin_width=10, limits=self.outliers_thresholds) hist_bin_centers, hist_counts_led_off = self.get_area_histogram(areas_led_off, bin_width=10, limits=self.outliers_thresholds) histograms = {'histograms': {'bin_centers': hist_bin_centers, 'counts_led_on': hist_counts_led_on, 'counts_led_off': hist_counts_led_off}} # Save everything in a dictionary output_dict = dict(**occupancy_estimator, **gain_estimator, **gain_estimator_converted, **{'outlier_thresholds': self.outliers_thresholds}, **histograms) return output_dict
def get_gain_model_independent(self, areas_led_on: numpy.ndarray, areas_led_off: numpy.ndarray, occupancy_estimator: dict, calc_spe_resolution: bool = True) ‑> dict
-
Calculate model independent gain value.
Args
areas_led_on
- Array with 'LED on' data pulse areas.
areas_led_off
- Array with 'LED off' data pulse areas.
occupancy_estimator
- Output dictionary from
get_occupancy_model_independent
method. Must contain at least the following keys:occupancy
,occupancy_err
,thr_occ_det_integral_fraction
,tot_entries_b
. calc_spe_resolution
- If true, calculate the SPE resolution from mean and variance of the single photoelectron response.
Returns
gain_estimator
- Dictionary with the following keys:
{'moments_s': dict with first two moments of 'LED on' area distribution,
'moments_b': dict with first two moments of 'LED off' area distribution,
'mean_psi': mean of the single photoelectron response (unconverted gain),
'var_psi': variance of the single photoelectron response,
'mean_psi_stat_err': statistical error of mean_psi,
'mean_psi_sys_err': systematic error of mean_psi,
'mean_psi_err': total error of mean_psi,
'spe_resolution': SPE resolution (if
calc_spe_resolution
argument true), 'spe_resolution_stat_err': statistical error of spe_resolution (ifcalc_spe_resolution
argument true), 'spe_resolution_sys_err': statistical error of spe_resolution (ifcalc_spe_resolution
argument true), 'spe_resolution_err': total error of spe_resolution (ifcalc_spe_resolution
argument true)}
Expand source code
def get_gain_model_independent(self, areas_led_on: np.ndarray, areas_led_off: np.ndarray, occupancy_estimator: dict, calc_spe_resolution: bool = True) -> dict: """Calculate model independent gain value. Args: areas_led_on: Array with 'LED on' data pulse areas. areas_led_off: Array with 'LED off' data pulse areas. occupancy_estimator: Output dictionary from `get_occupancy_model_independent` method. Must contain at least the following keys: `occupancy`, `occupancy_err`, `thr_occ_det_integral_fraction`, `tot_entries_b`. calc_spe_resolution: If true, calculate the SPE resolution from mean and variance of the single photoelectron response. Returns: gain_estimator: Dictionary with the following keys: {'moments_s': dict with first two moments of 'LED on' area distribution, 'moments_b': dict with first two moments of 'LED off' area distribution, 'mean_psi': mean of the single photoelectron response (unconverted gain), 'var_psi': variance of the single photoelectron response, 'mean_psi_stat_err': statistical error of mean_psi, 'mean_psi_sys_err': systematic error of mean_psi, 'mean_psi_err': total error of mean_psi, 'spe_resolution': SPE resolution (if `calc_spe_resolution` argument true), 'spe_resolution_stat_err': statistical error of spe_resolution (if `calc_spe_resolution` argument true), 'spe_resolution_sys_err': statistical error of spe_resolution (if `calc_spe_resolution` argument true), 'spe_resolution_err': total error of spe_resolution (if `calc_spe_resolution` argument true)} """ # Get moments of area distributions moments_s = self.get_moments(areas_led_on) mean_s = moments_s['mean'] var_s = moments_s['variance'] moments_b = self.get_moments(areas_led_off) mean_b = moments_b['mean'] var_b = moments_b['variance'] # Get occupancy related values occupancy = occupancy_estimator['occupancy'] occupancy_err = occupancy_estimator['occupancy_err'] f_b = occupancy_estimator['thr_occ_det_integral_fraction'] tot_b = occupancy_estimator['tot_entries_b'] # Calculate first two central moments of the single photoelectron response # and the uncertainties of the mean mean_psi = (mean_s - mean_b) / occupancy var_psi = (var_s - var_b) / occupancy - mean_psi**2 mean_psi_stat_err = np.sqrt((occupancy * (mean_psi**2 + var_psi) + 2 * var_b) / (tot_b * occupancy**2) + ( mean_psi * mean_psi * (np.exp(occupancy) + 1 - 2 * f_b)) / (f_b * tot_b * occupancy**2)) mean_psi_sys_err = (mean_s - mean_b) * occupancy_err / (occupancy**2) mean_psi_err = np.sqrt(mean_psi_stat_err**2 + mean_psi_sys_err**2) gain_estimator = {'moments_s': moments_s, 'moments_b': moments_b, 'mean_psi': mean_psi, 'variance_psi': var_psi, 'mean_psi_stat_err': mean_psi_stat_err, 'mean_psi_sys_err': mean_psi_sys_err, 'mean_psi_err': mean_psi_err} # Calculate SPE resolution if calc_spe_resolution: spe_resolution = np.sqrt(var_psi) / mean_psi var_psi_stat_err = np.sqrt(((mean_psi ** 2 - var_psi) ** 2 * (np.exp(occupancy) + 1 - 2 * f_b)) / ( f_b * tot_b * occupancy ** 2)) spe_resolution_stat_err = spe_resolution / 2 * var_psi_stat_err / var_psi var_psi_sys_err = np.sqrt( ((var_s - var_b) * occupancy_err / (occupancy ** 2)) ** 2 + (2 * mean_psi * mean_psi_sys_err) ** 2) spe_resolution_sys_err = spe_resolution / 2 * var_psi_sys_err / var_psi spe_resolution_err = np.sqrt(spe_resolution_stat_err ** 2 + spe_resolution_sys_err ** 2) gain_estimator.update({'spe_resolution': spe_resolution, 'spe_resolution_stat_err': spe_resolution_stat_err, 'spe_resolution_sys_err': spe_resolution_sys_err, 'spe_resolution_err': spe_resolution_err}) return gain_estimator
def get_occupancy_model_independent(self, areas_led_on: numpy.ndarray, areas_led_off: numpy.ndarray) ‑> dict
-
Calculate the occupancy for the model independent gain calculation.
For a range of thresholds within the 0PE peak areas, calculate the number of entries below threshold for 'LED off' (
integral_b
) and 'LED on' (integral_s
) data areas. The occupancy can be estimated from the number of 'LED on' sample triggers with zero LED-induced photoelectrons, as can be assumed for a sufficiently low threshold forintegral_s
, and the total number of sample triggers, as can be by proportion estimated throughintegral_b
. As the number of photoelectrons produced follows a Poisson distribution, we can use the expression-ln(integral_s/integral_b)
as an estimator for the occupancy for the selected threshold. This threshold is supposed to be sufficiently low such thatintegral_s
is not contaminated by a significant 1PE contribution. We therefore try to find the lowest threshold for which sufficient data points are available below threshold to achieve a relative occupancy error of below 1%. Ideally this is located in a local and global maximum plateau around / slightly below zero areas.Args
areas_led_on
- Array with 'LED on' data pulse areas.
areas_led_off
- Array with 'LED off' data pulse areas.
Returns
occupancy_estimator
- Dictionary with the following keys: {'occupancy': final occupancy estimate; 'occupancy_err': uncertainty final occupancy estimate; 'threshold_occupancy_determination': threshold in 0PE peak area calculations for final occupancy estimate; 'thr_occ_det_integral_fraction': fraction of entries in 'LED off' data below threshold; 'tot_entries_b': total number of waveforms in 'LED off' data, 'iterations': dictionary with the tested thresholds and corresponding occupancy, occupancy uncertainty, and smoothed occupancy values}
Expand source code
def get_occupancy_model_independent(self, areas_led_on: np.ndarray, areas_led_off: np.ndarray) -> dict: """Calculate the occupancy for the model independent gain calculation. For a range of thresholds within the 0PE peak areas, calculate the number of entries below threshold for 'LED off' (`integral_b`) and 'LED on' (`integral_s`) data areas. The occupancy can be estimated from the number of 'LED on' sample triggers with zero LED-induced photoelectrons, as can be assumed for a sufficiently low threshold for `integral_s`, and the total number of sample triggers, as can be by proportion estimated through `integral_b`. As the number of photoelectrons produced follows a Poisson distribution, we can use the expression `-ln(integral_s/integral_b)` as an estimator for the occupancy for the selected threshold. This threshold is supposed to be sufficiently low such that `integral_s` is not contaminated by a significant 1PE contribution. We therefore try to find the lowest threshold for which sufficient data points are available below threshold to achieve a relative occupancy error of below 1%. Ideally this is located in a local and global maximum plateau around / slightly below zero areas. Args: areas_led_on: Array with 'LED on' data pulse areas. areas_led_off: Array with 'LED off' data pulse areas. Returns: occupancy_estimator: Dictionary with the following keys: {'occupancy': final occupancy estimate; 'occupancy_err': uncertainty final occupancy estimate; 'threshold_occupancy_determination': threshold in 0PE peak area calculations for final occupancy estimate; 'thr_occ_det_integral_fraction': fraction of entries in 'LED off' data below threshold; 'tot_entries_b': total number of waveforms in 'LED off' data, 'iterations': dictionary with the tested thresholds and corresponding occupancy, occupancy uncertainty, and smoothed occupancy values} """ # Total number of waveforms in 'LED off' data tot_entries_b = areas_led_off.shape[0] # Define list of thresholds to probe in iterative occupancy determination if self.outliers_thresholds[0] is None: thr_it_occ_det = np.abs(self.get_outlier_bounds(areas_led_on)[0]) else: thr_it_occ_det = np.abs(self.outliers_thresholds[0]) lower_thr_it_occ_det = - thr_it_occ_det / 2 upper_thr_it_occ_det = thr_it_occ_det / 2 list_thr_it_occ_det = np.arange(lower_thr_it_occ_det, upper_thr_it_occ_det + 1) # Initialize lists for resulting occupancies as a function of threshold. # Use lists with append as it showed to be faster than np.arrays in this application. occupancy_list = list() occupancy_err_list = list() thresholds_list = list() f_list = list() # Loop over thresholds and calculate corresponding occupancy and error for threshold in list_thr_it_occ_det: # Calculate number of entries below peak area threshold for 'LED off' (integral_b) and # 'LED on' (integral_s) data. Correction factor on integral_b (and later also f) should # be close to one and only correct for differences due to outlier removal. # If significantly different cardinalities of LED on and off data sets are used, # the error calculation below may become incorrect. integral_b = np.sum(areas_led_off < threshold) * len(areas_led_on) / len(areas_led_off) integral_s = np.sum(areas_led_on < threshold) # Perform occupancy calculations only for positive number of entries below threshold if integral_b > 0 and integral_s > 0: # Fraction of entries in 'LED off' data below threshold f = integral_b / (tot_entries_b * len(areas_led_on) / len(areas_led_off)) # The occupancy can be estimated from the number of 'LED on' sample triggers with zero LED-induced # photoelectrons, as can be assumed for a sufficiently low threshold for integral_s, and the # total number of sample triggers, as can be by proportion estimated through integral_b. # As the number of photoelectrons produced follows a Poisson distribution, we can use the following # expression as an estimator for the occupancy for the selected threshold (which, if sufficiently low # to not be contaminated by a significant 1PE contribution, # should not change the obtained occupancy value). l_val = -np.log(integral_s / integral_b) l_err = np.sqrt((np.exp(l_val) + 1. - 2. * f) / integral_b) # Only consider occupancy values with relative uncertainties below 5%. if l_err / l_val <= 0.05: occupancy_list.append(l_val) occupancy_err_list.append(l_err) thresholds_list.append(threshold) f_list.append(f) if len(occupancy_list) == 0: raise ValueError('No occupancy values determined in threshold iterations.') # Convert to numpy arrays occupancy_list = np.asarray(occupancy_list) occupancy_err_list = np.asarray(occupancy_err_list) thresholds_list = np.asarray(thresholds_list) f_list = np.asarray(f_list) # Smooth threshold-dependent occupancies with Savitzky–Golay filter occupancy_list_smooth = self.sav_gol_smoothing(occupancy_list) # Sort indices in smoothed occupancy list by their element value in descending order occupancy_list_smooth_argsort = occupancy_list_smooth.argsort()[::-1] # Find threshold and corresponding occupancy for highest smoothed occupancy value # for which the relative occupancy error is below 1%. # Ideally this is located in a local and global maximum plateau around / slightly below zero areas # where 1PE contributions are negligible but on the other hand sufficient data points are available # below the threshold to bring the relative uncertainty down to the required level. # If no threshold value fulfils this criterion, return NaN occupancy. occupancy_estimate = np.nan occupancy_estimate_err = np.nan threshold_occ_det = np.nan thr_occ_det_integral_fraction = np.nan for idx in occupancy_list_smooth_argsort: if occupancy_err_list[idx] / occupancy_list[idx] < 0.01: occupancy_estimate = occupancy_list[idx] occupancy_estimate_err = occupancy_err_list[idx] threshold_occ_det = thresholds_list[idx] thr_occ_det_integral_fraction = f_list[idx] break if np.isnan(occupancy_estimate): warnings.warn('Failed to estimate occupancy with the required precision. Returned NaN.') elif occupancy_estimate <= 0: warnings.warn('Warning: Estimated occupancy ({:.3f} ± {:.3f}) seems to be ' 'less than zero.'.format(occupancy_estimate, occupancy_estimate_err)) elif self.verbose: print('Threshold for occupancy estimation: {}'.format(threshold_occ_det)) print('Estimated occupancy: {:.3f} ± {:.3f}'.format(occupancy_estimate, occupancy_estimate_err)) occupancy_estimator = {'occupancy': occupancy_estimate, 'occupancy_err': occupancy_estimate_err, 'threshold_occupancy_determination': threshold_occ_det, 'thr_occ_det_integral_fraction': thr_occ_det_integral_fraction, 'tot_entries_b': tot_entries_b, 'iterations': {'threshold': thresholds_list, 'occupancy': occupancy_list, 'occupancy_err': occupancy_err_list, 'occupancy_smoothed': occupancy_list_smooth}, } return occupancy_estimator