Source code for columnflow.calibration.cms.jets

# coding: utf-8

"""
Jet energy corrections and jet resolution smearing.
"""

import functools

import law

from columnflow.types import Any
from columnflow.calibration import Calibrator, calibrator
from columnflow.calibration.util import ak_random, propagate_met, sum_transverse
from columnflow.production.util import attach_coffea_behavior
from columnflow.util import maybe_import, InsertableDict, DotDict
from columnflow.columnar_util import set_ak_column, layout_ak_array, optional_column as optional

np = maybe_import("numpy")
ak = maybe_import("awkward")
correctionlib = maybe_import("correctionlib")

logger = law.logger.get_logger(__name__)


#
# helper functions
#

set_ak_column_f32 = functools.partial(set_ak_column, value_type=np.float32)


[docs]def get_evaluators( correction_set: correctionlib.highlevel.CorrectionSet, names: list[str], ) -> list[Any]: """ Helper function to get a list of correction evaluators from a :external+correctionlib:py:class:`correctionlib.highlevel.CorrectionSet` object given a list of *names*. The *names* can refer to either simple or compound corrections. :param correction_set: evaluator provided by :external+correctionlib:doc:`index` :param names: List of names of corrections to be applied :raises RuntimeError: If a requested correction in *names* is not available :return: List of compounded corrections, see :external+correctionlib:py:class:`correctionlib.highlevel.CorrectionSet` """ # raise nice error if keys not found available_keys = set(correction_set.keys()).union(correction_set.compound.keys()) missing_keys = set(names) - available_keys if missing_keys: raise RuntimeError("corrections not found:" + "".join( f"\n - {name}" for name in names if name in missing_keys ) + "\navailable:" + "".join( f"\n - {name}" for name in sorted(available_keys) )) # retrieve the evaluators return [ correction_set.compound[name] if name in correction_set.compound else correction_set[name] for name in names ]
[docs]def ak_evaluate(evaluator: correctionlib.highlevel.Correction, *args) -> float: """ Evaluate a :external+correctionlib:py:class:`correctionlib.highlevel.Correction` using one or more :external+ak:py:class:`awkward arrays <ak.Array>` as inputs. :param evaluator: Evaluator instance :raises ValueError: If no :external+ak:py:class:`awkward arrays <ak.Array>` are provided :return: The correction factor derived from the input arrays """ # fail if no arguments if not args: raise ValueError("Expected at least one argument.") # collect arguments that are awkward arrays ak_args = [ arg for arg in args if isinstance(arg, ak.Array) ] # broadcast akward arrays together and flatten if ak_args: bc_args = ak.broadcast_arrays(*ak_args) flat_args = ( np.asarray(ak.flatten(bc_arg, axis=None)) for bc_arg in bc_args ) output_layout_array = bc_args[0] else: flat_args = iter(()) output_layout_array = None # multiplex flattened and non-awkward inputs all_flat_args = [ next(flat_args) if isinstance(arg, ak.Array) else arg for arg in args ] # apply evaluator to flattened/multiplexed inputs result = evaluator.evaluate(*all_flat_args) # apply broadcasted layout to result if output_layout_array is not None: result = layout_ak_array(result, output_layout_array) return result
# # jet energy corrections # # define default functions for jec calibrator def get_jerc_file_default(self: Calibrator, external_files: DotDict) -> str: """ Function to obtain external correction files for JEC and/or JER. By default, this function extracts the location of the jec correction files from the current config instance *config_inst*. The key of the external file depends on the jet collection. For ``Jet`` (AK4 jets), this resolves to ``jet_jerc``, and for ``FatJet`` it is resolved to ``fat_jet_jerc``. .. code-block:: python cfg.x.external_files = DotDict.wrap({ "jet_jerc": "/afs/cern.ch/work/m/mrieger/public/mirrors/jsonpog-integration-9ea86c4c/POG/JME/2017_UL/jet_jerc.json.gz", "fat_jet_jerc": "/afs/cern.ch/work/m/mrieger/public/mirrors/jsonpog-integration-9ea86c4c/POG/JME/2017_UL/fatJet_jerc.json.gz", }) :param external_files: Dictionary containing the information about the file location :return: path or url to correction file(s) """ # noqa # get config try_attrs = ("get_jec_config", "get_jer_config") jerc_config = None for try_attr in try_attrs: try: jerc_config = getattr(self, try_attr)() except AttributeError: continue else: break # fail if not found if jerc_config is None: raise ValueError( "could not retrieve jer/jec config, none of the following methods " f"were found: {try_attrs}", ) # first check config for user-supplied `external_file_key` ext_file_key = jerc_config.get("external_file_key", None) if ext_file_key is not None: return external_files[ext_file_key] # if not found, try to resolve from jet collection name and fail if not standard NanoAOD if self.jet_name not in get_jerc_file_default.map_jet_name_file_key: available_keys = ", ".join(sorted(get_jerc_file_default.map_jet_name_file_key)) raise ValueError( f"could not determine external file key for jet collection '{self.jet_name}', " f"name is not one of standard NanoAOD jet collections: {available_keys}", ) # return external file ext_file_key = get_jerc_file_default.map_jet_name_file_key[self.jet_name] return external_files[ext_file_key] # default external file keys for known jet collections get_jerc_file_default.map_jet_name_file_key = { "Jet": "jet_jerc", "FatJet": "fat_jet_jerc", } def get_jec_config_default(self: Calibrator) -> DotDict: """ Load config relevant to the jet energy corrections (JEC). By default, this is extracted from the current *config_inst*, assuming the JEC configurations are stored under the 'jec' aux key. Separate configurations should be specified for each jet collection, using the collection name as a key. For example, the configuration for the default jet collection ``Jet`` will be retrieved from the following config entry: .. code-block:: python self.config_inst.x.jec.Jet Used in :py:meth:`~.jec.setup_func`. :return: Dictionary containing configuration for jet energy calibration """ jec_cfg = self.config_inst.x.jec # check for old-style config if self.jet_name not in jec_cfg: # if jet collection is `Jet`, issue deprecation warning if self.jet_name == "Jet": logger.warning_once( f"{id(self)}_depr_jec_config", "config aux 'jec' does not contain key for input jet " f"collection '{self.jet_name}'. This may be due to " "an outdated config. Continuing under the assumption that " "the entire 'jec' entry refers to this jet collection. " "This assumption will be removed in future versions of " "columnflow, so please adapt the config according to the " "documentation to remove this warning and ensure future " "compatibility of the code.", ) return jec_cfg # otherwise raise exception raise ValueError( "config aux 'jec' does not contain key for input jet " f"collection '{self.jet_name}'.", ) return jec_cfg[self.jet_name]
[docs]@calibrator( uses={ optional("fixedGridRhoFastjetAll"), optional("Rho.fixedGridRhoFastjetAll"), attach_coffea_behavior, }, # name of the jet collection to calibrate jet_name="Jet", # name of the associated MET collection met_name="MET", # name of the associated Raw MET collection raw_met_name="RawMET", # custom uncertainty sources, defaults to config when empty uncertainty_sources=None, # toggle for propagation to MET propagate_met=True, # function to determine the correction file get_jec_file=get_jerc_file_default, # function to determine the jec configuration dict get_jec_config=get_jec_config_default, ) def jec( self: Calibrator, events: ak.Array, min_pt_met_prop: float = 15.0, max_eta_met_prop: float = 5.2, **kwargs, ) -> ak.Array: """Performs the jet energy corrections (JECs) and uncertainty shifts using the :external+correctionlib:doc:`index`, optionally propagating the changes to the MET. The *jet_name* should be set to the name of the NanoAOD jet collection to calibrate (default: ``Jet``, i.e. AK4 jets). Requires an external file in the config pointing to the JSON files containing the JECs. The file key can be specified via an optional ``external_file_key`` in the ``jec`` config entry. If not given, the file key will be determined automatically based on the jet collection name: ``jet_jerc`` for ``Jet`` (AK4 jets), ``fat_jet_jerc`` for``FatJet`` (AK8 jets). A full set of JSON files can be specified as: .. code-block:: python cfg.x.external_files = DotDict.wrap({ "jet_jerc": "/afs/cern.ch/work/m/mrieger/public/mirrors/jsonpog-integration-9ea86c4c/POG/JME/2017_UL/jet_jerc.json.gz", "fat_jet_jerc": "/afs/cern.ch/work/m/mrieger/public/mirrors/jsonpog-integration-9ea86c4c/POG/JME/2017_UL/fatJet_jerc.json.gz", }) For more file-grained control, the *get_jec_file* can be adapted in a subclass in case it is stored differently in the external files The JEC configuration should be an auxiliary entry in the config, specifying the correction details under "jec". Separate configs should be given for each jet collection to calibrate, using the jet collection name as a subkey. An example of a valid configuration for correction AK4 jets with JEC is: .. code-block:: python cfg.x.jec = { "Jet": { "campaign": "Summer19UL17", "version": "V5", "jet_type": "AK4PFchs", "levels": ["L1L2L3Res"], # or individual correction levels "levels_for_type1_met": ["L1FastJet"], "uncertainty_sources": [ "Total", "CorrelationGroupMPFInSitu", "CorrelationGroupIntercalibration", "CorrelationGroupbJES", "CorrelationGroupFlavor", "CorrelationGroupUncorrelated", ] }, } *get_jec_config* can be adapted in a subclass in case it is stored differently in the config. If running on data, the datasets must have an auxiliary field *jec_era* defined, e.g. "RunF", or an auxiliary field *era*, e.g. "F". This instance of :py:class:`~columnflow.calibration.Calibrator` is initialized with the following parameters by default: :param events: awkward array containing events to process :param min_pt_met_prop: If *propagate_met* variable is ``True`` propagate the updated jet values to the missing transverse energy (MET) using :py:func:`~columnflow.calibration.util.propagate_met` for events where ``met.pt > *min_pt_met_prop*``. :param max_eta_met_prop: If *propagate_met* variable is ``True`` propagate the updated jet values to the missing transverse energy (MET) using :py:func:`~columnflow.calibration.util.propagate_met` for events where ``met.eta > *min_eta_met_prop*``. """ # noqa # use local variable for convenience jet_name = self.jet_name # calculate uncorrected pt, mass events = set_ak_column_f32(events, f"{jet_name}.pt_raw", events[jet_name].pt * (1 - events[jet_name].rawFactor)) events = set_ak_column_f32(events, f"{jet_name}.mass_raw", events[jet_name].mass * (1 - events[jet_name].rawFactor)) def correct_jets(*, pt, eta, phi, area, rho, evaluator_key="jec"): # variable naming convention variable_map = { "JetA": area, "JetEta": eta, "JetPt": pt, "JetPhi": phi, "Rho": ak.values_astype(rho, np.float32), } # apply all correctors sequentially, updating the pt each time full_correction = ak.ones_like(pt, dtype=np.float32) for corrector in self.evaluators[evaluator_key]: # determine correct inputs (change depending on corrector) inputs = [ variable_map[inp.name] for inp in corrector.inputs ] correction = ak_evaluate(corrector, *inputs) # update pt for subsequent correctors variable_map["JetPt"] = variable_map["JetPt"] * correction full_correction = full_correction * correction return full_correction # obtain rho, which might be located at different routes, depending on the nano version rho = ( events.fixedGridRhoFastjetAll if "fixedGridRhoFastjetAll" in events.fields else events.Rho.fixedGridRhoFastjetAll ) # correct jets with only a subset of correction levels # (for calculating TypeI MET correction) if self.propagate_met: # get correction factors jec_factors_subset_type1_met = correct_jets( pt=events[jet_name].pt_raw, eta=events[jet_name].eta, phi=events[jet_name].phi, area=events[jet_name].area, rho=rho, evaluator_key="jec_subset_type1_met", ) # temporarily apply the new factors with only subset of corrections events = set_ak_column_f32(events, f"{jet_name}.pt", events[jet_name].pt_raw * jec_factors_subset_type1_met) events = set_ak_column_f32(events, f"{jet_name}.mass", events[jet_name].mass_raw * jec_factors_subset_type1_met) events = self[attach_coffea_behavior](events, collections=[jet_name], **kwargs) # store pt and phi of the full jet system for MET propagation, including a selection in raw info # see https://twiki.cern.ch/twiki/bin/view/CMS/JECAnalysesRecommendations?rev=19#Minimum_jet_selection_cuts met_prop_mask = (events[jet_name].pt_raw > min_pt_met_prop) & (abs(events[jet_name].eta) < max_eta_met_prop) jetsum = events[jet_name][met_prop_mask].sum(axis=1) jetsum_pt_subset_type1_met = jetsum.pt jetsum_phi_subset_type1_met = jetsum.phi # factors for full jet correction with all levels jec_factors = correct_jets( pt=events[jet_name].pt_raw, eta=events[jet_name].eta, phi=events[jet_name].phi, area=events[jet_name].area, rho=rho, evaluator_key="jec", ) # apply full jet correction events = set_ak_column_f32(events, f"{jet_name}.pt", events[jet_name].pt_raw * jec_factors) events = set_ak_column_f32(events, f"{jet_name}.mass", events[jet_name].mass_raw * jec_factors) rawFactor = ak.nan_to_num(1 - events[jet_name].pt_raw / events[jet_name].pt, nan=0.0) events = set_ak_column_f32(events, f"{jet_name}.rawFactor", rawFactor) events = self[attach_coffea_behavior](events, collections=[jet_name], **kwargs) # nominal met propagation if self.propagate_met: # get pt and phi of all jets after correcting jetsum = events[jet_name][met_prop_mask].sum(axis=1) jetsum_pt_all_levels = jetsum.pt jetsum_phi_all_levels = jetsum.phi # propagate changes to MET, starting from jets corrected with subset of JEC levels # (recommendation is to propagate only L2 corrections and onwards) met_pt, met_phi = propagate_met( jetsum_pt_subset_type1_met, jetsum_phi_subset_type1_met, jetsum_pt_all_levels, jetsum_phi_all_levels, events[self.raw_met_name].pt, events[self.raw_met_name].phi, ) events = set_ak_column_f32(events, f"{self.met_name}.pt", met_pt) events = set_ak_column_f32(events, f"{self.met_name}.phi", met_phi) # variable naming conventions variable_map = { "JetEta": events[jet_name].eta, "JetPt": events[jet_name].pt_raw, } # jet energy uncertainty components for name, evaluator in self.evaluators["junc"].items(): # get uncertainty inputs = [variable_map[inp.name] for inp in evaluator.inputs] jec_uncertainty = ak_evaluate(evaluator, *inputs) # apply jet uncertainty shifts events = set_ak_column_f32( events, f"{jet_name}.pt_jec_{name}_up", events[jet_name].pt * (1.0 + jec_uncertainty), ) events = set_ak_column_f32( events, f"{jet_name}.pt_jec_{name}_down", events[jet_name].pt * (1.0 - jec_uncertainty), ) events = set_ak_column_f32( events, f"{jet_name}.mass_jec_{name}_up", events[jet_name].mass * (1.0 + jec_uncertainty), ) events = set_ak_column_f32( events, f"{jet_name}.mass_jec_{name}_down", events[jet_name].mass * (1.0 - jec_uncertainty), ) # propagate shifts to MET if self.propagate_met: jet_pt_up = events[jet_name][met_prop_mask][f"pt_jec_{name}_up"] jet_pt_down = events[jet_name][met_prop_mask][f"pt_jec_{name}_down"] met_pt_up, met_phi_up = propagate_met( jetsum_pt_all_levels, jetsum_phi_all_levels, jet_pt_up, events[jet_name][met_prop_mask].phi, met_pt, met_phi, ) met_pt_down, met_phi_down = propagate_met( jetsum_pt_all_levels, jetsum_phi_all_levels, jet_pt_down, events[jet_name][met_prop_mask].phi, met_pt, met_phi, ) events = set_ak_column_f32(events, f"{self.met_name}.pt_jec_{name}_up", met_pt_up) events = set_ak_column_f32(events, f"{self.met_name}.pt_jec_{name}_down", met_pt_down) events = set_ak_column_f32(events, f"{self.met_name}.phi_jec_{name}_up", met_phi_up) events = set_ak_column_f32(events, f"{self.met_name}.phi_jec_{name}_down", met_phi_down) return events
@jec.init def jec_init(self: Calibrator) -> None: jec_cfg = self.get_jec_config() sources = self.uncertainty_sources if sources is None: sources = jec_cfg.uncertainty_sources or [] self.uncertainty_sources = sources # register used jet columns self.uses.add(f"{self.jet_name}.{{pt,eta,phi,mass,area,rawFactor}}") # register produced jet columns self.produces.add(f"{self.jet_name}.{{pt,mass,rawFactor}}") # add shifted jet variables self.produces |= { f"{self.jet_name}.{shifted_var}_jec_{junc_name}_{junc_dir}" for shifted_var in ("pt", "mass") for junc_name in sources for junc_dir in ("up", "down") } # add MET variables if self.propagate_met: self.uses.add(f"{self.raw_met_name}.{{pt,phi}}") self.produces.add(f"{self.met_name}.{{pt,phi}}") # add shifted MET variables self.produces |= { f"{self.met_name}.{shifted_var}_jec_{junc_name}_{junc_dir}" for shifted_var in ("pt", "phi") for junc_name in sources for junc_dir in ("up", "down") } @jec.requires def jec_requires(self: Calibrator, reqs: dict) -> None: if "external_files" in reqs: return from columnflow.tasks.external import BundleExternalFiles reqs["external_files"] = BundleExternalFiles.req(self.task) @jec.setup def jec_setup(self: Calibrator, reqs: dict, inputs: dict, reader_targets: InsertableDict) -> None: """ Load the correct jec files using the :py:func:`from_string` method of the :external+correctionlib:py:class:`correctionlib.highlevel.CorrectionSet` function and apply the corrections as needed. The source files for the :external+correctionlib:py:class:`correctionlib.highlevel.CorrectionSet` instance are extracted with the :py:meth:`~.jec.get_jec_file`. Uses the member function :py:meth:`~.jec.get_jec_config` to construct the required keys, which are based on the following information about the JEC: - levels - campaign - version - jet_type A corresponding example snippet wihtin the *config_inst* could like something like this: .. code-block:: python cfg.x.jec = DotDict.wrap({ "Jet": { # campaign name for this JEC correctiono "campaign": f"Summer19UL{year2}{jerc_postfix}", # version of the corrections "version": "V7", # Type of jets that the corrections should be applied on "jet_type": "AK4PFchs", # relevant levels in the derivation process of the JEC "levels": ["L1FastJet", "L2Relative", "L2L3Residual", "L3Absolute"], # relevant levels in the derivation process of the Type 1 MET JEC "levels_for_type1_met": ["L1FastJet"], # names of the uncertainties to be applied "uncertainty_sources": [ "Total", "CorrelationGroupMPFInSitu", "CorrelationGroupIntercalibration", "CorrelationGroupbJES", "CorrelationGroupFlavor", "CorrelationGroupUncorrelated", ], }, }) :param reqs: Requirement dictionary for this :py:class:`~columnflow.calibration.Calibrator` instance :param inputs: Additional inputs, currently not used :param reader_targets: TODO: add documentation """ bundle = reqs["external_files"] # import the correction sets from the external file import correctionlib correction_set = correctionlib.CorrectionSet.from_string( self.get_jec_file(bundle.files).load(formatter="gzip").decode("utf-8"), ) # compute JEC keys from config information jec_cfg = self.get_jec_config() def make_jme_keys(names, jec=jec_cfg, is_data=self.dataset_inst.is_data): if is_data: jec_era = self.dataset_inst.get_aux("jec_era", None) # if no special JEC era is specified, infer based on 'era' if jec_era is None: jec_era = "Run" + self.dataset_inst.get_aux("era") return [ f"{jec.campaign}_{jec_era}_{jec.version}_DATA_{name}_{jec.jet_type}" if is_data else f"{jec.campaign}_{jec.version}_MC_{name}_{jec.jet_type}" for name in names ] jec_keys = make_jme_keys(jec_cfg.levels) jec_keys_subset_type1_met = make_jme_keys(jec_cfg.levels_for_type1_met) junc_keys = make_jme_keys(self.uncertainty_sources, is_data=False) # uncertainties only stored as MC keys # store the evaluators self.evaluators = { "jec": get_evaluators(correction_set, jec_keys), "jec_subset_type1_met": get_evaluators(correction_set, jec_keys_subset_type1_met), "junc": dict(zip(self.uncertainty_sources, get_evaluators(correction_set, junc_keys))), } # custom jec calibrator that only runs nominal correction jec_nominal = jec.derive("jec_nominal", cls_dict={"uncertainty_sources": []}) # explicit calibrators for standard jet collections jec_ak4 = jec.derive("jec_ak4", cls_dict={"jet_name": "Jet"}) jec_ak8 = jec.derive("jec_ak8", cls_dict={"jet_name": "FatJet", "propagate_met": False}) jec_ak4_nominal = jec_ak4.derive("jec_ak4", cls_dict={"uncertainty_sources": []}) jec_ak8_nominal = jec_ak8.derive("jec_ak8", cls_dict={"uncertainty_sources": []}) def get_jer_config_default(self: Calibrator) -> DotDict: """ Load config relevant to the jet energy resolution (JER) smearing. By default, this is extracted from the current *config_inst*, assuming the JER configurations are stored under the 'jer' aux key. Separate configurations should be specified for each jet collection, using the collection name as a key. For example, the configuration for the default jet collection ``Jet`` will be retrieved from the following config entry: .. code-block:: python self.config_inst.x.jer.Jet Used in :py:meth:`~.jer.setup_func`. :return: Dictionary containing configuration for JER smearing """ jer_cfg = self.config_inst.x.jer # check for old-style config if self.jet_name not in jer_cfg: # if jet collection is `Jet`, issue deprecation warning if self.jet_name == "Jet": logger.warning_once( f"{id(self)}_depr_jer_config", "config aux 'jer' does not contain key for input jet " f"collection '{self.jet_name}'. This may be due to " "an outdated config. Continuing under the assumption that " "the entire 'jer' entry refers to this jet collection. " "This assumption will be removed in future versions of " "columnflow, so please adapt the config according to the " "documentation to remove this warning and ensure future " "compatibility of the code.", ) return jer_cfg # otherwise raise exception raise ValueError( "config aux 'jer' does not contain key for input jet " f"collection '{self.jet_name}'.", ) return jer_cfg[self.jet_name] # # jet energy resolution smearing #
[docs]@calibrator( uses={ optional("Rho.fixedGridRhoFastjetAll"), optional("fixedGridRhoFastjetAll"), attach_coffea_behavior, }, # name of the jet collection to smear jet_name="Jet", # name of the associated gen jet collection gen_jet_name="GenJet", # name of the associated MET collection met_name="MET", # toggle for propagation to MET propagate_met=True, # only run on mc mc_only=True, # use deterministic seeds for random smearing and # take the "index"-th random number per seed when not -1 deterministic_seed_index=-1, # function to determine the correction file get_jer_file=get_jerc_file_default, # function to determine the jer configuration dict get_jer_config=get_jer_config_default, # function to determine the jec configuration dict get_jec_config=get_jec_config_default, # jec uncertainty sources to propagate jer to, defaults to config when empty jec_uncertainty_sources=None, # whether gen jet matching should be performed relative to the nominal jet pt, or the jec varied values gen_jet_matching_nominal=False, ) def jer(self: Calibrator, events: ak.Array, **kwargs) -> ak.Array: """ Applies the jet energy resolution smearing in MC and calculates the associated uncertainty shifts using the :external+correctionlib:doc:`index`, following the recommendations given in https://twiki.cern.ch/twiki/bin/viewauth/CMS/JetResolution. The *jet_name* and *gen_jet_name* should be set to the name of the NanoAOD jet and gen jet collections to use as an input for JER smearing (default: ``Jet`` and ``GenJet``, respectively, i.e. AK4 jets). Requires an external file in the config pointing to the JSON files containing the JER information. The file key can be specified via an optional ``external_file_key`` in the ``jer`` config entry. If not given, the file key will be determined automatically based on the jet collection name: ``jet_jerc`` for ``Jet`` (AK4 jets), ``fat_jet_jerc`` for``FatJet`` (AK8 jets). A full set of JSON files can be specified as: .. code-block:: python cfg.x.external_files = DotDict.wrap({ "jet_jerc": "/afs/cern.ch/work/m/mrieger/public/mirrors/jsonpog-integration-9ea86c4c/POG/JME/2017_UL/jet_jerc.json.gz", "fat_jet_jerc": "/afs/cern.ch/work/m/mrieger/public/mirrors/jsonpog-integration-9ea86c4c/POG/JME/2017_UL/fatJet_jerc.json.gz", }) For more fine-grained control, the *get_jer_file* can be adapted in a subclass in case it is stored differently in the external files. The JER smearing configuration should be an auxiliary entry in the config, specifying the input JER to use under "jer". Separate configs should be given for each jet collection to smear, using the jet collection name as a subkey. An example of a valid configuration for smearing AK4 jets with JER is: .. code-block:: python cfg.x.jer = { "Jet": { "campaign": "Summer19UL17", "version": "JRV2", "jet_type": "AK4PFchs", }, } *get_jer_config* can be adapted in a subclass in case it is stored differently in the config. The nominal JER smearing is performed on nominal jets as well as those varied as a result of jet energy corrections. For this purpose, *get_jec_config* and *jec_uncertainty_sources* can be defined to control the considered variations. Consequently, the matching of jets to gen jets which depends on pt values of the former is subject to a choice regarding which pt values to use. If *gen_jet_matching_nominal* is *True*, the nominal pt values are used, and the jec varied pt values otherwise. Throws an error if running on data. :param events: awkward array containing events to process """ # noqa # use local variables for convenience jet_name = self.jet_name gen_jet_name = self.gen_jet_name met_name = self.met_name # fail when running on data if self.dataset_inst.is_data: raise ValueError("attempt to apply jet energy resolution smearing in data") # prepare variations jer_nom, jer_up, jer_down = self.jer_variations # save the unsmeared properties in case they are needed later events = set_ak_column_f32(events, f"{jet_name}.pt_unsmeared", events[jet_name].pt) events = set_ak_column_f32(events, f"{jet_name}.mass_unsmeared", events[jet_name].mass) # normally distributed random numbers per jet for use in stochastic smearing below random_normal = ( ak_random(0, 1, events[jet_name].deterministic_seed, rand_func=self.deterministic_normal) if self.deterministic_seed_index >= 0 else ak_random(0, 1, rand_func=np.random.Generator( np.random.SFC64(events.event.to_list())).normal, ) ) # obtain rho, which might be located at different routes, depending on the nano version rho = ( events.fixedGridRhoFastjetAll if "fixedGridRhoFastjetAll" in events.fields else events.Rho.fixedGridRhoFastjetAll ) # prepare evaluator variables variable_map = { "JetEta": events[jet_name].eta, "JetPt": events[jet_name].pt, "Rho": rho, "systematic": jer_nom, } # extract nominal pt resolution inputs = [variable_map[inp.name] for inp in self.evaluators["jer"].inputs] jerpt = {jer_nom: ak_evaluate(self.evaluators["jer"], *inputs)} # for simplifications below, use the same values for jer variations jerpt[jer_up] = jerpt[jer_nom] jerpt[jer_down] = jerpt[jer_nom] # extract pt resolutions evaluted for jec uncertainties for jec_var in self.jec_variations: _variable_map = variable_map | {"JetPt": events[jet_name][f"pt_{jec_var}"]} inputs = [_variable_map[inp.name] for inp in self.evaluators["jer"].inputs] jerpt[jec_var] = ak_evaluate(self.evaluators["jer"], *inputs) # extract scale factors jersf = {} for jer_var in self.jer_variations: _variable_map = variable_map | {"systematic": jer_var} inputs = [_variable_map[inp.name] for inp in self.evaluators["sf"].inputs] jersf[jer_var] = ak_evaluate(self.evaluators["sf"], *inputs) # extract scale factors for jec uncertainties for jec_var in self.jec_variations: _variable_map = variable_map | {"JetPt": events[jet_name][f"pt_{jec_var}"]} inputs = [_variable_map[inp.name] for inp in self.evaluators["sf"].inputs] jersf[jec_var] = ak_evaluate(self.evaluators["sf"], *inputs) # array with all JER scale factor variations as an additional axis # (note: axis needs to be regular for broadcasting to work correctly) jerpt = ak.concatenate( [jerpt[v][..., None] for v in self.jer_variations + self.jec_variations], axis=-1, ) jersf = ak.concatenate( [jersf[v][..., None] for v in self.jer_variations + self.jec_variations], axis=-1, ) # -- stochastic smearing # scale random numbers according to JER SF jersf2_m1 = jersf**2 - 1 add_smear = np.sqrt(ak.where(jersf2_m1 < 0, 0, jersf2_m1)) # compute smearing factors (stochastic method) smear_factors_stochastic = 1.0 + random_normal * jerpt * add_smear # -- scaling method (using gen match) # mask negative gen jet indices (= no gen match) gen_jet_idx = events[jet_name][self.gen_jet_idx_column] valid_gen_jet_idxs = ak.mask(gen_jet_idx, gen_jet_idx >= 0) # pad list of gen jets to prevent index error on match lookup max_gen_jet_idx = ak.max(valid_gen_jet_idxs) padded_gen_jets = ak.pad_none( events[gen_jet_name], 0 if max_gen_jet_idx is None else (max_gen_jet_idx + 1), ) # gen jets that match the reconstructed jets matched_gen_jet = padded_gen_jets[valid_gen_jet_idxs] # compute the relative (reco - gen) pt difference if self.gen_jet_matching_nominal: # use nominal pt for matching match_pt = events[jet_name].pt else: # concatenate varied pt values for broadcasting pt_names = ["pt" for _ in self.jer_variations] + [f"pt_{jec_var}" for jec_var in self.jec_variations] match_pt = ak.concatenate([events[jet_name][pt_name][..., None] for pt_name in pt_names], axis=-1) pt_relative_diff = 1 - matched_gen_jet.pt / match_pt # test if matched gen jets are within 3 * resolution # (no check for Delta-R matching criterion; we assume this was done during nanoAOD production to get the genJetIdx) is_matched_pt = np.abs(pt_relative_diff) < 3 * jerpt is_matched_pt = ak.fill_none(is_matched_pt, False) # masked values = no gen match # compute smearing factors (scaling method) smear_factors_scaling = 1.0 + (jersf - 1.0) * pt_relative_diff # -- hybrid smearing: take smear factors from scaling if there was a match, # otherwise take the stochastic ones smear_factors = ak.where(is_matched_pt, smear_factors_scaling, smear_factors_stochastic) # ensure array is not nullable (avoid ambiguity on Arrow/Parquet conversion) smear_factors = ak.fill_none(smear_factors, 0.0) # to allow for code simplifications below, store the reference pt and mass columns upon which the smearing is based # into the events array for cases where it shouldn't already exist for direction in ["up", "down"]: events = set_ak_column_f32(events, f"{jet_name}.pt_jer_{direction}", events[jet_name].pt) events = set_ak_column_f32(events, f"{jet_name}.mass_jer_{direction}", events[jet_name].mass) # when propagating met, do the same for respective columns if self.propagate_met: events = set_ak_column_f32(events, f"{met_name}.pt_jer_{direction}", events[met_name].pt) events = set_ak_column_f32(events, f"{met_name}.phi_jer_{direction}", events[met_name].phi) # when propagating met, before smearing is applied, store pt and phi of the full jet system for all variations using # string postfixes as keys if self.propagate_met: jetsum_pt_before = {} jetsum_phi_before = {} for postfix in self.postfixes: jetsum_pt_before[postfix], jetsum_phi_before[postfix] = sum_transverse( events[jet_name][f"pt{postfix}"], events[jet_name].phi, ) # apply the smearing # (note: this requires that postfixes and smear_factors have the same order, but this should be the case) for i, postfix in enumerate(self.postfixes): pt_name = f"pt{postfix}" m_name = f"mass{postfix}" events = set_ak_column_f32(events, f"{jet_name}.{pt_name}", events[jet_name][pt_name] * smear_factors[..., i]) events = set_ak_column_f32(events, f"{jet_name}.{m_name}", events[jet_name][m_name] * smear_factors[..., i]) # recover coffea behavior events = self[attach_coffea_behavior](events, collections=[jet_name], **kwargs) # met propagation if self.propagate_met: # save unsmeared quantities events = set_ak_column_f32(events, f"{met_name}.pt_unsmeared", events[met_name].pt) events = set_ak_column_f32(events, f"{met_name}.phi_unsmeared", events[met_name].phi) # propagate per variation for postfix in self.postfixes: # get pt and phi of all jets after correcting jetsum_pt_after, jetsum_phi_after = sum_transverse( events[jet_name][f"pt{postfix}"], events[jet_name].phi, ) # propagate changes to MET met_pt, met_phi = propagate_met( jetsum_pt_before[postfix], jetsum_phi_before[postfix], jetsum_pt_after, jetsum_phi_after, events[met_name][f"pt{postfix}"], events[met_name][f"phi{postfix}"], ) events = set_ak_column_f32(events, f"{met_name}.pt{postfix}", met_pt) events = set_ak_column_f32(events, f"{met_name}.phi{postfix}", met_phi) return events
@jer.init def jer_init(self: Calibrator) -> None: # add jec_cfg for applying nominal smearing to jec variations jec_cfg = self.get_jec_config() jec_sources = self.jec_uncertainty_sources if jec_sources is None: jec_sources = jec_cfg.uncertainty_sources or [] self.jec_uncertainty_sources = jec_sources # prepare jec variations self.jec_variations = sum(([f"jec_{unc}_up", f"jec_{unc}_down"] for unc in self.jec_uncertainty_sources), []) jet_jec_columns = {f"{self.jet_name}.{{pt,mass}}_{jec_source}" for jec_source in self.jec_variations} met_jec_columns = {f"{self.met_name}.{{pt,phi}}_{jec_source}" for jec_source in self.jec_variations} # determine gen-level jet index column lower_first = lambda s: s[0].lower() + s[1:] if s else s self.gen_jet_idx_column = lower_first(self.gen_jet_name) + "Idx" # prepare jer variations and postfixes self.jer_variations = ["nom", "up", "down"] self.postfixes = ["", "_jer_up", "_jer_down"] + [f"_{jec_var}" for jec_var in self.jec_variations] # register used jet columns self.uses.add(f"{self.jet_name}.{{pt,eta,phi,mass,{self.gen_jet_idx_column}}}") self.uses.add(f"{self.gen_jet_name}.{{pt,eta,phi}}") if jec_sources: self.uses |= jet_jec_columns # register produced jet columns self.produces.add(f"{self.jet_name}.{{pt,mass}}{{,_unsmeared,_jer_up,_jer_down}}") if jec_sources: self.produces |= jet_jec_columns # additional columns when propagating MET if self.propagate_met: # register used MET columns self.uses.add(f"{self.met_name}.{{pt,phi}}") if jec_sources: self.uses |= met_jec_columns # register produced MET columns self.produces.add(f"{self.met_name}.{{pt,phi}}{{,_jer_up,_jer_down,_unsmeared}}") if jec_sources: self.produces |= met_jec_columns @jer.requires def jer_requires(self: Calibrator, reqs: dict) -> None: if "external_files" in reqs: return from columnflow.tasks.external import BundleExternalFiles reqs["external_files"] = BundleExternalFiles.req(self.task) @jer.setup def jer_setup(self: Calibrator, reqs: dict, inputs: dict, reader_targets: InsertableDict) -> None: """ Load the correct jer files using the :py:func:`from_string` method of the :external+correctionlib:py:class:`correctionlib.highlevel.CorrectionSet` function and apply the corrections as needed. The source files for the :external+correctionlib:py:class:`correctionlib.highlevel.CorrectionSet` instance are extracted with the :py:meth:`~.jer.get_jer_file`. Uses the member function :py:meth:`~.jer.get_jer_config` to construct the required keys, which are based on the following information about the JER: - campaign - version - jet_type A corresponding example snippet within the *config_inst* could like something like this: .. code-block:: python cfg.x.jer = DotDict.wrap({ "Jet": { "campaign": f"Summer19UL{year2}{jerc_postfix}", "version": "JRV3", "jet_type": "AK4PFchs", }, }) :param reqs: Requirement dictionary for this :py:class:`~columnflow.calibration.Calibrator` instance. :param inputs: Additional inputs, currently not used. :param reader_targets: TODO: add documentation. """ bundle = reqs["external_files"] # import the correction sets from the external file import correctionlib correction_set = correctionlib.CorrectionSet.from_string( self.get_jer_file(bundle.files).load(formatter="gzip").decode("utf-8"), ) # compute JER keys from config information jer_cfg = self.get_jer_config() jer_keys = { "jer": f"{jer_cfg.campaign}_{jer_cfg.version}_MC_PtResolution_{jer_cfg.jet_type}", "sf": f"{jer_cfg.campaign}_{jer_cfg.version}_MC_ScaleFactor_{jer_cfg.jet_type}", } # store the evaluators self.evaluators = { name: get_evaluators(correction_set, [key])[0] for name, key in jer_keys.items() } # use deterministic seeds for random smearing if requested if self.deterministic_seed_index >= 0: idx = self.deterministic_seed_index bit_generator = np.random.SFC64 def deterministic_normal(loc, scale, seed): return np.asarray([ np.random.Generator(bit_generator(_seed)).normal(_loc, _scale, size=idx + 1)[-1] for _loc, _scale, _seed in zip(loc, scale, seed) ]) self.deterministic_normal = deterministic_normal # explicit calibrators for standard jet collections jer_ak4 = jer.derive("jer_ak4", cls_dict={"jet_name": "Jet", "gen_jet_name": "GenJet"}) jer_ak8 = jer.derive("jer_ak8", cls_dict={"jet_name": "FatJet", "gen_jet_name": "GenJetAK8", "propagate_met": False}) # # single calibrator for doing both JEC and JER smearing #
[docs]@calibrator( uses={jec, jer}, produces={jec, jer}, # name of the jet collection to smear jet_name="Jet", # name of the associated gen jet collection (for JER smearing) gen_jet_name="GenJet", # toggle for propagation to MET propagate_met=None, # functions to determine configs and files get_jec_file=None, get_jec_config=None, get_jer_file=None, get_jer_config=None, ) def jets(self: Calibrator, events: ak.Array, **kwargs) -> ak.Array: """ Instance of :py:class:`~columnflow.calibration.Calibrator` that does all relevant calibrations for jets, i.e. JEC and JER. For more information, see :py:func:`~.jec` and :py:func:`~.jer`. :param events: awkward array containing events to process """ # apply jet energy corrections events = self[jec](events, **kwargs) # apply jer smearing on MC only if self.dataset_inst.is_mc: events = self[jer](events, **kwargs) return events
@jets.init def jets_init(self: Calibrator) -> None: # forward argument to the producers self.deps_kwargs[jec]["jet_name"] = self.jet_name self.deps_kwargs[jer]["jet_name"] = self.jet_name self.deps_kwargs[jer]["gen_jet_name"] = self.gen_jet_name if self.propagate_met is not None: self.deps_kwargs[jec]["propagate_met"] = self.propagate_met self.deps_kwargs[jer]["propagate_met"] = self.propagate_met if self.get_jec_file is not None: self.deps_kwargs[jec]["get_jec_file"] = self.get_jec_file if self.get_jec_config is not None: self.deps_kwargs[jec]["get_jec_config"] = self.get_jec_config if self.get_jer_file is not None: self.deps_kwargs[jer]["get_jer_file"] = self.get_jer_file if self.get_jer_config is not None: self.deps_kwargs[jer]["get_jer_config"] = self.get_jer_config # explicit calibrators for standard jet collections jets_ak4 = jets.derive("jets_ak4", cls_dict={"jet_name": "Jet", "gen_jet_name": "GenJet"}) jets_ak8 = jets.derive("jets_ak8", cls_dict={"jet_name": "FatJet", "gen_jet_name": "GenJetAK8"})