Source code for columnflow.production.categories

# coding: utf-8

"""
Column production methods related to defining categories.
"""

from __future__ import annotations

import functools
import operator

import law

from columnflow.categorization import Categorizer
from columnflow.production import Producer, producer
from columnflow.util import maybe_import
from columnflow.columnar_util import set_ak_column, ak_concatenate_safe

np = maybe_import("numpy")
ak = maybe_import("awkward")


logger = law.logger.get_logger(__name__)



[docs]
@producer(
    produces={"category_ids"},
    # hook consulted during init to decide whether a leaf category is left out; skips nothing by default
    skip_category=(lambda self, category_inst: False),
)
def category_ids(
    self: Producer,
    events: ak.Array,
    target_events: ak.Array | None = None,
    **kwargs,
) -> ak.Array:
    """
    Builds the "category_ids" column, which contains per event the ids of all categories in
    ``self.categorizer_map`` for which every associated categorizer mask is true.

    Each categorizer in ``self.unique_categorizers`` is invoked once on *events* (forwarding
    *kwargs*) and is expected to return the possibly updated events together with a mask. The
    resulting column is written to *target_events* when given, and to the events array as returned
    by the last categorizer otherwise. The array carrying the new column is returned.
    """
    # run every distinct categorizer a single time and remember the mask it produced
    masks_by_categorizer = {}
    for cat_func in self.unique_categorizers:
        events, masks_by_categorizer[cat_func] = self[cat_func](events, **kwargs)

    # build one nullable, single-entry-per-event id array for each category
    ids_per_category = []
    for cat_inst, categorizers in self.categorizer_map.items():
        # logical AND over all masks of this category, seeded with an all-true mask
        selected = functools.reduce(
            operator.and_,
            [masks_by_categorizer[c] for c in categorizers],
            np.ones(len(events), dtype=bool),
        )

        # put the id where the event is selected and nan elsewhere, convert nan to none, and let
        # ak.singletons wrap each entry into its own list
        id_value = np.float64(cat_inst.id)
        missing_value = np.float64(np.nan)
        float_ids = ak.where(selected, id_value, missing_value)
        nullable_ids = ak.nan_to_none(float_ids)
        ids_per_category.append(ak.singletons(nullable_ids))

    # merge the per-category arrays along the inner axis
    merged_ids = ak_concatenate_safe(ids_per_category, axis=1)

    # attach the column to the requested array, falling back to the processed events
    destination = events if target_events is None else target_events
    return set_ak_column(destination, "category_ids", merged_ids, value_type=np.int64)



@category_ids.init
def category_ids_init(self: Producer, **kwargs) -> None:
    """
    Resolves the selections of all non-skipped leaf categories of the config into
    :py:class:`Categorizer` classes, registers them in the *uses* and *produces* sets, and stores
    the lookup structures ``categorizer_map`` (category -> list of categorizers) and
    ``unique_categorizers`` (all mapped categorizers, duplicates removed).

    An exception is raised when a selection cannot be resolved to a categorizer, or when the
    resolved categorizer is not exposed.
    """
    super(category_ids, self).init_func(**kwargs)

    # lookup from leaf category to the categorizer classes that define it
    self.categorizer_map = {}

    for cat_inst in self.config_inst.get_leaf_categories():
        # honor the configurable skip hook
        if self.skip_category(cat_inst):
            continue

        # a selection may be a single item or a nested structure, so always work on the flat form
        for sel in law.util.flatten(cat_inst.selection):
            if Categorizer.derived_by(sel):
                # already a categorizer class
                categorizer = sel
            else:
                # otherwise it has to be something the categorizer lookup knows
                if not Categorizer.has_cls(sel):
                    raise Exception(
                        f"selection '{sel}' of category '{cat_inst.name}' cannot be resolved to an existing Categorizer",
                    )
                categorizer = Categorizer.get_cls(sel)

            # only exposed categorizers are allowed here
            if not categorizer.exposed:
                raise RuntimeError(
                    f"cannot use unexposed categorizer '{categorizer}' to evaluate category {cat_inst}",
                )

            # register the categorizer as a dependency in both directions
            self.uses.add(categorizer)
            self.produces.add(categorizer)

            # remember which categorizers belong to this category
            if cat_inst not in self.categorizer_map:
                self.categorizer_map[cat_inst] = []
            self.categorizer_map[cat_inst].append(categorizer)

    # flatten all mapped categorizers in insertion order and drop duplicates
    self.unique_categorizers = law.util.make_unique([
        categorizer
        for categorizers in self.categorizer_map.values()
        for categorizer in categorizers
    ])