Source code for sciunit.scores.complete

"""Score types for tests that completed successfully.

These include various representations of goodness-of-fit.
"""

import math
from typing import Union

import numpy as np
import quantities as pq

from sciunit import errors, utils

from .base import Score
from .incomplete import InsufficientDataScore


class BooleanScore(Score):
    """A boolean score, which must be True or False."""

    _allowed_types = (bool,)

    _description = (
        "True if the observation and prediction were "
        "sufficiently similar; False otherwise"
    )

    _best = True

    _worst = False

    @classmethod
    def compute(cls, observation: dict, prediction: dict) -> "BooleanScore":
        """Compute whether the observation equals the prediction.

        Returns:
            BooleanScore: True if the observation equals the prediction,
            False otherwise.
        """
        return cls(observation == prediction)

    @property
    def norm_score(self) -> float:
        """Return 1.0 for a True score and 0.0 for a False score.

        Returns:
            float: 1.0 for a True score and 0.0 for a False score.
        """
        return 1.0 if self.score else 0.0

    def __str__(self) -> str:
        return "Pass" if self.score else "Fail"

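# Usage sketch (illustrative, not part of the sciunit source). Any two
# comparable values work, since the score is just an equality check:
#
# >>> score = BooleanScore.compute(3.0, 3.0)
# >>> str(score)
# 'Pass'
# >>> score.norm_score
# 1.0
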
class ZScore(Score):
    """A Z score.

    A float indicating standardized difference from a reference mean.
    """

    _allowed_types = (float,)

    _description = (
        "The difference between the means of the observation and "
        "prediction divided by the standard deviation of the "
        "observation"
    )

    _best = 0.0  # A Z score of 0.0 is best.

    _worst = np.inf  # A Z score of infinity (or negative infinity) is worst.

    observation_schema = [
        (
            "Mean, Standard Deviation, N",
            {
                "mean": {"units": True, "required": True},
                "std": {"units": True, "min": 0, "required": True},
                "n": {"type": "integer", "min": 1},
            },
        ),
        (
            "Mean, Standard Error, N",
            {
                "mean": {"units": True, "required": True},
                "sem": {"units": True, "min": 0, "required": True},
                "n": {"type": "integer", "min": 1, "required": True},
            },
        ),
    ]

    @classmethod
    def observation_postprocess(cls, observation: dict) -> dict:
        if "std" not in observation:
            observation["std"] = observation["sem"] * np.sqrt(observation["n"])
        return observation

    @classmethod
    def compute(cls, observation: dict, prediction: dict) -> "ZScore":
        """Compute a Z score from an observation and a prediction.

        Returns:
            ZScore: The computed Z score.
        """
        assert isinstance(
            observation, dict
        ), "Observation must be a dict when using ZScore, not type %s" % type(
            observation
        )
        try:
            p_value = prediction["mean"]  # Use the prediction's mean.
        except (TypeError, KeyError, IndexError):  # If there isn't one...
            try:
                p_value = prediction["value"]  # Use the prediction's value.
            except (TypeError, IndexError):  # If there isn't one...
                p_value = prediction  # Use the prediction (assume numeric).
        try:
            o_mean = observation["mean"]
            o_std = observation["std"]
        except KeyError:
            error = "Observation must have keys 'mean' and 'std' when using ZScore"
            return InsufficientDataScore(error)
        if o_std <= 0:
            error = "Observation standard deviation must be > 0"
            return InsufficientDataScore(error)
        value = (p_value - o_mean) / o_std
        value = utils.assert_dimensionless(value)
        if np.isnan(value):
            error = "One of the input values was NaN"
            return InsufficientDataScore(error)
        score = cls(value)
        return score

    @property
    def norm_score(self) -> float:
        """Return the normalized score.

        Equals 1.0 for a Z score of 0, falling toward 0.0 for extremely
        positive or negative values.
        """
        cdf = (1.0 + math.erf(self.score / math.sqrt(2.0))) / 2.0
        return 1 - 2 * math.fabs(0.5 - cdf)

    def __str__(self) -> str:
        return "Z = %.2f" % self.score

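# Usage sketch (illustrative values, not part of the sciunit source). With an
# observation mean of 10 mV, a standard deviation of 2 mV, and a predicted
# mean of 12 mV, the Z score is (12 - 10) / 2 = 1.0:
#
# >>> obs = {"mean": 10.0 * pq.mV, "std": 2.0 * pq.mV, "n": 25}
# >>> pred = {"mean": 12.0 * pq.mV}
# >>> score = ZScore.compute(obs, pred)
# >>> str(score)
# 'Z = 1.00'
# >>> round(score.norm_score, 2)
# 0.32
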
class CohenDScore(ZScore):
    """A Cohen's D score.

    A float indicating the difference between two means normalized by
    the pooled standard deviation.
    """

    _description = "The Cohen's D between the prediction and the observation"

    _best = 0.0

    _worst = np.inf

    @classmethod
    def compute(cls, observation: dict, prediction: dict) -> "CohenDScore":
        """Compute a Cohen's D from an observation and a prediction.

        Returns:
            CohenDScore: The computed Cohen's D score.
        """
        assert isinstance(observation, dict)
        assert isinstance(prediction, dict)
        p_mean = prediction["mean"]  # Use the prediction's mean.
        p_std = prediction["std"]
        o_mean = observation["mean"]
        o_std = observation["std"]
        try:  # Try to pool, taking sample sizes into account.
            p_n = prediction["n"]
            o_n = observation["n"]
            s = (
                ((p_n - 1) * (p_std ** 2) + (o_n - 1) * (o_std ** 2))
                / (p_n + o_n - 2)
            ) ** 0.5
        except KeyError:  # If sample sizes are not available.
            s = (p_std ** 2 + o_std ** 2) ** 0.5
        value = (p_mean - o_mean) / s
        value = utils.assert_dimensionless(value)
        return cls(value)

    def __str__(self) -> str:
        return "D = %.2f" % self.score

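# Usage sketch (illustrative values, not part of the sciunit source). With
# sample sizes available, the standard deviations are pooled:
# s = sqrt((24 * 3**2 + 24 * 2**2) / 48) ~= 2.55, so D ~= (12 - 10) / 2.55 ~= 0.78:
#
# >>> obs = {"mean": 10.0 * pq.mV, "std": 2.0 * pq.mV, "n": 25}
# >>> pred = {"mean": 12.0 * pq.mV, "std": 3.0 * pq.mV, "n": 25}
# >>> str(CohenDScore.compute(obs, pred))
# 'D = 0.78'
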
class RatioScore(Score):
    """A ratio of two numbers.

    Usually the prediction divided by the observation.
    """

    _allowed_types = (float,)

    _description = "The ratio between the prediction and the observation"

    _best = 1.0  # A RatioScore of 1.0 is best.

    _worst = np.inf

    observation_schema = {"value": {"units": True, "required": True}}

    def _check_score(self, score):
        if score < 0.0:
            raise errors.InvalidScoreError(
                (
                    "RatioScore was initialized with a score of %f, "
                    "but a RatioScore must be non-negative."
                )
                % score
            )

    @classmethod
    def compute(cls, observation: dict, prediction: dict, key=None) -> "RatioScore":
        """Compute a ratio from an observation and a prediction.

        Returns:
            RatioScore: The ratio of the prediction to the observation.
        """
        assert isinstance(observation, (dict, float, int, pq.Quantity))
        assert isinstance(prediction, (dict, float, int, pq.Quantity))

        obs, pred = cls.extract_means_or_values(observation, prediction, key=key)
        value = pred / obs
        value = utils.assert_dimensionless(value)
        return cls(value)

    @property
    def norm_score(self) -> float:
        """Return 1.0 for a ratio of 1, falling toward 0.0 for extremely
        small or large values.

        Returns:
            float: The value of the norm score.
        """
        score = math.log10(self.score)
        cdf = (1.0 + math.erf(score / math.sqrt(2.0))) / 2.0
        return 1 - 2 * math.fabs(0.5 - cdf)

    def __str__(self):
        return "Ratio = %.2f" % self.score

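# Usage sketch (illustrative values, not part of the sciunit source),
# assuming extract_means_or_values() pulls the "value" entry from each dict:
#
# >>> obs = {"value": 4.0 * pq.s}
# >>> pred = {"value": 5.0 * pq.s}
# >>> score = RatioScore.compute(obs, pred)
# >>> str(score)
# 'Ratio = 1.25'
# >>> round(score.norm_score, 2)
# 0.92
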
class RelativeDifferenceScore(Score):
    """A relative difference between the prediction and the observation.

    The absolute value of the difference between the prediction and the
    observation is divided by a reference value with the same units. This
    reference scale should be chosen for each test such that normalization
    produces directly comparable scores across tests. For example, if 5 volts
    represents a medium-sized difference for TestA, and 10 seconds represents
    a medium-sized difference for TestB, then 5 volts and 10 seconds should be
    used as the reference scales for TestA and TestB, respectively.

    The attribute `scale` can be passed to the compute method or set for the
    whole class in advance. Otherwise, a scale of 1 (in the units of the
    observation and prediction) will be used.
    """

    _allowed_types = (float,)

    _description = "The relative difference between the prediction and the observation"

    _best = 0.0  # A RelativeDifferenceScore of 0.0 is best.

    _worst = np.inf

    scale = None

    def _check_score(self, score):
        if score < 0.0:
            raise errors.InvalidScoreError(
                (
                    "RelativeDifferenceScore was initialized with a score "
                    "of %f, but a RelativeDifferenceScore must be non-negative."
                )
                % score
            )

    @classmethod
    def compute(
        cls,
        observation: Union[dict, float, int, pq.Quantity],
        prediction: Union[dict, float, int, pq.Quantity],
        key=None,
        scale: Union[float, int, pq.Quantity, None] = None,
    ) -> "RelativeDifferenceScore":
        """Compute the relative difference between the observation and the prediction.

        Returns:
            RelativeDifferenceScore: The relative difference between the
            observation and the prediction.
        """
        assert isinstance(observation, (dict, float, int, pq.Quantity))
        assert isinstance(prediction, (dict, float, int, pq.Quantity))

        obs, pred = cls.extract_means_or_values(observation, prediction, key=key)

        scale = scale or cls.scale or (obs / float(obs))
        assert type(obs) is type(scale)
        assert type(obs) is type(pred)
        if isinstance(obs, pq.Quantity):
            assert (
                obs.units == pred.units
            ), "Prediction must have the same units as the observation"
            assert (
                obs.units == scale.units
            ), "RelativeDifferenceScore.scale must have the same units as the observation"
        assert scale > 0, (
            "RelativeDifferenceScore.scale must be positive (not %g)" % scale
        )
        value = np.abs(pred - obs) / scale
        value = utils.assert_dimensionless(value)
        return cls(value)

    @property
    def norm_score(self) -> float:
        """Return 1.0 for a relative difference of 0.0, falling toward 0.0
        for extremely large values.

        Returns:
            float: The value of the norm score.
        """
        x = self.score
        return 1 / (1 + x)

    def __str__(self):
        return "Relative Difference = %.2f" % self.score

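# Usage sketch (illustrative values, not part of the sciunit source),
# assuming extract_means_or_values() passes plain quantities through
# unchanged. With a reference scale of 5 mV, a 2.5 mV difference scores
# 0.5 and normalizes to 1 / (1 + 0.5) ~= 0.67:
#
# >>> obs = 10.0 * pq.mV
# >>> pred = 12.5 * pq.mV
# >>> score = RelativeDifferenceScore.compute(obs, pred, scale=5.0 * pq.mV)
# >>> str(score)
# 'Relative Difference = 0.50'
# >>> round(score.norm_score, 2)
# 0.67
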
class PercentScore(Score):
    """A percent score.

    A float in the range [0.0, 100.0], where higher is better.
    """

    _description = (
        "100.0 is considered perfect agreement between the "
        "observation and the prediction. 0.0 is the worst "
        "possible agreement"
    )

    _best = 100.0

    _worst = 0.0

    def _check_score(self, score):
        if not (0.0 <= score <= 100.0):
            raise errors.InvalidScoreError(
                "Score of %f must be in range 0.0-100.0" % score
            )

    @property
    def norm_score(self) -> float:
        """Return the percent score divided by 100, i.e. 1.0 for a
        percent score of 100 and 0.0 for a percent score of 0.

        Returns:
            float: The percent score divided by 100.
        """
        return float(self.score) / 100

    def __str__(self) -> str:
        return "%.1f%%" % self.score

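# Usage sketch (illustrative, not part of the sciunit source):
#
# >>> score = PercentScore(85.0)
# >>> str(score)
# '85.0%'
# >>> score.norm_score
# 0.85
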
class FloatScore(Score):
    """A float score.

    A float with any value.
    """

    _allowed_types = (
        float,
        pq.Quantity,
    )

    # The best value is indeterminate without more context,
    # but some float value must be supplied to use methods like Test.ace().
    _best = 0.0

    # The worst value is likewise indeterminate without more context.
    _worst = 0.0

    def _check_score(self, score):
        if isinstance(score, pq.Quantity) and score.size != 1:
            raise errors.InvalidScoreError("Score must have size 1.")

    _description = (
        "There is no canonical mapping between this score type and "
        "a measure of agreement between the observation and the "
        "prediction"
    )

    @classmethod
    def compute_ssd(cls, observation: dict, prediction: dict) -> Score:
        """Compute the sum of squared differences between the observation and the prediction.

        Args:
            observation (dict): The observation to be used for computing the sum of squared differences.
            prediction (dict): The prediction to be used for computing the sum of squared differences.

        Returns:
            Score: The sum of squared differences between the observation and the prediction.
        """
        # The sum of the squared differences.
        value = ((observation - prediction) ** 2).sum()
        score = cls(value)
        return score

    def __str__(self) -> str:
        return "%.3g" % self.score

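# Usage sketch (illustrative values, not part of the sciunit source).
# compute_ssd accepts anything that supports elementwise subtraction,
# for example NumPy arrays:
#
# >>> obs = np.array([1.0, 2.0, 3.0])
# >>> pred = np.array([1.5, 2.0, 2.5])
# >>> str(FloatScore.compute_ssd(obs, pred))
# '0.5'
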
class RandomScore(FloatScore):
    """A random score in [0, 1].

    This has no scientific value and should only be used for debugging
    purposes. For example, one might assign a random score under some error
    condition to move forward with an application that requires a numeric
    score, and use the presence of a RandomScore in the output as an
    indication of an internal error.
    """

    _allowed_types = (float,)

    _description = (
        "A random number in [0, 1] that has no relation to "
        "the prediction or the observation"
    )

    def __str__(self) -> str:
        return "%.3g" % self.score

class CorrelationScore(Score):
    """A correlation score.

    A float in the range [-1.0, 1.0] representing the correlation coefficient.
    """

    _description = (
        "A correlation of -1.0 indicates a perfect negative correlation, "
        "while a correlation of 1.0 indicates a perfect positive correlation. "
        "A correlation of 0.0 indicates no linear relationship between the "
        "two variables"
    )

    _best = 1.0

    _worst = -1.0

    def _check_score(self, score):
        if not (-1.0 <= score <= 1.0):
            raise errors.InvalidScoreError(
                "Score of %.3g must be in range [-1.0, 1.0]" % score
            )

    @classmethod
    def compute(cls, observation, prediction):
        """Compute the correlation coefficient between the observation and the prediction."""
        return cls(float(np.corrcoef(observation, prediction)[0, 1]))

    def __str__(self):
        return "%.3g" % self.score

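# Usage sketch (illustrative values, not part of the sciunit source):
#
# >>> obs = [1.0, 2.0, 3.0, 4.0]
# >>> pred = [1.1, 1.9, 3.2, 4.1]
# >>> str(CorrelationScore.compute(obs, pred))
# '0.996'
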