Source code for sciunit.scores.complete

"""Score types for tests that completed successfully.

These include various representations of goodness-of-fit.
"""

import math
from typing import Union

import numpy as np
import quantities as pq

from sciunit import errors, utils

from .base import Score
from .incomplete import InsufficientDataScore


class BooleanScore(Score):
    """A boolean score, which must be True or False."""

    _allowed_types = (bool,)

    _description = (
        "True if the observation and prediction were "
        "sufficiently similar; False otherwise"
    )

    _best = True

    _worst = False

    @classmethod
    def compute(cls, observation: dict, prediction: dict) -> "BooleanScore":
        """Compute whether the observation equals the prediction.

        Returns:
            BooleanScore: True if the observation equals the prediction,
            False otherwise.
        """
        return cls(observation == prediction)

    @property
    def norm_score(self) -> float:
        """Return 1.0 for a True score and 0.0 for a False score.

        Returns:
            float: 1.0 for a True score and 0.0 for a False score.
        """
        return 1.0 if self.score else 0.0

    def __str__(self) -> str:
        return "Pass" if self.score else "Fail"

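# Usage sketch (illustrative, not part of the sciunit source). Any two
# comparable values work, since the score is just an equality check:
#
# >>> score = BooleanScore.compute(3.0, 3.0)
# >>> str(score)
# 'Pass'
# >>> score.norm_score
# 1.0
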
class ZScore(Score):
    """A Z score.

    A float indicating standardized difference from a reference mean.
    """

    _allowed_types = (float,)

    _description = (
        "The difference between the means of the observation and "
        "prediction divided by the standard deviation of the "
        "observation"
    )

    _best = 0.0  # A Z score of 0.0 is best.

    _worst = np.inf  # A Z score of infinity (or negative infinity) is worst.

    observation_schema = [
        (
            "Mean, Standard Deviation, N",
            {
                "mean": {"units": True, "required": True},
                "std": {"units": True, "min": 0, "required": True},
                "n": {"type": "integer", "min": 1},
            },
        ),
        (
            "Mean, Standard Error, N",
            {
                "mean": {"units": True, "required": True},
                "sem": {"units": True, "min": 0, "required": True},
                "n": {"type": "integer", "min": 1, "required": True},
            },
        ),
    ]

    @classmethod
    def observation_postprocess(cls, observation: dict) -> dict:
        if "std" not in observation:
            observation["std"] = observation["sem"] * np.sqrt(observation["n"])
        return observation

    @classmethod
    def compute(cls, observation: dict, prediction: dict) -> "ZScore":
        """Compute a Z score from an observation and a prediction.

        Returns:
            ZScore: The computed Z score.
        """
        assert isinstance(
            observation, dict
        ), "Observation must be a dict when using ZScore, not type %s" % type(
            observation
        )
        try:
            p_value = prediction["mean"]  # Use the prediction's mean.
        except (TypeError, KeyError, IndexError):  # If there isn't one...
            try:
                p_value = prediction["value"]  # Use the prediction's value.
            except (TypeError, IndexError):  # If there isn't one...
                p_value = prediction  # Use the prediction (assume numeric).
        try:
            o_mean = observation["mean"]
            o_std = observation["std"]
        except KeyError:
            error = "Observation must have keys 'mean' and 'std' when using ZScore"
            return InsufficientDataScore(error)
        if o_std <= 0:
            error = "Observation standard deviation must be > 0"
            return InsufficientDataScore(error)
        value = (p_value - o_mean) / o_std
        value = utils.assert_dimensionless(value)
        if np.isnan(value):
            error = "One of the input values was NaN"
            return InsufficientDataScore(error)
        score = cls(value)
        return score

    @property
    def norm_score(self) -> float:
        """Return the normalized score.

        Equals 1.0 for a Z score of 0, falling toward 0.0 for extremely
        positive or negative values.
        """
        cdf = (1.0 + math.erf(self.score / math.sqrt(2.0))) / 2.0
        return 1 - 2 * math.fabs(0.5 - cdf)

    def __str__(self) -> str:
        return "Z = %.2f" % self.score

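# Usage sketch (illustrative values, not part of the sciunit source). With an
# observation mean of 10 mV, a standard deviation of 2 mV, and a predicted
# mean of 12 mV, the Z score is (12 - 10) / 2 = 1.0:
#
# >>> obs = {"mean": 10.0 * pq.mV, "std": 2.0 * pq.mV, "n": 25}
# >>> pred = {"mean": 12.0 * pq.mV}
# >>> score = ZScore.compute(obs, pred)
# >>> str(score)
# 'Z = 1.00'
# >>> round(score.norm_score, 2)
# 0.32
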
class CohenDScore(ZScore):
    """A Cohen's D score.

    A float indicating the difference between two means normalized by
    the pooled standard deviation.
    """

    _description = "The Cohen's D between the prediction and the observation"

    _best = 0.0

    _worst = np.inf

    @classmethod
    def compute(cls, observation: dict, prediction: dict) -> "CohenDScore":
        """Compute a Cohen's D from an observation and a prediction.

        Returns:
            CohenDScore: The computed Cohen's D score.
        """
        assert isinstance(observation, dict)
        assert isinstance(prediction, dict)
        p_mean = prediction["mean"]  # Use the prediction's mean.
        p_std = prediction["std"]
        o_mean = observation["mean"]
        o_std = observation["std"]
        try:  # Try to pool, taking sample sizes into account.
            p_n = prediction["n"]
            o_n = observation["n"]
            s = (
                ((p_n - 1) * (p_std ** 2) + (o_n - 1) * (o_std ** 2))
                / (p_n + o_n - 2)
            ) ** 0.5
        except KeyError:  # If sample sizes are not available.
            s = (p_std ** 2 + o_std ** 2) ** 0.5
        value = (p_mean - o_mean) / s
        value = utils.assert_dimensionless(value)
        return cls(value)

    def __str__(self) -> str:
        return "D = %.2f" % self.score

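# Usage sketch (illustrative values, not part of the sciunit source). With
# sample sizes available, the standard deviations are pooled:
# s = sqrt((24 * 3**2 + 24 * 2**2) / 48) ~= 2.55, so D ~= (12 - 10) / 2.55 ~= 0.78:
#
# >>> obs = {"mean": 10.0 * pq.mV, "std": 2.0 * pq.mV, "n": 25}
# >>> pred = {"mean": 12.0 * pq.mV, "std": 3.0 * pq.mV, "n": 25}
# >>> str(CohenDScore.compute(obs, pred))
# 'D = 0.78'
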
class RatioScore(Score):
    """A ratio of two numbers.

    Usually the prediction divided by the observation.
    """

    _allowed_types = (float,)

    _description = "The ratio between the prediction and the observation"

    _best = 1.0  # A RatioScore of 1.0 is best.

    _worst = np.inf

    observation_schema = {"value": {"units": True, "required": True}}

    def _check_score(self, score):
        if score < 0.0:
            raise errors.InvalidScoreError(
                (
                    "RatioScore was initialized with a score of %f, "
                    "but a RatioScore must be non-negative."
                )
                % score
            )

    @classmethod
    def compute(cls, observation: dict, prediction: dict, key=None) -> "RatioScore":
        """Compute a ratio from an observation and a prediction.

        Returns:
            RatioScore: The ratio of the prediction to the observation.
        """
        assert isinstance(observation, (dict, float, int, pq.Quantity))
        assert isinstance(prediction, (dict, float, int, pq.Quantity))

        obs, pred = cls.extract_means_or_values(observation, prediction, key=key)
        value = pred / obs
        value = utils.assert_dimensionless(value)
        return cls(value)

    @property
    def norm_score(self) -> float:
        """Return 1.0 for a ratio of 1, falling toward 0.0 for extremely
        small or large values.

        Returns:
            float: The value of the norm score.
        """
        score = math.log10(self.score)
        cdf = (1.0 + math.erf(score / math.sqrt(2.0))) / 2.0
        return 1 - 2 * math.fabs(0.5 - cdf)

    def __str__(self):
        return "Ratio = %.2f" % self.score

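# Usage sketch (illustrative values, not part of the sciunit source),
# assuming extract_means_or_values() pulls the "value" entry from each dict:
#
# >>> obs = {"value": 4.0 * pq.s}
# >>> pred = {"value": 5.0 * pq.s}
# >>> score = RatioScore.compute(obs, pred)
# >>> str(score)
# 'Ratio = 1.25'
# >>> round(score.norm_score, 2)
# 0.92
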
class RelativeDifferenceScore(Score):
    """A relative difference between the prediction and the observation.

    The absolute value of the difference between the prediction and the
    observation is divided by a reference value with the same units. This
    reference scale should be chosen for each test such that normalization
    produces directly comparable scores across tests. For example, if 5 volts
    represents a medium-sized difference for TestA, and 10 seconds represents
    a medium-sized difference for TestB, then 5 volts and 10 seconds should be
    used as the reference scales for TestA and TestB, respectively.

    The attribute `scale` can be passed to the compute method or set for the
    whole class in advance. Otherwise, a scale of 1 (in the units of the
    observation and prediction) will be used.
    """

    _allowed_types = (float,)

    _description = "The relative difference between the prediction and the observation"

    _best = 0.0  # A RelativeDifferenceScore of 0.0 is best.

    _worst = np.inf

    scale = None

    def _check_score(self, score):
        if score < 0.0:
            raise errors.InvalidScoreError(
                (
                    "RelativeDifferenceScore was initialized with a score "
                    "of %f, but a RelativeDifferenceScore must be non-negative."
                )
                % score
            )

    @classmethod
    def compute(
        cls,
        observation: Union[dict, float, int, pq.Quantity],
        prediction: Union[dict, float, int, pq.Quantity],
        key=None,
        scale: Union[float, int, pq.Quantity, None] = None,
    ) -> "RelativeDifferenceScore":
        """Compute the relative difference between the observation and the prediction.

        Returns:
            RelativeDifferenceScore: The relative difference between the
            observation and the prediction.
        """
        assert isinstance(observation, (dict, float, int, pq.Quantity))
        assert isinstance(prediction, (dict, float, int, pq.Quantity))

        obs, pred = cls.extract_means_or_values(observation, prediction, key=key)

        scale = scale or cls.scale or (obs / float(obs))
        assert type(obs) is type(scale)
        assert type(obs) is type(pred)
        if isinstance(obs, pq.Quantity):
            assert (
                obs.units == pred.units
            ), "Prediction must have the same units as the observation"
            assert (
                obs.units == scale.units
            ), "RelativeDifferenceScore.scale must have the same units as the observation"
        assert scale > 0, (
            "RelativeDifferenceScore.scale must be positive (not %g)" % scale
        )
        value = np.abs(pred - obs) / scale
        value = utils.assert_dimensionless(value)
        return cls(value)

    @property
    def norm_score(self) -> float:
        """Return 1.0 for a relative difference of 0.0, falling toward 0.0
        for extremely large values.

        Returns:
            float: The value of the norm score.
        """
        x = self.score
        return 1 / (1 + x)

    def __str__(self):
        return "Relative Difference = %.2f" % self.score

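# Usage sketch (illustrative values, not part of the sciunit source),
# assuming extract_means_or_values() passes plain quantities through
# unchanged. With a reference scale of 5 mV, a 2.5 mV difference scores
# 0.5 and normalizes to 1 / (1 + 0.5) ~= 0.67:
#
# >>> obs = 10.0 * pq.mV
# >>> pred = 12.5 * pq.mV
# >>> score = RelativeDifferenceScore.compute(obs, pred, scale=5.0 * pq.mV)
# >>> str(score)
# 'Relative Difference = 0.50'
# >>> round(score.norm_score, 2)
# 0.67
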
class PercentScore(Score):
    """A percent score.

    A float in the range [0.0, 100.0], where higher is better.
    """

    _description = (
        "100.0 is considered perfect agreement between the "
        "observation and the prediction. 0.0 is the worst "
        "possible agreement"
    )

    _best = 100.0

    _worst = 0.0

    def _check_score(self, score):
        if not (0.0 <= score <= 100.0):
            raise errors.InvalidScoreError(
                "Score of %f must be in range 0.0-100.0" % score
            )

    @property
    def norm_score(self) -> float:
        """Return the percent score divided by 100, i.e. 1.0 for a
        percent score of 100 and 0.0 for a percent score of 0.

        Returns:
            float: The percent score divided by 100.
        """
        return float(self.score) / 100

    def __str__(self) -> str:
        return "%.1f%%" % self.score

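# Usage sketch (illustrative, not part of the sciunit source):
#
# >>> score = PercentScore(85.0)
# >>> str(score)
# '85.0%'
# >>> score.norm_score
# 0.85
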
class FloatScore(Score):
    """A float score.

    A float with any value.
    """

    _allowed_types = (
        float,
        pq.Quantity,
    )

    # The best value is indeterminate without more context,
    # but some float value must be supplied to use methods like Test.ace().
    _best = 0.0

    # The worst value is likewise indeterminate without more context.
    _worst = 0.0

    def _check_score(self, score):
        if isinstance(score, pq.Quantity) and score.size != 1:
            raise errors.InvalidScoreError("Score must have size 1.")

    _description = (
        "There is no canonical mapping between this score type and "
        "a measure of agreement between the observation and the "
        "prediction"
    )

    @classmethod
    def compute_ssd(cls, observation: dict, prediction: dict) -> Score:
        """Compute the sum of squared differences between the observation and the prediction.

        Args:
            observation (dict): The observation to be used for computing the sum of squared differences.
            prediction (dict): The prediction to be used for computing the sum of squared differences.

        Returns:
            Score: The sum of squared differences between the observation and the prediction.
        """
        # The sum of the squared differences.
        value = ((observation - prediction) ** 2).sum()
        score = cls(value)
        return score

    def __str__(self) -> str:
        return "%.3g" % self.score

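# Usage sketch (illustrative values, not part of the sciunit source).
# compute_ssd accepts anything that supports elementwise subtraction,
# for example NumPy arrays:
#
# >>> obs = np.array([1.0, 2.0, 3.0])
# >>> pred = np.array([1.5, 2.0, 2.5])
# >>> str(FloatScore.compute_ssd(obs, pred))
# '0.5'
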
class RandomScore(FloatScore):
    """A random score in [0, 1].

    This has no scientific value and should only be used for debugging
    purposes. For example, one might assign a random score under some error
    condition to move forward with an application that requires a numeric
    score, and use the presence of a RandomScore in the output as an
    indication of an internal error.
    """

    _allowed_types = (float,)

    _description = (
        "A random number in [0, 1] that has no relation to "
        "the prediction or the observation"
    )

    def __str__(self) -> str:
        return "%.3g" % self.score

class CorrelationScore(Score):
    """A correlation score.

    A float in the range [-1.0, 1.0] representing the correlation coefficient.
    """

    _description = (
        "A correlation of -1.0 indicates a perfect negative correlation, "
        "while a correlation of 1.0 indicates a perfect positive correlation. "
        "A correlation of 0.0 indicates no linear relationship between the "
        "two variables"
    )

    _best = 1.0

    _worst = -1.0

    def _check_score(self, score):
        if not (-1.0 <= score <= 1.0):
            raise errors.InvalidScoreError(
                "Score of %.3g must be in range [-1.0, 1.0]" % score
            )

    @classmethod
    def compute(cls, observation, prediction):
        """Compute the correlation coefficient between the observation and the prediction."""
        return cls(float(np.corrcoef(observation, prediction)[0, 1]))

    def __str__(self):
        return "%.3g" % self.score

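# Usage sketch (illustrative values, not part of the sciunit source):
#
# >>> obs = [1.0, 2.0, 3.0, 4.0]
# >>> pred = [1.1, 1.9, 3.2, 4.1]
# >>> str(CorrelationScore.compute(obs, pred))
# '0.996'
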