Source code for sciunit.scores.complete

"""Score types for tests that completed successfully.

These include various representations of goodness-of-fit.
"""

import math
from typing import Union

import numpy as np
import quantities as pq

from sciunit import utils
from sciunit import errors
from .base import Score
from .incomplete import InsufficientDataScore


class BooleanScore(Score):
    """A boolean score, which must be True or False."""

    _allowed_types = (bool,)

    _description = ('True if the observation and prediction were '
                    'sufficiently similar; False otherwise')

    _best = True

    _worst = False

    @classmethod
    def compute(cls, observation: dict, prediction: dict) -> 'BooleanScore':
        """Compute whether the observation equals the prediction.

        Returns:
            BooleanScore: A boolean score indicating whether the
                observation equals the prediction.
        """
        return BooleanScore(observation == prediction)

    @property
    def norm_score(self) -> float:
        """Return 1.0 for a True score and 0.0 for a False score.

        Returns:
            float: 1.0 for a True score and 0.0 for a False score.
        """
        return 1.0 if self.score else 0.0

    def __str__(self) -> str:
        return 'Pass' if self.score else 'Fail'
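# Usage sketch for BooleanScore (illustrative; not part of the sciunit
# source). The observation and prediction dicts below are hypothetical:
#
#     >>> score = BooleanScore.compute({'value': 1}, {'value': 1})
#     >>> score.score, score.norm_score, str(score)
#     (True, 1.0, 'Pass')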
class ZScore(Score):
    """A Z score.

    A float indicating the standardized difference from a reference mean.
    """

    _allowed_types = (float,)

    _description = ('The difference between the means of the observation and '
                    'prediction divided by the standard deviation of the '
                    'observation')

    _best = 0.0  # A Z-score of 0.0 is best.

    _worst = np.inf  # A Z-score of infinity (or negative infinity) is worst.

    @classmethod
    def compute(cls, observation: dict, prediction: dict) -> 'ZScore':
        """Compute a z-score from an observation and a prediction.

        Returns:
            ZScore: The computed Z-score.
        """
        assert isinstance(observation, dict), \
            "Observation must be a dict when using ZScore, not type %s" \
            % type(observation)
        try:
            p_value = prediction['mean']  # Use the prediction's mean.
        except (TypeError, KeyError, IndexError):  # If there isn't one...
            try:
                p_value = prediction['value']  # Use the prediction's value.
            except (TypeError, KeyError, IndexError):  # If there isn't one...
                p_value = prediction  # Use the prediction itself (assumed numeric).
        try:
            o_mean = observation['mean']
            o_std = observation['std']
        except KeyError:
            error = ("Observation must have keys 'mean' and 'std' "
                     "when using ZScore")
            return InsufficientDataScore(error)
        if o_std <= 0:
            error = 'Observation standard deviation must be > 0'
            return InsufficientDataScore(error)
        value = (p_value - o_mean) / o_std
        value = utils.assert_dimensionless(value)
        if np.isnan(value):
            error = 'One of the input values was NaN'
            return InsufficientDataScore(error)
        return ZScore(value)

    @property
    def norm_score(self) -> float:
        """Return the normalized score.

        Equals 1.0 for a z-score of 0, falling to 0.0 for extremely
        positive or negative values.
        """
        cdf = (1.0 + math.erf(self.score / math.sqrt(2.0))) / 2.0
        return 1 - 2 * math.fabs(0.5 - cdf)

    def __str__(self) -> str:
        return 'Z = %.2f' % self.score
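# Usage sketch for ZScore (illustrative; not part of the sciunit source).
# The observation and prediction values are hypothetical:
#
#     >>> score = ZScore.compute({'mean': 10.0, 'std': 2.0}, {'mean': 12.0})
#     >>> str(score)  # z = (12 - 10) / 2 = 1
#     'Z = 1.00'
#     >>> round(score.norm_score, 3)  # 1 - 2 * |0.5 - Phi(1)|
#     0.317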
class CohenDScore(ZScore):
    """A Cohen's D score.

    A float indicating the difference between two means, normalized by
    the pooled standard deviation.
    """

    _description = ("The Cohen's D between the prediction and the "
                    "observation")

    _best = 0.0

    _worst = np.inf

    @classmethod
    def compute(cls, observation: dict, prediction: dict) -> 'CohenDScore':
        """Compute a Cohen's D from an observation and a prediction.

        Returns:
            CohenDScore: The computed Cohen's D score.
        """
        assert isinstance(observation, dict)
        assert isinstance(prediction, dict)
        p_mean = prediction['mean']  # Use the prediction's mean.
        p_std = prediction['std']
        o_mean = observation['mean']
        o_std = observation['std']
        try:  # Try to pool, taking sample sizes into account.
            p_n = prediction['n']
            o_n = observation['n']
            s = (((p_n - 1) * (p_std**2) + (o_n - 1) * (o_std**2))
                 / (p_n + o_n - 2)) ** 0.5
        except KeyError:  # If sample sizes are not available.
            s = (p_std**2 + o_std**2) ** 0.5
        value = (p_mean - o_mean) / s
        value = utils.assert_dimensionless(value)
        return CohenDScore(value)

    def __str__(self) -> str:
        return 'D = %.2f' % self.score
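# Usage sketch for CohenDScore (illustrative; not part of the sciunit
# source). Without sample sizes, the denominator is sqrt(p_std**2 + o_std**2):
#
#     >>> score = CohenDScore.compute({'mean': 10.0, 'std': 1.0},
#     ...                             {'mean': 12.0, 'std': 1.0})
#     >>> str(score)  # (12 - 10) / sqrt(2) ~= 1.41
#     'D = 1.41'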
class RatioScore(Score):
    """A ratio of two numbers.

    Usually the prediction divided by the observation.
    """

    _allowed_types = (float,)

    _description = ('The ratio between the prediction and the observation')

    _best = 1.0  # A RatioScore of 1.0 is best.

    _worst = np.inf

    def _check_score(self, score):
        if score < 0.0:
            raise errors.InvalidScoreError(
                ("RatioScore was initialized with a score of %f, "
                 "but a RatioScore must be non-negative.") % score)

    @classmethod
    def compute(cls, observation: dict, prediction: dict,
                key=None) -> 'RatioScore':
        """Compute a ratio from an observation and a prediction.

        Returns:
            RatioScore: The ratio of the prediction to the observation.
        """
        assert isinstance(observation, (dict, float, int, pq.Quantity))
        assert isinstance(prediction, (dict, float, int, pq.Quantity))

        obs, pred = cls.extract_means_or_values(observation, prediction,
                                                key=key)
        value = pred / obs
        value = utils.assert_dimensionless(value)
        return RatioScore(value)

    @property
    def norm_score(self) -> float:
        """Return 1.0 for a ratio of 1, falling to 0.0 for extremely
        small or large values.

        Returns:
            float: The value of the norm score.
        """
        score = math.log10(self.score)
        cdf = (1.0 + math.erf(score / math.sqrt(2.0))) / 2.0
        return 1 - 2 * math.fabs(0.5 - cdf)

    def __str__(self):
        return 'Ratio = %.2f' % self.score
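# Usage sketch for RatioScore (illustrative; not part of the sciunit
# source), assuming extract_means_or_values pulls the 'mean' entries
# from the hypothetical dicts below:
#
#     >>> score = RatioScore.compute({'mean': 2.0}, {'mean': 4.0})
#     >>> str(score)  # prediction / observation = 4 / 2
#     'Ratio = 2.00'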
class RelativeDifferenceScore(Score):
    """A relative difference between prediction and observation.

    The absolute value of the difference between the prediction and the
    observation is divided by a reference value with the same units. This
    reference scale should be chosen for each test such that normalization
    produces directly comparable scores across tests. For example, if
    5 volts represents a medium-sized difference for TestA, and 10 seconds
    represents a medium-sized difference for TestB, then 5 volts and
    10 seconds should be used as the reference scales in TestA and TestB,
    respectively.

    The attribute `scale` can be passed to the compute method or set for
    the whole class in advance. Otherwise, a scale of 1 (in the units of
    the observation and prediction) will be used.
    """

    _allowed_types = (float,)

    _description = ('The relative difference between the prediction and '
                    'the observation')

    _best = 0.0  # A RelativeDifferenceScore of 0.0 is best.

    _worst = np.inf

    scale = None

    def _check_score(self, score):
        if score < 0.0:
            raise errors.InvalidScoreError(
                ("RelativeDifferenceScore was initialized with a score "
                 "of %f, but a RelativeDifferenceScore must be "
                 "non-negative.") % score)

    @classmethod
    def compute(cls, observation: Union[dict, float, int, pq.Quantity],
                prediction: Union[dict, float, int, pq.Quantity],
                key=None,
                scale: Union[float, int, pq.Quantity, None] = None
                ) -> 'RelativeDifferenceScore':
        """Compute the relative difference between the observation and
        the prediction.

        Returns:
            RelativeDifferenceScore: The relative difference between the
                observation and the prediction.
        """
        assert isinstance(observation, (dict, float, int, pq.Quantity))
        assert isinstance(prediction, (dict, float, int, pq.Quantity))

        obs, pred = cls.extract_means_or_values(observation, prediction,
                                                key=key)

        # Use the scale passed in, then the class-level scale, and fall
        # back to 1 in the units of the observation.
        scale = scale or cls.scale or (obs / float(obs))
        assert type(obs) is type(scale)
        assert type(obs) is type(pred)
        if isinstance(obs, pq.Quantity):
            assert obs.units == pred.units, \
                "Prediction must have the same units as the observation"
            assert obs.units == scale.units, \
                ("RelativeDifferenceScore.scale must have the same units "
                 "as the observation")
        assert scale > 0, \
            "RelativeDifferenceScore.scale must be positive (not %g)" % scale
        value = np.abs(pred - obs) / scale
        value = utils.assert_dimensionless(value)
        return RelativeDifferenceScore(value)

    @property
    def norm_score(self) -> float:
        """Return 1.0 for a relative difference of 0.0, falling to 0.0
        for extremely large values.

        Returns:
            float: The value of the norm score.
        """
        x = self.score
        return 1 / (1 + x)

    def __str__(self):
        return 'Relative Difference = %.2f' % self.score
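# Usage sketch for RelativeDifferenceScore (illustrative; not part of the
# sciunit source). With no scale given, a scale of 1 in the inputs' units
# is used; the values below are hypothetical:
#
#     >>> score = RelativeDifferenceScore.compute(2.0, 3.0)
#     >>> str(score)  # |3 - 2| / 1 = 1
#     'Relative Difference = 1.00'
#     >>> float(score.norm_score)  # 1 / (1 + 1)
#     0.5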
class PercentScore(Score):
    """A percent score.

    A float in the range [0, 100.0] where higher is better.
    """

    _description = ('100.0 is considered perfect agreement between the '
                    'observation and the prediction. 0.0 is the worst '
                    'possible agreement')

    _best = 100.0

    _worst = 0.0

    def _check_score(self, score):
        if not (0.0 <= score <= 100.0):
            raise errors.InvalidScoreError(
                "Score of %f must be in range 0.0-100.0" % score)

    @property
    def norm_score(self) -> float:
        """Return the percent score divided by 100: 1.0 for a percent
        score of 100, and 0.0 for a percent score of 0.

        Returns:
            float: The percent score divided by 100.
        """
        return float(self.score) / 100

    def __str__(self) -> str:
        return '%.1f%%' % self.score
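# Usage sketch for PercentScore (illustrative; not part of the sciunit
# source), constructed directly from a hypothetical percent value:
#
#     >>> score = PercentScore(85.0)
#     >>> score.norm_score, str(score)
#     (0.85, '85.0%')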
class FloatScore(Score):
    """A float score.

    A float with any value.
    """

    _allowed_types = (float, pq.Quantity)

    # The best value is indeterminate without more context,
    # but some float value must be supplied to use methods like Test.ace().
    _best = 0.0

    # The worst value is also indeterminate without more context.
    _worst = 0.0

    def _check_score(self, score):
        if isinstance(score, pq.Quantity) and score.size != 1:
            raise errors.InvalidScoreError("Score must have size 1.")

    _description = ('There is no canonical mapping between this score type '
                    'and a measure of agreement between the observation '
                    'and the prediction')

    @classmethod
    def compute_ssd(cls, observation: dict, prediction: dict) -> Score:
        """Compute the sum of squared differences between the observation
        and the prediction.

        Args:
            observation (dict): The observation to be used for computing
                the sum of squared differences.
            prediction (dict): The prediction to be used for computing
                the sum of squared differences.

        Returns:
            Score: The sum of squared differences between the observation
                and the prediction.
        """
        # The sum of the squared differences.
        value = ((observation - prediction)**2).sum()
        return FloatScore(value)

    def __str__(self) -> str:
        return '%.3g' % self.score
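# Usage sketch for FloatScore.compute_ssd (illustrative; not part of the
# sciunit source). Although the signature is annotated with dicts, the body
# subtracts the inputs directly, so array-like values are what actually work:
#
#     >>> import numpy as np
#     >>> score = FloatScore.compute_ssd(np.array([1.0, 2.0, 3.0]),
#     ...                                np.array([1.0, 2.0, 4.0]))
#     >>> str(score)  # 0**2 + 0**2 + (-1)**2 = 1
#     '1'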
class RandomScore(FloatScore):
    """A random score in [0, 1].

    This has no scientific value and should only be used for debugging
    purposes. For example, one might assign a random score under some error
    condition to move forward with an application that requires a numeric
    score, and use the presence of a RandomScore in the output as an
    indication of an internal error.
    """

    _allowed_types = (float,)

    _description = ('A random number in [0, 1] that has no relation to '
                    'the prediction or the observation')

    def __str__(self) -> str:
        return '%.3g' % self.score
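# Usage sketch for RandomScore (illustrative; not part of the sciunit
# source). The class does not draw the random number itself; the caller
# supplies it:
#
#     >>> import random
#     >>> score = RandomScore(random.random())  # e.g. on an internal error
#     >>> 0.0 <= score.score <= 1.0
#     True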
class CorrelationScore(Score):
    """A correlation score.

    A float in the range [-1.0, 1.0] representing the correlation
    coefficient.
    """

    _description = ('A correlation of -1.0 shows a perfect negative '
                    'correlation, while a correlation of 1.0 shows a '
                    'perfect positive correlation. A correlation of 0.0 '
                    'shows no linear relationship between the movement '
                    'of the two variables')

    _best = 1.0

    _worst = -1.0

    def _check_score(self, score):
        if not (-1.0 <= score <= 1.0):
            raise errors.InvalidScoreError(
                "Score of %.3g must be in range [-1.0, 1.0]" % score)

    @classmethod
    def compute(cls, observation, prediction) -> 'CorrelationScore':
        """Compute the Pearson correlation coefficient between the
        observation and the prediction.
        """
        return CorrelationScore(
            float(np.corrcoef(observation, prediction)[0, 1]))

    def __str__(self):
        return '%.3g' % self.score
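# Usage sketch for CorrelationScore (illustrative; not part of the sciunit
# source), using hypothetical perfectly linear data:
#
#     >>> score = CorrelationScore.compute([1.0, 2.0, 3.0], [2.0, 4.0, 6.0])
#     >>> round(score.score, 6)
#     1.0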