"""Score types for tests that completed successfully.
These include various representations of goodness-of-fit.
"""
import math
import numpy as np
import quantities as pq
from typing import Union
from sciunit import utils
from sciunit import errors
from .base import Score
from .incomplete import InsufficientDataScore
class BooleanScore(Score):
"""A boolean score, which must be True or False."""
_allowed_types = (bool,)
_description = ('True if the observation and prediction were '
'sufficiently similar; False otherwise')
_best = True
_worst = False
    @classmethod
    def compute(cls, observation: dict, prediction: dict) -> 'BooleanScore':
        """Compute whether the observation equals the prediction.

        Returns:
            BooleanScore: True if the observation equals the prediction,
                False otherwise.
        """
        return BooleanScore(observation == prediction)
@property
def norm_score(self) -> float:
"""Return 1.0 for a True score and 0.0 for False score.
Returns:
float: 1.0 for a True score and 0.0 for False score.
"""
return 1.0 if self.score else 0.0
    def __str__(self) -> str:
        return 'Pass' if self.score else 'Fail'
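
# Usage sketch for BooleanScore (illustrative values, not from the library docs):
#     score = BooleanScore.compute({'mean': 5}, {'mean': 5})
#     score.score       # True
#     score.norm_score  # 1.0
#     str(score)        # 'Pass'
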
class ZScore(Score):
"""A Z score.
A float indicating standardized difference
from a reference mean.
"""
_allowed_types = (float,)
_description = ('The difference between the means of the observation and '
'prediction divided by the standard deviation of the '
'observation')
_best = 0.0 # A Z-Score of 0.0 is best
_worst = np.inf # A Z-score of infinity (or negative infinity) is worst
    @classmethod
    def compute(cls, observation: dict, prediction: dict) -> 'ZScore':
"""Compute a z-score from an observation and a prediction.
Returns:
ZScore: The computed Z-Score.
"""
assert isinstance(observation, dict),\
"Observation must be a dict when using ZScore, not type %s" \
% type(observation)
try:
p_value = prediction['mean'] # Use the prediction's mean.
except (TypeError, KeyError, IndexError): # If there isn't one...
try:
p_value = prediction['value'] # Use the prediction's value.
except (TypeError, IndexError): # If there isn't one...
p_value = prediction # Use the prediction (assume numeric).
try:
o_mean = observation['mean']
o_std = observation['std']
except KeyError:
error = ("Observation must have keys 'mean' and 'std' "
"when using ZScore")
return InsufficientDataScore(error)
if o_std <= 0:
error = 'Observation standard deviation must be > 0'
return InsufficientDataScore(error)
value = (p_value - o_mean)/o_std
value = utils.assert_dimensionless(value)
if np.isnan(value):
error = 'One of the input values was NaN'
return InsufficientDataScore(error)
score = ZScore(value)
return score
@property
def norm_score(self) -> float:
"""Return the normalized score.
Equals 1.0 for a z-score of 0, falling to 0.0 for extremely positive
or negative values.
"""
cdf = (1.0 + math.erf(self.score / math.sqrt(2.0))) / 2.0
return 1 - 2*math.fabs(0.5 - cdf)
    def __str__(self) -> str:
        return 'Z = %.2f' % self.score
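
# Usage sketch for ZScore (illustrative values): a prediction one observed
# standard deviation above the observed mean gives Z = 1.
#     score = ZScore.compute({'mean': 5.0, 'std': 1.0}, {'mean': 6.0})
#     score.score       # (6.0 - 5.0) / 1.0 = 1.0
#     score.norm_score  # ~0.317, the two-tailed tail probability of |Z| = 1
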
class CohenDScore(ZScore):
"""A Cohen's D score.
A float indicating difference
between two means normalized by the pooled standard deviation.
"""
_description = ("The Cohen's D between the prediction and the observation")
_best = 0.0
_worst = np.inf
    @classmethod
    def compute(cls, observation: dict, prediction: dict) -> 'CohenDScore':
"""Compute a Cohen's D from an observation and a prediction.
Returns:
CohenDScore: The computed Cohen's D Score.
"""
assert isinstance(observation, dict)
assert isinstance(prediction, dict)
p_mean = prediction['mean'] # Use the prediction's mean.
p_std = prediction['std']
o_mean = observation['mean']
o_std = observation['std']
try: # Try to pool taking samples sizes into account.
p_n = prediction['n']
o_n = observation['n']
s = (((p_n-1)*(p_std**2) + (o_n-1)*(o_std**2))/(p_n+o_n-2))**0.5
        except KeyError:  # If sample sizes are not available,
            # approximate the pooled SD as the root sum of the two variances.
            s = (p_std**2 + o_std**2)**0.5
value = (p_mean - o_mean)/s
value = utils.assert_dimensionless(value)
return CohenDScore(value)
    def __str__(self) -> str:
        return 'D = %.2f' % self.score
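
# Usage sketch for CohenDScore (illustrative values; without sample sizes the
# fallback pooling s = sqrt(p_std**2 + o_std**2) is used):
#     score = CohenDScore.compute({'mean': 1.0, 'std': 1.0},
#                                 {'mean': 2.0, 'std': 1.0})
#     score.score  # (2.0 - 1.0) / sqrt(2) ~ 0.71
#     str(score)   # 'D = 0.71'
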
class RatioScore(Score):
"""A ratio of two numbers.
Usually the prediction divided by
the observation.
"""
_allowed_types = (float,)
_description = ('The ratio between the prediction and the observation')
_best = 1.0 # A RatioScore of 1.0 is best
_worst = np.inf
    def _check_score(self, score):
if score < 0.0:
raise errors.InvalidScoreError(("RatioScore was initialized with "
"a score of %f, but a RatioScore "
"must be non-negative.") % score)
    @classmethod
    def compute(cls, observation: Union[dict, float, int, pq.Quantity],
                prediction: Union[dict, float, int, pq.Quantity],
                key=None) -> 'RatioScore':
"""Compute a ratio from an observation and a prediction.
Returns:
RatioScore: A RatioScore of ratio from an observation and a prediction.
"""
assert isinstance(observation, (dict, float, int, pq.Quantity))
assert isinstance(prediction, (dict, float, int, pq.Quantity))
obs, pred = cls.extract_means_or_values(observation, prediction,
key=key)
value = pred / obs
value = utils.assert_dimensionless(value)
return RatioScore(value)
@property
def norm_score(self) -> float:
"""Return 1.0 for a ratio of 1, falling to 0.0 for extremely small or large values.
Returns:
float: The value of the norm score.
"""
score = math.log10(self.score)
cdf = (1.0 + math.erf(score / math.sqrt(2.0))) / 2.0
return 1 - 2*math.fabs(0.5 - cdf)
    def __str__(self):
        return 'Ratio = %.2f' % self.score
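
# Usage sketch for RatioScore (illustrative values): a prediction twice the
# observation.
#     score = RatioScore.compute({'mean': 2.0}, {'mean': 4.0})
#     score.score       # 4.0 / 2.0 = 2.0
#     score.norm_score  # ~0.76; the norm peaks at 1.0 when the ratio is 1
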
class RelativeDifferenceScore(Score):
"""A relative difference between prediction and observation.
The absolute value of the difference between the prediction and the
observation is divided by a reference value with the same units. This
reference scale should be chosen for each test such that normalization
    produces directly comparable scores across tests. For example, if 5 volts
    represents a medium-sized difference for TestA, and 10 seconds represents
    a medium-sized difference for TestB, then 5 volts and 10 seconds should be
    used as this reference scale in TestA and TestB, respectively. The
attribute `scale` can be passed to the compute method or set for the whole
class in advance. Otherwise, a scale of 1 (in the units of the
observation and prediction) will be used.
"""
_allowed_types = (float,)
_description = ('The relative difference between the prediction and the observation')
_best = 0.0 # A RelativeDifferenceScore of 0.0 is best
_worst = np.inf
scale = None
    def _check_score(self, score):
if score < 0.0:
raise errors.InvalidScoreError(("RelativeDifferenceScore was initialized with "
"a score of %f, but a RelativeDifferenceScore "
"must be non-negative.") % score)
    @classmethod
def compute(cls, observation: Union[dict, float, int, pq.Quantity],
prediction: Union[dict, float, int, pq.Quantity],
key=None,
scale: Union[float, int, pq.Quantity, None] = None) -> 'RelativeDifferenceScore':
"""Compute the relative difference between the observation and a prediction.
Returns:
RelativeDifferenceScore: A relative difference between an observation and a prediction.
"""
assert isinstance(observation, (dict, float, int, pq.Quantity))
assert isinstance(prediction, (dict, float, int, pq.Quantity))
obs, pred = cls.extract_means_or_values(observation, prediction,
key=key)
        # Default scale: 1 in the units of the observation
        # (obs/float(obs) is exactly 1, carrying obs's units).
        scale = scale or cls.scale or (obs/float(obs))
assert type(obs) is type(scale)
assert type(obs) is type(pred)
if isinstance(obs, pq.Quantity):
assert obs.units == pred.units, \
"Prediction must have the same units as the observation"
assert obs.units == scale.units, \
"RelativeDifferenceScore.Scale must have the same units as the observation"
assert scale > 0, \
"RelativeDifferenceScore.scale must be positive (not %g)" % scale
value = np.abs(pred - obs) / scale
value = utils.assert_dimensionless(value)
return RelativeDifferenceScore(value)
@property
def norm_score(self) -> float:
"""Return 1.0 for a ratio of 0.0, falling to 0.0 for extremely large values.
Returns:
float: The value of the norm score.
"""
x = self.score
return 1 / (1+x)
    def __str__(self):
        return 'Relative Difference = %.2f' % self.score
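
# Usage sketch for RelativeDifferenceScore (illustrative plain floats, so the
# default scale of 1 applies; quantities would need a `scale` in matching units):
#     score = RelativeDifferenceScore.compute(2.0, 3.0)
#     score.score       # |3.0 - 2.0| / 1.0 = 1.0
#     score.norm_score  # 1 / (1 + 1.0) = 0.5
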
class PercentScore(Score):
"""A percent score.
A float in the range [0, 100.0] where higher is better.
"""
_description = ('100.0 is considered perfect agreement between the '
'observation and the prediction. 0.0 is the worst possible'
' agreement')
_best = 100.0
_worst = 0.0
    def _check_score(self, score):
if not (0.0 <= score <= 100.0):
raise errors.InvalidScoreError(("Score of %f must be in "
"range 0.0-100.0" % score))
@property
def norm_score(self) -> float:
"""Return 1.0 for a percent score of 100, and 0.0 for 0.
Returns:
float: 1.0 if the percent score is 100, else 0.0.
"""
return float(self.score)/100
    def __str__(self) -> str:
        return '%.1f%%' % self.score
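
# Usage sketch for PercentScore (illustrative value):
#     score = PercentScore(75.0)
#     score.norm_score  # 0.75
#     str(score)        # '75.0%'
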
class FloatScore(Score):
"""A float score.
A float with any value.
"""
_allowed_types = (float, pq.Quantity,)
    # The best value is indeterminate without more context,
    # but some float value must be supplied to use methods like Test.ace().
    _best = 0.0
    # The worst value is likewise indeterminate without more context.
    _worst = 0.0
    def _check_score(self, score):
if isinstance(score, pq.Quantity) and score.size != 1:
raise errors.InvalidScoreError("Score must have size 1.")
_description = ('There is no canonical mapping between this score type and'
' a measure of agreement between the observation and the '
'prediction')
    @classmethod
    def compute_ssd(cls, observation: Union[np.ndarray, pq.Quantity],
                    prediction: Union[np.ndarray, pq.Quantity]) -> Score:
        """Compute the sum of squared differences between observation and prediction.

        Args:
            observation (ndarray or Quantity): The observed values.
            prediction (ndarray or Quantity): The predicted values.
Returns:
Score: The sum-squared diff between observation and prediction.
"""
# The sum of the squared differences.
value = ((observation - prediction)**2).sum()
score = FloatScore(value)
return score
    def __str__(self) -> str:
        return '%.3g' % self.score
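
# Usage sketch for FloatScore.compute_ssd (illustrative arrays):
#     score = FloatScore.compute_ssd(np.array([1.0, 2.0]),
#                                    np.array([1.5, 2.5]))
#     score.score  # 0.25 + 0.25 = 0.5
#     str(score)   # '0.5'
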
class RandomScore(FloatScore):
"""A random score in [0,1].
This has no scientific value and should only be used for debugging
purposes. For example, one might assign a random score under some error
condition to move forward with an application that requires a numeric
score, and use the presence of a RandomScore in the output as an
indication of an internal error.
"""
_allowed_types = (float,)
    _description = ('A random number in [0, 1] that has no relation to '
                    'the prediction or the observation')
    def __str__(self) -> str:
return '%.3g' % self.score
class CorrelationScore(Score):
"""A correlation score.
A float in the range [-1.0, 1.0] representing the correlation coefficient.
"""
    _description = ('A correlation of -1.0 shows a perfect negative correlation, '
                    'while a correlation of 1.0 shows a perfect positive correlation. '
                    'A correlation of 0.0 shows no linear relationship between '
                    'the two variables')
_best = 1.0
_worst = -1.0
    def _check_score(self, score):
if not (-1.0 <= score <= 1.0):
raise errors.InvalidScoreError(("Score of %.3g must be in "
"range [-1.0, 1.0]" % score))
    @classmethod
    def compute(cls, observation, prediction):
        """Compute the Pearson correlation coefficient between the observation and the prediction."""
return CorrelationScore(float(np.corrcoef(observation, prediction)[0, 1]))
    def __str__(self):
return '%.3g' % self.score
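
# Usage sketch for CorrelationScore (illustrative sequences with a perfect
# positive linear relationship):
#     score = CorrelationScore.compute([1.0, 2.0, 3.0], [2.0, 4.0, 6.0])
#     score.score  # 1.0
#     str(score)   # '1'
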