"""
.. Dstl (c) Crown Copyright 2019
Basic attribute level faults, mostly basic numeric manipulations. A good place to get started.
"""
from noisify.faults.utilities import dropped_scramble
from .fault import AttributeFault
import random
class GaussianNoise(AttributeFault):
    """
    Applies a gaussian noise to a numeric object.

    >>> noise = GaussianNoise(sigma=0.5)
    >>> noise.impact(27)
    28.08656007204934

    Numpy array-like objects have noise applied separately to each element.

    >>> import numpy as np
    >>> test = np.arange(5)
    >>> noise.impact(test)
    array([0.56983913, 0.92835482, 2.36240306, 2.87398093, 3.92371237])
    """

    def __init__(self, sigma=0):
        """
        Instantiate with sigma, mu is set to the value of the passed in object.

        :param sigma: Standard deviation of the noise that is applied
        """
        AttributeFault.__init__(self, sigma=sigma)
        self.sigma = sigma

    @register_implementation(priority=15)
    def pil_image(self, image_object):
        """
        Support for PIL image objects, undetectable unless high sigma given.

        :param image_object: PIL image to add noise to
        :return: New PIL image built from the noisy pixel values
        """
        from PIL import Image
        import numpy as np
        input_size = image_size(image_object)
        noise_mask = np.random.normal(scale=self.sigma, size=input_size)
        image_array = np.array(image_object)
        # Clip to the 0-255 range before casting so the uint8 conversion cannot wrap around.
        output = Image.fromarray(np.uint8(np.clip(image_array + noise_mask, 0, 255)))
        return output

    @register_implementation(priority=12)
    def pandas_df(self, data_frame):
        """
        Support for pandas dataframes.

        :param data_frame: Dataframe to add noise to
        :return: Result of adding one noise sample per cell to the dataframe
        """
        import numpy as np
        noise_mask = np.random.normal(scale=self.sigma, size=data_frame.shape)
        return data_frame.add(noise_mask)

    @register_implementation(priority=10)
    def numpy_array(self, array_like_object):
        """
        Support for numpy arrays.

        :param array_like_object: Array to add noise to
        :return: Sum of the input and a noise mask of identical shape
        """
        import numpy as np
        # The mask must follow the input's shape rather than its flat element count,
        # otherwise multi-dimensional inputs cannot be broadcast against it.
        noise_mask = np.random.normal(scale=self.sigma, size=array_like_object.shape)
        return array_like_object + noise_mask

    @register_implementation(priority=1)
    def python_numeric(self, python_numeric_object):
        """
        Support for basic Python numeric types.

        :param python_numeric_object: Number used as the mean of the distribution
        :return: Single sample drawn around the input with standard deviation sigma
        """
        return random.gauss(python_numeric_object, self.sigma)
class UnitFault(AttributeFault):
    """
    Applies a user defined adjustment to the input numeric object. Useful for modelling unit errors.

    >>> def celsius_to_kelvin(celsius_value):
    ...     return celsius_value + 273.15
    ...
    >>> kelvin_fault = UnitFault(unit_modifier=celsius_to_kelvin)
    >>> kelvin_fault.impact(21)
    294.15
    """

    def __init__(self, likelihood=1.0, unit_modifier=None):
        """
        Instantiate with a function or lambda to apply the necessary unit conversion to a numeric.

        :param likelihood: Forwarded unchanged to AttributeFault
        :param unit_modifier: Callable applied to the input value
        :raises NotImplementedError: If no unit_modifier is supplied
        """
        if not unit_modifier:
            raise NotImplementedError('You need to provide a function to convert the units')
        AttributeFault.__init__(self, likelihood=likelihood, unit_modifier=unit_modifier)
        self.unit_modifier = unit_modifier

    @register_implementation(priority=15)
    def pil_image(self, image_object):
        """
        Support for PIL images.

        :param image_object: PIL image to convert
        :return: New PIL image built from the modified pixel values
        """
        from PIL import Image
        import numpy as np
        image_array = np.array(image_object)
        # Clip to the 0-255 range before casting so the uint8 conversion cannot wrap around.
        output = Image.fromarray(np.uint8(np.clip(self.unit_modifier(image_array), 0, 255)))
        return output

    @register_implementation(priority=1)
    def numeric(self, numeric_object):
        """
        Support for basic numeric types, including dataframes and numpy arrays.

        :param numeric_object: Value handed directly to unit_modifier
        :return: Whatever unit_modifier returns for the input
        """
        return self.unit_modifier(numeric_object)
class CalibrationFault(UnitFault):
    """
    UnitFault specialisation that shifts the input numeric by a fixed amount.

    >>> calibration_fault = CalibrationFault(10)
    >>> calibration_fault.impact(200)
    210
    """

    def __init__(self, offset=0):
        """
        Build the shifting function and register it as the unit modifier.

        :param offset: Numeric added to every value passed through the fault
        """
        def offsetter(value):
            # Closes over the offset given at construction time.
            return value + offset

        super().__init__(unit_modifier=offsetter)
class InterruptionFault(AttributeFault):
    """
    Replaces input with None, activates according to set likelihood.

    Array-like inputs (numpy arrays, pandas dataframes, PIL images) are not replaced
    wholesale; instead individual elements are set to zero.

    >>> interrupt = InterruptionFault(1.0)
    >>> interrupt.impact('This can be anything')
    >>>
    """

    def __init__(self, likelihood=0):
        """
        :param likelihood: Probability as 0-1 float
        """
        AttributeFault.__init__(self, likelihood=likelihood)

    @register_implementation(priority=15)
    def pil_image(self, image_object):
        """
        Support for PIL images.

        :param image_object: PIL image to interrupt
        :return: New PIL image built from the partially zeroed pixel values
        """
        from PIL import Image
        import numpy as np
        image_array = np.array(image_object)
        # Reuse the array implementation, then convert back into an 8-bit image.
        output = Image.fromarray(np.uint8(self.numpy_array(image_array)))
        return output

    @register_implementation(priority=12)
    def numpy_array(self, array_like_object):
        """
        Support numpy arrays and pandas dataframes.

        :param array_like_object: Object exposing ``shape`` and ``copy``
        :return: Copy of the input with a random subset of elements set to zero
        """
        import numpy as np
        # One uniform sample per element; those falling below the likelihood are interrupted.
        noise_mask = np.random.uniform(size=array_like_object.shape)
        # Work on a copy so the caller's object is left untouched.
        output_array = array_like_object.copy()
        output_array[noise_mask < self.likelihood] = 0
        return output_array

    @register_implementation(priority=-1)
    def impact_truth(self, truth):
        """
        Basic behaviour, just returns None!

        :param truth: Ignored
        :return: None
        """
        return None
class TypographicalFault(AttributeFault):
    """
    Applies a rough misspelling to the input using faults.utilities.typo()

    >>> from noisify.faults import TypographicalFault
    >>> typo_fault = TypographicalFault(1.0, 1)
    >>> typo_fault.impact('This is the original text')
    'Thhiisith heiginal etxt'
    """

    def __init__(self, likelihood=0, severity=0):
        """
        Instantiate with a likelihood of making a typo, and a severity metric, severities significantly larger than 1
        can lead to unstable behaviours

        :param likelihood: Probability as 0-1 float
        :param severity: Passed to typo() for every scramble
        """
        AttributeFault.__init__(self, likelihood=likelihood, severity=severity)
        self.severity = severity

    @register_implementation(priority=1)
    def impact_string(self, string_object: str):
        """
        Scrambles strings.

        :param string_object: Text to scramble
        :return: Result of typo() for the text at the configured severity
        """
        return typo(string_object, self.severity)

    @register_implementation(priority=1)
    def impact_int(self, int_object: int):
        """
        Scrambles ints, ensures still valid before returning.

        :param int_object: Integer whose string form is scrambled
        :return: Integer parsed from the scrambled text, 0 if nothing usable is left
        """
        scrambled_int = self.impact_string(str(int_object))
        try:
            return int(scrambled_int or 0)
        except ValueError:
            # The scramble can move a minus sign away from the front (e.g. '1-2'),
            # which int() rejects. Keep only the digits instead of crashing.
            digits = ''.join(char for char in scrambled_int if char.isdecimal())
            return int(digits or 0)

    @register_implementation(priority=1)
    def impact_float(self, float_object: float):
        """
        Scrambles floats, ensures still valid before returning.

        :param float_object: Float whose string form is scrambled
        :return: Float parsed from the scrambled text, 0.0 if nothing usable is left
        """
        scrambled_float = self.impact_string(str(float_object))
        point_found = False
        clean_float = []
        for char in scrambled_float:
            if char == '.':
                # Only the first decimal point survives, repeats are discarded.
                if point_found:
                    continue
                point_found = True
            clean_float.append(char)
        candidate = ''.join(clean_float)
        try:
            return float(candidate or 0)
        except ValueError:
            # Still unparseable: a lone '.', a displaced sign or a broken exponent/nan/inf.
            # Fall back to the digits and the (single) decimal point.
            numeric_only = ''.join(char for char in candidate if char == '.' or char.isdecimal())
            if not any(char.isdecimal() for char in numeric_only):
                return 0.0
            return float(numeric_only)
def typo(string, severity):
    """
    Roughly rearranges a string, occasionally missing out characters.

    The scrambling itself is delegated to faults.utilities.dropped_scramble, described by the
    original author as applying a gaussian noise filter to the string character indexes and then
    rounding to the closest index.

    :param string: Text to scramble
    :param severity: Converted to float and handed to dropped_scramble
    :return: mistyped string
    """
    # The third argument is a fixed 3; its meaning is defined by dropped_scramble.
    scrambled_characters = dropped_scramble(string, float(severity), 3)
    return ''.join(scrambled_characters)
def get_mode_size(mode):
    """
    Converts a PIL image mode string into a dimension cardinality.

    :param mode: Mode string, e.g. 'RGB'
    :return: Number of upper case characters found in the mode string
    """
    return sum(1 for character in mode if character.isupper())
def image_size(image_object):
    """
    Works out the array shape to use for an image.

    :param image_object: Object exposing ``mode``, ``height`` and ``width``
    :return: (height, width, channels) when the mode has more than one channel,
        otherwise (height, width)
    """
    depth = get_mode_size(image_object.mode)
    shape = (image_object.height, image_object.width)
    # Single channel (or channel-less) modes get a two dimensional shape.
    return shape + (depth,) if depth > 1 else shape