"""
Affine transforms implemented on torch tensors, and
requiring only one interpolation
"""
import math
import random
import torch as th
from ..utils import th_affine2d, th_random_choice
class RandomAffine:

    def __init__(self,
                 rotation_range=None,
                 translation_range=None,
                 shear_range=None,
                 zoom_range=None,
                 interp='bilinear',
                 lazy=False):
        """
        Perform a random affine transform built from several random
        sub-transforms, using only one interpolation and without having
        to instantiate each sub-transform individually.

        The enabled sub-transforms are combined by matrix product in the
        fixed order rotation, translation, shear, zoom.

        Arguments
        ---------
        rotation_range : one integer or float
            forwarded to RandomRotate; the angle is drawn uniformly from
            (-rotation_range, rotation_range) degrees
        translation_range : a float or a tuple/list with 2 floats between [0, 1)
            forwarded to RandomTranslate
            first value:
                bound on the shift, as a fraction of `input.size(1)`
            second value:
                bound on the shift, as a fraction of `input.size(2)`
        shear_range : float
            forwarded to RandomShear; the shear is drawn uniformly from
            (-shear_range, shear_range) degrees
        zoom_range : list/tuple with two floats between [0, infinity).
            forwarded to RandomZoom
            first float should be less than the second
            lower and upper bounds on percent zoom.
            Anything less than 1.0 will zoom in on the image,
            anything greater than 1.0 will zoom out on the image.
            e.g. (0.7, 1.0) will only zoom in,
                 (1.0, 1.4) will only zoom out,
                 (0.7, 1.4) will randomly zoom in or out
        interp : string in {'bilinear', 'nearest'} or list of strings
            type of interpolation to use. You can provide a different
            type of interpolation for each input, e.g. if you have two
            inputs then you can say `interp=['bilinear','nearest']
        lazy : boolean
            if true, only create the combined affine matrix and return that
            if false, perform the transform on the inputs and return them

        Raises
        ------
        ValueError
            if none of the four range arguments is given
        """
        # every sub-transform is created lazy so that calling it only
        # yields its matrix; interpolation happens once, in __call__
        self.transforms = []
        if rotation_range is not None:
            self.transforms.append(RandomRotate(rotation_range, lazy=True))
        if translation_range is not None:
            self.transforms.append(RandomTranslate(translation_range, lazy=True))
        if shear_range is not None:
            self.transforms.append(RandomShear(shear_range, lazy=True))
        if zoom_range is not None:
            self.transforms.append(RandomZoom(zoom_range, lazy=True))

        self.interp = interp
        self.lazy = lazy

        if not self.transforms:
            raise ValueError('Must give at least one transform parameter')

    def __call__(self, *inputs):
        """
        Sample every sub-transform, multiply the resulting matrices and
        either return the product (lazy) or apply it to all inputs.

        The sampled matrix is also stored on `self.tform_matrix`.
        """
        # only the first input is handed to the sub-transforms
        reference = inputs[0]
        tform_matrix = self.transforms[0](reference)
        for tform in self.transforms[1:]:
            tform_matrix = tform_matrix.mm(tform(reference))

        self.tform_matrix = tform_matrix

        if self.lazy:
            return tform_matrix
        return Affine(tform_matrix, interp=self.interp)(*inputs)
class Affine:

    def __init__(self,
                 tform_matrix,
                 interp='bilinear'):
        """
        Apply one fixed, pre-computed affine transformation matrix to
        every input.

        Arguments
        ---------
        tform_matrix : a 2x3 or 3x3 matrix
            affine transformation matrix to apply
        interp : string in {'bilinear', 'nearest'} or list of strings
            type of interpolation to use. You can provide a different
            type of interpolation for each input, e.g. if you have two
            inputs then you can say `interp=['bilinear','nearest']
        """
        self.tform_matrix = tform_matrix
        self.interp = interp

    def __call__(self, *inputs):
        """
        Transform each input with `th_affine2d`.

        Returns the transformed tensor itself when exactly one input is
        given, otherwise a list with one entry per input (empty when
        called without inputs).
        """
        if isinstance(self.interp, (tuple, list)):
            interp = self.interp
        else:
            # broadcast the single mode to every input
            interp = [self.interp] * len(inputs)

        outputs = [th_affine2d(_input, self.tform_matrix, mode=interp[idx])
                   for idx, _input in enumerate(inputs)]
        # decide on the number of outputs instead of a loop index that
        # stays None (and cannot be compared to 1) when inputs is empty
        return outputs[0] if len(outputs) == 1 else outputs
class AffineCompose:

    def __init__(self,
                 transforms,
                 interp='bilinear'):
        """
        Apply a collection of explicit affine transforms to an input image,
        and to a target image if necessary

        Arguments
        ---------
        transforms : list or tuple
            each element in the list/tuple should be an affine transform.
            currently supported transforms:
                - Rotate()
                - Translate()
                - Shear()
                - Zoom()
            NOTE: the given transform objects are modified in place --
            their `lazy` attribute is set to True.
        interp : string in {'bilinear', 'nearest'} or list of strings
            type of interpolation to use. You can provide a different
            type of interpolation for each input, e.g. if you have two
            inputs then you can say `interp=['bilinear','nearest']
        """
        self.transforms = transforms
        self.interp = interp
        # set transforms to lazy so they only return the tform matrix
        for t in self.transforms:
            t.lazy = True

    def __call__(self, *inputs):
        """
        Multiply the matrices of all transforms (in list order) and apply
        the product to every input with a single interpolation.

        Returns the transformed tensor for one input, a list otherwise.
        """
        # collect all of the lazily returned tform matrices; only the
        # first input is handed to the individual transforms
        reference = inputs[0]
        tform_matrix = self.transforms[0](reference)
        for tform in self.transforms[1:]:
            tform_matrix = tform_matrix.mm(tform(reference))

        # Affine performs exactly the per-input th_affine2d loop that used
        # to be duplicated here (same arguments, same return convention)
        return Affine(tform_matrix, interp=self.interp)(*inputs)
class RandomRotate:

    def __init__(self,
                 rotation_range,
                 interp='bilinear',
                 lazy=False):
        """
        Randomly rotate an image between (-degrees, degrees). If the image
        has multiple channels, the same rotation will be applied to each channel.

        Arguments
        ---------
        rotation_range : integer or float
            image will be rotated between (-degrees, degrees) degrees
        interp : string in {'bilinear', 'nearest'} or list of strings
            type of interpolation to use. You can provide a different
            type of interpolation for each input, e.g. if you have two
            inputs then you can say `interp=['bilinear','nearest']
        lazy : boolean
            if true, only create the affine transform matrix and return that
            if false, perform the transform on the tensor and return the tensor
        """
        self.rotation_range = rotation_range
        self.interp = interp
        self.lazy = lazy

    def __call__(self, *inputs):
        """
        Draw one angle uniformly from (-rotation_range, rotation_range)
        and delegate to Rotate, either lazily (matrix only, built from
        the first input) or applied to all inputs.
        """
        degree = random.uniform(-self.rotation_range, self.rotation_range)

        if self.lazy:
            return Rotate(degree, lazy=True)(inputs[0])
        return Rotate(degree, interp=self.interp)(*inputs)
class RandomChoiceRotate:

    def __init__(self,
                 values,
                 p=None,
                 interp='bilinear',
                 lazy=False):
        """
        Randomly rotate an image from a list of values. If the image
        has multiple channels, the same rotation will be applied to each channel.

        Arguments
        ---------
        values : a list or tuple
            the values from which the rotation value will be sampled
            (a list/tuple is converted to a FloatTensor)
        p : a list or tuple the same length as `values`
            the probabilities of sampling any given value. Must sum to 1.
            Defaults to a uniform distribution over `values`.
        interp : string in {'bilinear', 'nearest'} or list of strings
            type of interpolation to use. You can provide a different
            type of interpolation for each input, e.g. if you have two
            inputs then you can say `interp=['bilinear','nearest']
        lazy : boolean
            if true, only create the affine transform matrix and return that
            if false, perform the transform on the tensor and return the tensor

        Raises
        ------
        ValueError
            if `p` is given and its sum differs from 1 by more than 1e-3
        """
        if isinstance(values, (list, tuple)):
            values = th.FloatTensor(values)
        self.values = values

        if p is None:
            # uniform probabilities
            p = th.ones(len(values)) / len(values)
        elif abs(1.0 - sum(p)) > 1e-3:
            raise ValueError('Probs must sum to 1')
        self.p = p

        self.interp = interp
        self.lazy = lazy

    def __call__(self, *inputs):
        """
        Sample one angle with `th_random_choice` and delegate to Rotate,
        either lazily (matrix only) or applied to all inputs.
        """
        degree = th_random_choice(self.values, p=self.p)

        if self.lazy:
            return Rotate(degree, lazy=True)(inputs[0])
        return Rotate(degree, interp=self.interp)(*inputs)
class Rotate:

    def __init__(self,
                 value,
                 interp='bilinear',
                 lazy=False):
        """
        Rotate an image by a fixed angle. If several inputs are given,
        the same rotation matrix is applied to each of them.

        Arguments
        ---------
        value : integer or float
            rotation angle in degrees
        interp : string in {'bilinear', 'nearest'} or list of strings
            type of interpolation to use. You can provide a different
            type of interpolation for each input, e.g. if you have two
            inputs then you can say `interp=['bilinear','nearest']
        lazy : boolean
            if true, only create the affine transform matrix and return that
            if false, perform the transform on the tensor and return the tensor
        """
        self.value = value
        self.interp = interp
        self.lazy = lazy

    def __call__(self, *inputs):
        """
        Build the 3x3 rotation matrix and, unless lazy, apply it to every
        input with `th_affine2d(..., center=True)`.

        Returns the matrix when lazy; otherwise the transformed tensor
        for a single input or a list of tensors (empty for no inputs).
        """
        # degrees -> radians
        theta = math.pi / 180 * self.value
        cos_t, sin_t = math.cos(theta), math.sin(theta)
        rotation_matrix = th.FloatTensor([[cos_t, -sin_t, 0],
                                          [sin_t, cos_t, 0],
                                          [0, 0, 1]])
        if self.lazy:
            return rotation_matrix

        if isinstance(self.interp, (tuple, list)):
            interp = self.interp
        else:
            interp = [self.interp] * len(inputs)

        outputs = [th_affine2d(_input, rotation_matrix,
                               mode=interp[idx], center=True)
                   for idx, _input in enumerate(inputs)]
        # count the outputs rather than comparing a loop index that is
        # still None when no inputs were passed
        return outputs[0] if len(outputs) == 1 else outputs
class RandomTranslate:

    def __init__(self,
                 translation_range,
                 interp='bilinear',
                 lazy=False):
        """
        Randomly translate an image some fraction of total height and/or
        some fraction of total width. If the image has multiple channels,
        the same translation will be applied to each channel.

        Arguments
        ---------
        translation_range : a number, or two floats between [0, 1)
            a single number is used for both directions
            first value:
                fractional bound of total height to shift image; the
                fraction is drawn from (-height_range, height_range)
            second value:
                fractional bound of total width to shift image; the
                fraction is drawn from (-width_range, width_range)
        interp : string in {'bilinear', 'nearest'} or list of strings
            type of interpolation to use. You can provide a different
            type of interpolation for each input, e.g. if you have two
            inputs then you can say `interp=['bilinear','nearest']
        lazy : boolean
            if true, only create the affine transform matrix and return that
            if false, perform the transform on the tensor and return the tensor
        """
        # accept ints as well as floats for the scalar form; previously an
        # int such as 0 fell through and failed on the indexing below
        if isinstance(translation_range, (int, float)):
            translation_range = (translation_range, translation_range)
        self.height_range = translation_range[0]
        self.width_range = translation_range[1]
        self.interp = interp
        self.lazy = lazy

    def __call__(self, *inputs):
        """
        Draw independent height and width fractions and delegate to
        Translate, either lazily (matrix only) or applied to all inputs.
        """
        # height shift is drawn first, then width shift
        random_height = random.uniform(-self.height_range, self.height_range)
        random_width = random.uniform(-self.width_range, self.width_range)
        shift = [random_height, random_width]

        if self.lazy:
            return Translate(shift, lazy=True)(inputs[0])
        return Translate(shift, interp=self.interp)(*inputs)
class RandomChoiceTranslate:

    def __init__(self,
                 values,
                 p=None,
                 interp='bilinear',
                 lazy=False):
        """
        Randomly translate an image some fraction of total height and/or
        some fraction of total width from a list of potential values.
        If the image has multiple channels,
        the same translation will be applied to each channel.

        Arguments
        ---------
        values : a list or tuple
            the values from which the translation value will be sampled
            (a list/tuple is converted to a FloatTensor)
        p : a list or tuple the same length as `values`
            the probabilities of sampling any given value. Must sum to 1.
            Defaults to a uniform distribution over `values`.
        interp : string in {'bilinear', 'nearest'} or list of strings
            type of interpolation to use. You can provide a different
            type of interpolation for each input, e.g. if you have two
            inputs then you can say `interp=['bilinear','nearest']
        lazy : boolean
            if true, only create the affine transform matrix and return that
            if false, perform the transform on the tensor and return the tensor

        Raises
        ------
        ValueError
            if `p` is given and its sum differs from 1 by more than 1e-3
        """
        if isinstance(values, (list, tuple)):
            values = th.FloatTensor(values)
        self.values = values

        if p is None:
            # uniform probabilities
            p = th.ones(len(values)) / len(values)
        elif abs(1.0 - sum(p)) > 1e-3:
            raise ValueError('Probs must sum to 1')
        self.p = p

        self.interp = interp
        self.lazy = lazy

    def __call__(self, *inputs):
        """
        Sample the height and width fractions independently from
        `values` and delegate to Translate, either lazily (matrix only)
        or applied to all inputs.
        """
        random_height = th_random_choice(self.values, p=self.p)
        random_width = th_random_choice(self.values, p=self.p)
        shift = [random_height, random_width]

        if self.lazy:
            return Translate(shift, lazy=True)(inputs[0])
        return Translate(shift, interp=self.interp)(*inputs)
class Translate:

    def __init__(self,
                 value,
                 interp='bilinear',
                 lazy=False):
        """
        Translate an image by a fixed fraction of its size.

        Arguments
        ---------
        value : float or 2-tuple of float
            if single value, both horizontal and vertical translation
            will be this value * total height/width. Thus, value should
            be a fraction of total height/width with range (-1, 1).
            For a pair, the first entry scales `input.size(1)` and the
            second entry scales `input.size(2)`.
        interp : string in {'bilinear', 'nearest'} or list of strings
            type of interpolation to use. You can provide a different
            type of interpolation for each input, e.g. if you have two
            inputs then you can say `interp=['bilinear','nearest']
        lazy : boolean
            if true, only create the affine transform matrix and return that
            if false, perform the transform on the tensor and return the tensor

        Raises
        ------
        ValueError
            if either fraction lies outside [-1, 1]
        """
        if not isinstance(value, (tuple, list)):
            value = (value, value)

        if value[0] > 1 or value[0] < -1:
            raise ValueError('Translation must be between -1 and 1')
        if value[1] > 1 or value[1] < -1:
            raise ValueError('Translation must be between -1 and 1')

        self.height_range = value[0]
        self.width_range = value[1]
        self.interp = interp
        self.lazy = lazy

    def __call__(self, *inputs):
        """
        Build the 3x3 translation matrix from the size of the first
        input and, unless lazy, apply it to every input with
        `th_affine2d(..., center=True)`.

        Returns the matrix when lazy; otherwise the transformed tensor
        for a single input or a list of tensors.
        """
        # fractions are converted to absolute offsets using dims 1 and 2
        # of the first input only
        reference = inputs[0]
        tx = self.height_range * reference.size(1)
        ty = self.width_range * reference.size(2)

        translation_matrix = th.FloatTensor([[1, 0, tx],
                                             [0, 1, ty],
                                             [0, 0, 1]])
        if self.lazy:
            return translation_matrix

        if isinstance(self.interp, (tuple, list)):
            interp = self.interp
        else:
            interp = [self.interp] * len(inputs)

        outputs = [th_affine2d(_input, translation_matrix,
                               mode=interp[idx], center=True)
                   for idx, _input in enumerate(inputs)]
        return outputs[0] if len(outputs) == 1 else outputs
class RandomShear:

    def __init__(self,
                 shear_range,
                 interp='bilinear',
                 lazy=False):
        """
        Randomly shear an image with a value drawn uniformly from
        (-shear_range, shear_range)

        Arguments
        ---------
        shear_range : float
            bound on the shear value, in degrees (the Shear transform this
            class delegates to converts its value from degrees to radians)
        interp : string in {'bilinear', 'nearest'} or list of strings
            type of interpolation to use. You can provide a different
            type of interpolation for each input, e.g. if you have two
            inputs then you can say `interp=['bilinear','nearest']
        lazy : boolean
            if false, perform the transform on the tensor and return the tensor
            if true, only create the affine transform matrix and return that
        """
        self.shear_range = shear_range
        self.interp = interp
        self.lazy = lazy

    def __call__(self, *inputs):
        """
        Draw one shear value and delegate to Shear, either lazily
        (matrix only) or applied to all inputs.
        """
        shear = random.uniform(-self.shear_range, self.shear_range)

        if self.lazy:
            return Shear(shear, lazy=True)(inputs[0])
        return Shear(shear, interp=self.interp)(*inputs)
class RandomChoiceShear:

    def __init__(self,
                 values,
                 p=None,
                 interp='bilinear',
                 lazy=False):
        """
        Randomly shear an image with a value sampled from a list of values.

        Arguments
        ---------
        values : a list or tuple
            the values from which the shear value will be sampled
            (a list/tuple is converted to a FloatTensor)
        p : a list or tuple the same length as `values`
            the probabilities of sampling any given value. Must sum to 1.
            Defaults to a uniform distribution over `values`.
        interp : string in {'bilinear', 'nearest'} or list of strings
            type of interpolation to use. You can provide a different
            type of interpolation for each input, e.g. if you have two
            inputs then you can say `interp=['bilinear','nearest']
        lazy : boolean
            if false, perform the transform on the tensor and return the tensor
            if true, only create the affine transform matrix and return that

        Raises
        ------
        ValueError
            if `p` is given and its sum differs from 1 by more than 1e-3
        """
        if isinstance(values, (list, tuple)):
            values = th.FloatTensor(values)
        self.values = values

        if p is None:
            # uniform probabilities
            p = th.ones(len(values)) / len(values)
        elif abs(1.0 - sum(p)) > 1e-3:
            raise ValueError('Probs must sum to 1')
        self.p = p

        self.interp = interp
        self.lazy = lazy

    def __call__(self, *inputs):
        """
        Sample one shear value with `th_random_choice` and delegate to
        Shear, either lazily (matrix only) or applied to all inputs.
        """
        shear = th_random_choice(self.values, p=self.p)

        if self.lazy:
            return Shear(shear, lazy=True)(inputs[0])
        return Shear(shear, interp=self.interp)(*inputs)
class Shear:

    def __init__(self,
                 value,
                 interp='bilinear',
                 lazy=False):
        """
        Shear an image by a fixed amount.

        Arguments
        ---------
        value : integer or float
            shear angle in degrees (converted to radians internally)
        interp : string in {'bilinear', 'nearest'} or list of strings
            type of interpolation to use. You can provide a different
            type of interpolation for each input, e.g. if you have two
            inputs then you can say `interp=['bilinear','nearest']
        lazy : boolean
            if false, perform the transform on the tensor and return the tensor
            if true, only create the affine transform matrix and return that
        """
        self.value = value
        self.interp = interp
        self.lazy = lazy

    def __call__(self, *inputs):
        """
        Build the 3x3 shear matrix and, unless lazy, apply it to every
        input with `th_affine2d(..., center=True)`.

        Returns the matrix when lazy; otherwise the transformed tensor
        for a single input or a list of tensors (empty for no inputs).
        """
        # degrees -> radians
        theta = (math.pi * self.value) / 180
        shear_matrix = th.FloatTensor([[1, -math.sin(theta), 0],
                                       [0, math.cos(theta), 0],
                                       [0, 0, 1]])
        if self.lazy:
            return shear_matrix

        if isinstance(self.interp, (tuple, list)):
            interp = self.interp
        else:
            interp = [self.interp] * len(inputs)

        outputs = [th_affine2d(_input, shear_matrix,
                               mode=interp[idx], center=True)
                   for idx, _input in enumerate(inputs)]
        # count the outputs rather than comparing a loop index that is
        # still None when no inputs were passed
        return outputs[0] if len(outputs) == 1 else outputs
class RandomSquareZoom:

    def __init__(self,
                 zoom_range,
                 interp='bilinear',
                 lazy=False):
        """
        Randomly zoom in and/or out on an image, using the same zoom
        factor for both axes

        Arguments
        ---------
        zoom_range : tuple or list with 2 values, both between (0, infinity)
            lower and upper bounds on percent zoom.
            Anything less than 1.0 will zoom in on the image,
            anything greater than 1.0 will zoom out on the image.
            e.g. (0.7, 1.0) will only zoom in,
                 (1.0, 1.4) will only zoom out,
                 (0.7, 1.4) will randomly zoom in or out
        interp : string in {'bilinear', 'nearest'} or list of strings
            type of interpolation to use. You can provide a different
            type of interpolation for each input, e.g. if you have two
            inputs then you can say `interp=['bilinear','nearest']
        lazy : boolean
            if false, perform the transform on the tensor and return the tensor
            if true, only create the affine transform matrix and return that

        Raises
        ------
        ValueError
            if `zoom_range` is neither a list nor a tuple
        """
        if not isinstance(zoom_range, (list, tuple)):
            raise ValueError('zoom_range must be tuple or list with 2 values')
        self.zoom_range = zoom_range
        self.interp = interp
        self.lazy = lazy

    def __call__(self, *inputs):
        """
        Draw a single zoom factor, use it for both axes and delegate to
        Zoom, either lazily (matrix only) or applied to all inputs.
        """
        low, high = self.zoom_range[0], self.zoom_range[1]
        zx = random.uniform(low, high)
        # square zoom: the second axis reuses the first axis' factor
        zy = zx

        if self.lazy:
            return Zoom([zx, zy], lazy=True)(inputs[0])
        return Zoom([zx, zy], interp=self.interp)(*inputs)
class RandomZoom:

    def __init__(self,
                 zoom_range,
                 interp='bilinear',
                 lazy=False):
        """
        Randomly zoom in and/or out on an image, with an independently
        drawn zoom factor for each axis

        Arguments
        ---------
        zoom_range : tuple or list with 2 values, both between (0, infinity)
            lower and upper bounds on percent zoom.
            Anything less than 1.0 will zoom in on the image,
            anything greater than 1.0 will zoom out on the image.
            e.g. (0.7, 1.0) will only zoom in,
                 (1.0, 1.4) will only zoom out,
                 (0.7, 1.4) will randomly zoom in or out
        interp : string in {'bilinear', 'nearest'} or list of strings
            type of interpolation to use. You can provide a different
            type of interpolation for each input, e.g. if you have two
            inputs then you can say `interp=['bilinear','nearest']
        lazy : boolean
            if false, perform the transform on the tensor and return the tensor
            if true, only create the affine transform matrix and return that

        Raises
        ------
        ValueError
            if `zoom_range` is neither a list nor a tuple
        """
        if not isinstance(zoom_range, (list, tuple)):
            raise ValueError('zoom_range must be tuple or list with 2 values')
        self.zoom_range = zoom_range
        self.interp = interp
        self.lazy = lazy

    def __call__(self, *inputs):
        """
        Draw two zoom factors (first zx, then zy) from the same range and
        delegate to Zoom, either lazily (matrix only) or applied to all
        inputs.
        """
        low, high = self.zoom_range[0], self.zoom_range[1]
        zx = random.uniform(low, high)
        zy = random.uniform(low, high)

        if self.lazy:
            return Zoom([zx, zy], lazy=True)(inputs[0])
        return Zoom([zx, zy], interp=self.interp)(*inputs)
class RandomChoiceZoom:

    def __init__(self,
                 values,
                 p=None,
                 interp='bilinear',
                 lazy=False):
        """
        Randomly zoom in and/or out on an image with a value sampled from
        a list of values

        Arguments
        ---------
        values : a list or tuple
            the values from which the applied zoom value will be sampled
            (a list/tuple is converted to a FloatTensor)
        p : a list or tuple the same length as `values`
            the probabilities of sampling any given value. Must sum to 1.
            Defaults to a uniform distribution over `values`.
        interp : string in {'bilinear', 'nearest'} or list of strings
            type of interpolation to use. You can provide a different
            type of interpolation for each input, e.g. if you have two
            inputs then you can say `interp=['bilinear','nearest']
        lazy : boolean
            if false, perform the transform on the tensor and return the tensor
            if true, only create the affine transform matrix and return that

        Raises
        ------
        ValueError
            if `p` is given and its sum differs from 1 by more than 1e-3
        """
        if isinstance(values, (list, tuple)):
            values = th.FloatTensor(values)
        self.values = values

        if p is None:
            # uniform probabilities
            p = th.ones(len(values)) / len(values)
        elif abs(1.0 - sum(p)) > 1e-3:
            raise ValueError('Probs must sum to 1')
        self.p = p

        self.interp = interp
        self.lazy = lazy

    def __call__(self, *inputs):
        """
        Sample the two zoom factors independently from `values` and
        delegate to Zoom, either lazily (matrix only) or applied to all
        inputs.
        """
        zx = th_random_choice(self.values, p=self.p)
        zy = th_random_choice(self.values, p=self.p)

        if self.lazy:
            return Zoom([zx, zy], lazy=True)(inputs[0])
        return Zoom([zx, zy], interp=self.interp)(*inputs)
class Zoom:

    def __init__(self,
                 value,
                 interp='bilinear',
                 lazy=False):
        """
        Zoom an image by fixed factors.

        Arguments
        ---------
        value : float or tuple/list of two floats
            zoom factor(s) placed on the diagonal of the transform
            matrix; a single number is used for both axes.
                =1 : no zoom
            NOTE(review): which side of 1 means zoom-in depends on how
            `th_affine2d` applies the matrix. RandomZoom documents
            values below 1.0 as zooming in, while this class used to
            document values above 1 as zooming in -- confirm.
        interp : string in {'bilinear', 'nearest'} or list of strings
            type of interpolation to use. You can provide a different
            type of interpolation for each input, e.g. if you have two
            inputs then you can say `interp=['bilinear','nearest']
        lazy : boolean
            if true, only create the affine transform matrix and return that
            if false, perform the transform on the tensor and return the tensor
        """
        if not isinstance(value, (tuple, list)):
            value = (value, value)
        self.value = value
        self.interp = interp
        self.lazy = lazy

    def __call__(self, *inputs):
        """
        Build the 3x3 zoom matrix and, unless lazy, apply it to every
        input with `th_affine2d(..., center=True)`.

        Returns the matrix when lazy; otherwise the transformed tensor
        for a single input or a list of tensors (empty for no inputs).
        """
        zx, zy = self.value
        zoom_matrix = th.FloatTensor([[zx, 0, 0],
                                      [0, zy, 0],
                                      [0, 0, 1]])
        if self.lazy:
            return zoom_matrix

        if isinstance(self.interp, (tuple, list)):
            interp = self.interp
        else:
            interp = [self.interp] * len(inputs)

        outputs = [th_affine2d(_input, zoom_matrix,
                               mode=interp[idx], center=True)
                   for idx, _input in enumerate(inputs)]
        # count the outputs rather than comparing a loop index that is
        # still None when no inputs were passed
        return outputs[0] if len(outputs) == 1 else outputs