# Source code for sporco.prox

# -*- coding: utf-8 -*-
# Copyright (C) 2015-2017 by Brendt Wohlberg <brendt@ieee.org>
# This file is part of the SPORCO package. Details of the copyright
# and user license can be found in the 'LICENSE.txt' file distributed
# with the package.

r"""Norms and their associated proximal maps and projections

The :math:p-norm of a vector is defined as

.. math::
\| \mathbf{x} \|_p = \left( \sum_i | x_i |^p \right)^{1/p}

where :math:x_i is element :math:i of vector :math:\mathbf{x}.
The max norm is a special case

.. math::
\| \mathbf{x} \|_{\infty} = \max_i | x_i | \;\;.

The mixed matrix norm :math:\|X\|_{p,q} is defined here as
:cite:kowalski-2009-sparse

.. math::
\|X\|_{p,q} = \left( \sum_i \left( \sum_j |X_{i,j}|^p \right)^{q/p}
\right)^{1/q} = \left( \sum_i \| \mathbf{x}_i  \|_p^q \right)^{1/q}

where :math:\mathbf{x}_i is row :math:i of matrix
:math:X. Note that some authors use a notation that reverses the
positions of :math:p and :math:q.

The proximal operator of function :math:f is defined as

.. math::
\mathrm{prox}_f(\mathbf{v}) = \mathrm{argmin}_{\mathbf{x}}
\left\{ (1/2) \| \mathbf{x} - \mathbf{v} \|_2^2 + f(\mathbf{x})
\right\} \;\;.

The projection operator of function :math:f is defined as

.. math::
\mathrm{proj}_{f,\gamma}(\mathbf{v}) &= \mathrm{argmin}_{\mathbf{x}}
(1/2) \| \mathbf{x} - \mathbf{v} \|_2^2 \; \text{ s.t. } \;
f(\mathbf{x}) \leq \gamma \\ &= \mathrm{prox}_g(\mathbf{v})

where :math:g(\mathbf{v}) = \iota_C(\mathbf{v}), with
:math:\iota_C denoting the indicator function of set
:math:C = \{ \mathbf{x} \; | \; f(\mathbf{x}) \leq \gamma \}.
"""

from __future__ import division
from builtins import range

import numpy as np
import scipy.optimize as optim
try:
import numexpr as ne
except ImportError:
have_numexpr = False
else:
have_numexpr = True

import sporco.linalg as sl

__author__ = """Brendt Wohlberg <brendt@ieee.org>"""

[docs]def ndto2d(x, axis=-1):
"""Convert a multi-dimensional array into a 2d array, with the axes
specified by the axis parameter flattened into an index along
rows, and the remaining axes flattened into an index along the
columns. This operation can not be properly achieved by a simple
reshape operation since a reshape would shuffle element order if
the axes to be grouped together were not consecutive: this is
avoided by first permuting the axes so that the grouped axes are
consecutive.

Parameters
----------
x : array_like
Multi-dimensional input array
axis : int or tuple of ints, optional (default -1)
Axes of x to be grouped together to form the rows of the output
2d array.

Returns
-------
xtr : ndarray
2D output array
rsi : tuple
A tuple containing the details of transformation applied in the
conversion to 2D
"""

# Convert int axis into a tuple
if isinstance(axis, int):
axis = (axis,)
# Handle negative axis indices
axis = tuple([k if k >= 0 else x.ndim + k for k in axis])
# Complement of axis set on full set of axes of input v
caxis = tuple(set(range(x.ndim)) - set(axis))
# Permute axes of x (generalised transpose) so that axes over
# which operation is to be applied are all at the end
prm = caxis + axis
xt = np.transpose(x, axes=prm)
xts = xt.shape
# Reshape into a 2D array with the axes specified by the axis
# parameter flattened into an index along rows, and the remaining
# axes flattened into an index aalong the columns
xtr = xt.reshape((np.product(xts[0:len(caxis)]), -1))
# Return reshaped array and a tuple containing the information
# necessary to undo the entire operation
return xtr, (xts, prm)

[docs]def ndfrom2d(xtr, rsi):
"""Undo the array shape conversion applied by :func:ndto2d,
returning the input 2D array to its original shape.

Parameters
----------
xtr : array_like
Two-dimensional input array
rsi : tuple
A tuple containing the shape of the axis-permuted array and the
permutation order applied in :func:ndto2d.

Returns
-------
x : ndarray
Multi-dimensional output array
"""

# Extract components of conversion information tuple
xts = rsi[0]
prm = rsi[1]
# Reshape x to the shape obtained after permuting axes in ndto2d
xt = xtr.reshape(xts)
# Undo axis permutation performed in ndto2d
x = np.transpose(xt, np.argsort(prm))
# Return array with shape corresponding to that of the input to ndto2d
return x

[docs]def norm_l0(x, axis=None, eps=0.0):
r"""Compute the :math:\ell_0 "norm" (it is not really a norm)

.. math::
\| \mathbf{x} \|_0 = \sum_i \left\{ \begin{array}{ccc}
0 & \text{if} & x_i = 0 \\ 1 &\text{if}  & x_i \neq 0
\end{array} \right.

where :math:x_i is element :math:i of vector :math:\mathbf{x}.

Parameters
----------
x : array_like
Input array :math:\mathbf{x}
axis : None or int or tuple of ints, optional (default None)
Axes of x over which to compute the :math:\ell_0 "norm". If
None, an entire multi-dimensional array is treated as a
vector. If axes are specified, then distinct values are computed
over the indices of the remaining axes of input array x.
eps : float, optional (default 0.0)
Absolute value threshold below which a number is considered to be zero.

Returns
-------
nl0 : float or ndarray
Norm of x, or array of norms treating specified axes of x
as a vector
"""

nl0 = np.sum(np.abs(x) > eps, axis=axis, keepdims=True)
# If the result has a single element, convert it to a scalar
if nl0.size == 1:
nl0 = nl0.ravel()[0]
return nl0

[docs]def prox_l0(v, alpha):
r"""Proximal operator of the :math:\ell_0 "norm" (hard thresholding)

.. math::
\mathrm{prox}_{\alpha f}(v) = \mathcal{S}_{0,\alpha}(\mathbf{v})
= \left\{ \begin{array}{ccc} 0 & \text{if} &
| v | < \sqrt{2 \alpha} \\ v &\text{if}  &
| v | \geq \sqrt{2 \alpha} \end{array} \right.

Unlike the corresponding :func:norm_l0, there is no need for an
axis parameter since the proximal operator of the :math:\ell_0
norm is the same when taken independently over each element, or
over their sum.

Parameters
----------
v : array_like
Input array :math:\mathbf{v}
alpha : float or array_like
Parameter :math:\alpha

Returns
-------
x : ndarray
Output array
"""

return (np.abs(v) >= np.sqrt(2.0 * alpha)) * v

[docs]def norm_l1(x, axis=None):
r"""Compute the :math:\ell_1 norm

.. math::
\| \mathbf{x} \|_1 = \sum_i | x_i |

where :math:x_i is element :math:i of vector :math:\mathbf{x}.

Parameters
----------
x : array_like
Input array :math:\mathbf{x}
axis : None or int or tuple of ints, optional (default None)
Axes of x over which to compute the :math:\ell_1 norm. If
None, an entire multi-dimensional array is treated as a
vector. If axes are specified, then distinct values are computed
over the indices of the remaining axes of input array x.

Returns
-------
nl1 : float or ndarray
Norm of x, or array of norms treating specified axes of x
as a vector
"""

nl1 = np.sum(np.abs(x), axis=axis, keepdims=True)
# If the result has a single element, convert it to a scalar
if nl1.size == 1:
nl1 = nl1.ravel()[0]
return nl1

[docs]def prox_l1(v, alpha):
r"""Proximal operator of the :math:\ell_1 norm (scalar
shrinkage/soft thresholding)

.. math::
\mathrm{prox}_{\alpha f}(\mathbf{v}) =
\mathcal{S}_{1,\alpha}(\mathbf{v}) = \mathrm{sign}(\mathbf{v}) \odot
\max(0, |\mathbf{v}| - \alpha)

where :math:f(\mathbf{x}) = \|\mathbf{x}\|_1.

Unlike the corresponding :func:norm_l1, there is no need for an
axis parameter since the proximal operator of the :math:\ell_1
norm is the same when taken independently over each element, or
over their sum.

Parameters
----------
v : array_like
Input array :math:\mathbf{v}
alpha : float or array_like
Parameter :math:\alpha

Returns
-------
x : ndarray
Output array
"""

if have_numexpr:
return ne.evaluate(
'where(abs(v)-alpha > 0, where(v >= 0, 1, -1) * (abs(v)-alpha), 0)'
)
else:
return np.sign(v) * (np.clip(np.abs(v) - alpha, 0, float('Inf')))

[docs]def proj_l1(v, gamma, axis=None, method=None):
r"""Projection operator of the :math:\ell_1 norm.

Parameters
----------
v : array_like
Input array :math:\mathbf{v}
gamma : float
Parameter :math:\gamma
axis : None or int or tuple of ints, optional (default None)
Axes of v over which to compute the :math:\ell_1 norm. If
None, an entire multi-dimensional array is treated as a
vector. If axes are specified, then distinct norm values are
computed over the indices of the remaining axes of input array
v.
method : None or str, optional (default None)
Solver method to use. If None, the most appropriate choice is
made based on the axis parameter. Valid methods are

- 'scalarroot'
The solution is computed via the method of Sec. 6.5.2 in
:cite:parikh-2014-proximal.
- 'sortcumsum'
The solution is computed via the method of
:cite:duchi-2008-efficient.

Returns
-------
x : ndarray
Output array
"""

if method is None:
if axis is None:
method = 'scalarroot'
else:
method = 'sortcumsum'

if method == 'scalarroot':
if axis is not None:
raise ValueError('Method scalarroot only supports axis=None')
return _proj_l1_scalar_root(v, gamma)
elif method == 'sortcumsum':
if isinstance(axis, tuple):
vtr, rsi = ndto2d(v, axis)
xtr = _proj_l1_sortsum(vtr, gamma, axis=1)
return ndfrom2d(xtr, rsi)
else:
return _proj_l1_sortsum(v, gamma, axis)
else:
raise ValueError('Unknown solver method %s' % method)

[docs]def _proj_l1_scalar_root(v, gamma):
r"""Projection operator of the :math:\ell_1 norm. The solution is
computed via the method of Sec. 6.5.2 in :cite:parikh-2014-proximal.

There is no axis parameter since the algorithm for computing the
solution treats the input v as a single vector.

Parameters
----------
v : array_like
Input array :math:\mathbf{v}
gamma : float
Parameter :math:\gamma

Returns
-------
x : ndarray
Output array
"""

if norm_l1(v) <= gamma:
return v
else:
av = np.abs(v)
fn = lambda t: np.sum(np.maximum(0, av - t)) - gamma
t = optim.brentq(fn, 0, av.max())
return prox_l1(v, t)

[docs]def _proj_l1_sortsum(v, gamma, axis=None):
r"""Projection operator of the :math:\ell_1 norm. The solution is
computed via the method of :cite:duchi-2008-efficient.

Parameters
----------
v : array_like
Input array :math:\mathbf{v}
gamma : float
Parameter :math:\gamma
axis : None or int, optional (default None)
Axes of v over which to compute the :math:\ell_1 norm. If
None, an entire multi-dimensional array is treated as a
vector. If axes are specified, then distinct norm values are
computed over the indices of the remaining axes of input array
v. **Note:** specifying a tuple of ints is not supported by
this function.

Returns
-------
x : ndarray
Output array
"""

if axis is None and norm_l1(v) <= gamma:
return v
if axis is not None and axis < 0:
axis = v.ndim + axis
av = np.abs(v)
vs = np.sort(av, axis=axis)
if axis is None:
N = v.size
c = 1.0 / np.arange(1, N + 1, dtype=v.dtype).reshape(v.shape)
vs = vs[::-1].reshape(v.shape)
else:
N = v.shape[axis]
ns = [v.shape[k] if k == axis else 1 for k in range(v.ndim)]
c = 1.0 / np.arange(1, N + 1, dtype=v.dtype).reshape(ns)
vs = vs[(slice(None),) * axis + (slice(None, None, -1),)]
t = c * (np.cumsum(vs, axis=axis).reshape(v.shape) - gamma)
K = np.sum(vs >= t, axis=axis, keepdims=True)
t = (np.sum(vs * (vs >= t), axis=axis, keepdims=True) - gamma) / K
t = np.asarray(np.maximum(0, t), dtype=v.dtype)
return np.sign(v) * np.where(av > t, av - t, 0)

[docs]def norm_2l2(x, axis=None):
r"""Compute the squared :math:\ell_2 norm

.. math::
\| \mathbf{x} \|_2^2 = \sum_i x_i^2

where :math:x_i is element :math:i of vector :math:\mathbf{x}.

Parameters
----------
x : array_like
Input array :math:\mathbf{x}
axis : None or int or tuple of ints, optional (default None)
Axes of x over which to compute the :math:\ell_2 norm. If
None, an entire multi-dimensional array is treated as a
vector. If axes are specified, then distinct values are computed
over the indices of the remaining axes of input array x.

Returns
-------
nl2 : float or ndarray
Norm of x, or array of norms treating specified axes of x
as a vector.
"""

nl2 = np.sum(x**2, axis=axis, keepdims=True)
# If the result has a single element, convert it to a scalar
if nl2.size == 1:
nl2 = nl2.ravel()[0]
return nl2

[docs]def norm_l2(x, axis=None):
r"""Compute the :math:\ell_2 norm

.. math::
\| \mathbf{x} \|_2 = \sqrt{ \sum_i x_i^2 }

where :math:x_i is element :math:i of vector :math:\mathbf{x}.

Parameters
----------
x : array_like
Input array :math:\mathbf{x}
axis : None or int or tuple of ints, optional (default None)
Axes of x over which to compute the :math:\ell_2 norm. If
None, an entire multi-dimensional array is treated as a
vector. If axes are specified, then distinct values are computed
over the indices of the remaining axes of input array x.

Returns
-------
nl2 : float or ndarray
Norm of x, or array of norms treating specified axes of x
as a vector.
"""

return np.sqrt(norm_2l2(x, axis))

[docs]def norm_l21(x, axis=-1):
r"""Compute the :math:\ell_{2,1} mixed norm

.. math::
\| X \|_{2,1} = \sum_i \sqrt{ \sum_j X_{i,j}^2 }

where :math:X_{i,j} is element :math:i,j of matrix :math:X.

Parameters
----------
x : array_like
Input array :math:X
axis : None or int or tuple of ints, optional (default -1)
Axes of x over which to compute the :math:\ell_2 norm. If
None, an entire multi-dimensional array is treated as a
vector, in which case the result is just the :math:\ell_2 norm.
If axes are specified, then the sum over the :math:\ell_2 norm
values is computed over the indices of the remaining axes of input
array x.

Returns
-------
nl21 : float
Norm of :math:X
"""

return np.sum(norm_l2(x, axis=axis))

[docs]def prox_l2(v, alpha, axis=None):
r"""Proximal operator of the :math:\ell_2 norm (vector shrinkage/soft
thresholding)

.. math::
\mathrm{prox}_{\alpha f}(\mathbf{v}) = \frac{\mathbf{v}}
{\|\mathbf{v}\|_2} \max(0, \|\mathbf{v}\|_2 - \alpha) =
\mathcal{S}_{2,\alpha}(\mathbf{v})

where :math:f(\mathbf{x}) = \|\mathbf{x}\|_2.

Parameters
----------
v : array_like
Input array :math:\mathbf{v}
alpha : float or array_like
Parameter :math:\alpha
axis : None or int or tuple of ints, optional (default None)
Axes of v over which to compute the :math:\ell_2 norm. If
None, an entire multi-dimensional array is treated as a
vector. If axes are specified, then distinct norm values are
computed over the indices of the remaining axes of input array
v, which is equivalent to the proximal operator of the sum over
these values (i.e. an :math:\ell_{2,1} norm).

Returns
-------
x : ndarray
Output array
"""

a = np.sqrt(np.sum(v**2, axis=axis, keepdims=True))
b = np.maximum(0, a - alpha)
b = sl.zdivide(b, a)
return b * v

[docs]def proj_l2(v, gamma, axis=None):
r"""Projection operator of the :math:\ell_2 norm.

The projection operator of the uncentered :math:\ell_2 norm,

.. math::
\mathrm{argmin}_{\mathbf{x}} (1/2) \| \mathbf{x} - \mathbf{v} \|_2^2 \;
\text{ s.t. } \; \| \mathbf{x} - \mathbf{s} \|_2 \leq \gamma

can be computed as :math:\mathbf{s} + \mathrm{proj}_{f,\gamma}
(\mathbf{v} - \mathbf{s}) where :math:f(\mathbf{x}) =
\| \mathbf{x} \|_2.

Parameters
----------
v : array_like
Input array :math:\mathbf{v}
gamma : float
Parameter :math:\gamma
axis : None or int or tuple of ints, optional (default None)
Axes of v over which to compute the :math:\ell_2 norm. If
None, an entire multi-dimensional array is treated as a vector.
If axes are specified, then distinct norm values are computed
over the indices of the remaining axes of input array v.

Returns
-------
x : ndarray
Output array
"""

d = np.sqrt(np.sum(v**2, axis=axis, keepdims=True))
return (d <= gamma) * v + (d > gamma) * (gamma * sl.zdivide(v, d))

[docs]def prox_l1l2(v, alpha, beta, axis=None):
r"""Proximal operator of the :math:\ell_1 plus :math:\ell_2 norm
(compound shrinkage/soft thresholding) :cite:wohlberg-2012-local
:cite:chartrand-2013-nonconvex

.. math::
\mathrm{prox}_{f}(\mathbf{v}) =
\mathcal{S}_{1,2,\alpha,\beta}(\mathbf{v}) =
\mathcal{S}_{2,\beta}(\mathcal{S}_{1,\alpha}(\mathbf{v}))

where :math:f(\mathbf{x}) = \alpha \|\mathbf{x}\|_1 +
\beta \|\mathbf{x}\|_2.

Parameters
----------
v : array_like
Input array :math:\mathbf{v}
alpha : float or array_like
Parameter :math:\alpha
beta : float or array_like
Parameter :math:\beta
axis : None or int or tuple of ints, optional (default None)
Axes of v over which to compute the :math:\ell_2 norm. If
None, an entire multi-dimensional array is treated as a
vector. If axes are specified, then distinct norm values are
computed over the indices of the remaining axes of input array
v, which is equivalent to the proximal operator of the sum over
these values (i.e. an :math:\ell_{2,1} norm).

Returns
-------
x : ndarray
Output array
"""

return prox_l2(prox_l1(v, alpha), beta, axis)

[docs]def norm_nuclear(x):
r"""Compute the nuclear norm

.. math::
\| X \|_1 = \sum_i \sigma_i

where :math:\sigma_i are the singular values of matrix :math:X.

Parameters
----------
x : array_like
Input array :math:X

Returns
-------
nncl : float
Norm of x
"""

return np.sum(np.linalg.svd(sl.promote16(x), compute_uv=False))

[docs]def prox_nuclear(v, alpha):
r"""Proximal operator of the nuclear norm :cite:cai-2010-singular.

Parameters
----------
v : array_like
Input array :math:V
alpha : float
Parameter :math:\alpha

Returns
-------
x : ndarray
Output array
s : ndarray
Singular values of x
"""

U, s, V = sl.promote16(v, fn=np.linalg.svd, full_matrices=False)
ss = np.maximum(0, s - alpha)
return np.dot(U, np.dot(np.diag(ss), V)), ss