Up

sparktk.frame.ops.matrix_pca module

# vim: set encoding=utf-8

#  Copyright (c) 2016 Intel Corporation 
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#       http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
#

def matrix_pca(self, matrix_column_name, v_matrix_column_name):
    """
    Compute the Principal Component Analysis of a matrix

    Parameters
    ----------

    :param matrix_column_name: Name of the column storing the matrices whose principal components are to be computed
    :param v_matrix_column_name: Name of the column storing the V matrix
    :return: (None) this wrapper has no return statement; the frame itself gains a new column storing the
             principal components for the corresponding matrix

    Calculate the Principal Components for each matrix in column 'matrix_column_name' using the V matrix.
    The V matrix column is expected to exist already; in the example below it is produced by matrix_svd and
    the new column is named 'PrincipalComponents_pixeldata'.

    Examples
    --------
        >>> from sparktk import dtypes
        >>> data = [[1, [[1,2,3,5],[2,3,5,6],[4,6,7,3],[8,9,2,4]]]]
        >>> schema = [('id', int),('pixeldata', dtypes.matrix)]
        >>> my_frame = tc.frame.create(data, schema)

        >>> my_frame.inspect()
        [#]  id  pixeldata
        ============================
        [0]   1  [[ 1.  2.  3.  5.]
        [ 2.  3.  5.  6.]
        [ 4.  6.  7.  3.]
        [ 8.  9.  2.  4.]]

        Compute the singular value decomposition for the matrices in 'pixeldata' column of the frame
        >>> my_frame.matrix_svd('pixeldata')

        Three new columns get added storing the U matrix, the V matrix and the singular values
        (the latter in the column named 'SingularVectors_pixeldata')
        >>> my_frame.inspect()
        [#]  id  pixeldata
        ============================
        [0]   1  [[ 1.  2.  3.  5.]
        [ 2.  3.  5.  6.]
        [ 4.  6.  7.  3.]
        [ 8.  9.  2.  4.]]
        <BLANKLINE>
        [#]  U_pixeldata
        ========================================================
        [0]  [[-0.29128979 -0.43716238 -0.44530839  0.72507913]
        [-0.42474933 -0.55066945 -0.26749936 -0.66692972]
        [-0.55099141 -0.16785045  0.79986267  0.16868433]
        [-0.65661765  0.69099814 -0.30060644 -0.0317899 ]]
        <BLANKLINE>
        [#]  V_pixeldata
        ========================================================
        [0]  [[-0.47195872  0.50289367 -0.05244699 -0.72222035]
        [-0.60780067  0.40702574  0.11313693  0.67239008]
        [-0.44835972 -0.58469285  0.65644993 -0.16180641]
        [-0.45476024 -0.48945099 -0.74399115  0.01039344]]
        <BLANKLINE>
        [#]  SingularVectors_pixeldata
        ============================================================
        [0]  [[ 18.21704938   6.59797925   3.54086993   0.26080987]]

        Compute the principal components using the V matrices computed for matrices in 'pixeldata'
        >>> my_frame.matrix_pca('pixeldata', 'V_pixeldata')

        A new column gets added storing the Principal components matrix
        >>> my_frame.inspect()
        [#]  id  pixeldata
        ============================
        [0]   1  [[ 1.  2.  3.  5.]
        [ 2.  3.  5.  6.]
        [ 4.  6.  7.  3.]
        [ 8.  9.  2.  4.]]
        <BLANKLINE>
        [#]  U_pixeldata
        ========================================================
        [0]  [[-0.29128979 -0.43716238 -0.44530839  0.72507913]
        [-0.42474933 -0.55066945 -0.26749936 -0.66692972]
        [-0.55099141 -0.16785045  0.79986267  0.16868433]
        [-0.65661765  0.69099814 -0.30060644 -0.0317899 ]]
        <BLANKLINE>
        [#]  V_pixeldata
        ========================================================
        [0]  [[-0.47195872  0.50289367 -0.05244699 -0.72222035]
        [-0.60780067  0.40702574  0.11313693  0.67239008]
        [-0.44835972 -0.58469285  0.65644993 -0.16180641]
        [-0.45476024 -0.48945099 -0.74399115  0.01039344]]
        <BLANKLINE>
        [#]  SingularVectors_pixeldata
        ============================================================
        [0]  [[ 18.21704938   6.59797925   3.54086993   0.26080987]]
        <BLANKLINE>
        [#]  PrincipalComponents_pixeldata
        ========================================================
        [0]  [[-0.47195872  1.00578734 -0.15734098 -3.61110176]
        [-1.21560134  1.22107722  0.56568466  4.0343405 ]
        [-1.79343888 -3.50815713  4.59514953 -0.48541923]
        [-3.63808191 -4.40505888 -1.4879823   0.04157377]]

    """
    # All of the work is delegated to the Scala-side frame; nothing is returned
    # here, so callers observe the result through the frame's new column.
    # NOTE(review): in the docstring example every PrincipalComponents entry
    # equals the element-wise product of the input matrix and V entries
    # (e.g. 2 * 0.50289367 = 1.00578734) -- confirm that the Scala
    # implementation intends this rather than a matrix product.
    self._scala.matrixPca(matrix_column_name, v_matrix_column_name)

Functions

def matrix_pca(self, matrix_column_name, v_matrix_column_name)

Compute the Principal Component Analysis of a matrix

Parameters:
matrix_column_name: Name of the column storing the matrices whose principal components are to be computed
v_matrix_column_name: Name of the column storing the V matrix

Returns: no value is returned explicitly (the function body shown has no return statement); the frame gains a new column storing the principal components for the corresponding matrix

Calculate the Principal Components for each matrix in column 'matrix_column_name' using the V matrix

Examples:
>>> from sparktk import dtypes
>>> data = [[1, [[1,2,3,5],[2,3,5,6],[4,6,7,3],[8,9,2,4]]]]
>>> schema = [('id', int),('pixeldata', dtypes.matrix)]
>>> my_frame = tc.frame.create(data, schema)

>>> my_frame.inspect()
[#]  id  pixeldata
============================
[0]   1  [[ 1.  2.  3.  5.]
[ 2.  3.  5.  6.]
[ 4.  6.  7.  3.]
[ 8.  9.  2.  4.]]

Compute the singular value decomposition for the matrices in 'pixeldata' column of the frame
>>> my_frame.matrix_svd('pixeldata')

Three new columns get added storing the U matrix, the V matrix and the singular values (the latter in the column named 'SingularVectors_pixeldata')
>>> my_frame.inspect()
[#]  id  pixeldata
============================
[0]   1  [[ 1.  2.  3.  5.]
[ 2.  3.  5.  6.]
[ 4.  6.  7.  3.]
[ 8.  9.  2.  4.]]
<BLANKLINE>
[#]  U_pixeldata
========================================================
[0]  [[-0.29128979 -0.43716238 -0.44530839  0.72507913]
[-0.42474933 -0.55066945 -0.26749936 -0.66692972]
[-0.55099141 -0.16785045  0.79986267  0.16868433]
[-0.65661765  0.69099814 -0.30060644 -0.0317899 ]]
<BLANKLINE>
[#]  V_pixeldata
========================================================
[0]  [[-0.47195872  0.50289367 -0.05244699 -0.72222035]
[-0.60780067  0.40702574  0.11313693  0.67239008]
[-0.44835972 -0.58469285  0.65644993 -0.16180641]
[-0.45476024 -0.48945099 -0.74399115  0.01039344]]
<BLANKLINE>
[#]  SingularVectors_pixeldata
============================================================
[0]  [[ 18.21704938   6.59797925   3.54086993   0.26080987]]

Compute the principal components using the V matrices computed for matrices in 'pixeldata'
>>> my_frame.matrix_pca('pixeldata', 'V_pixeldata')

A new column gets added storing the Principal components matrix
>>> my_frame.inspect()
[#]  id  pixeldata
============================
[0]   1  [[ 1.  2.  3.  5.]
[ 2.  3.  5.  6.]
[ 4.  6.  7.  3.]
[ 8.  9.  2.  4.]]
<BLANKLINE>
[#]  U_pixeldata
========================================================
[0]  [[-0.29128979 -0.43716238 -0.44530839  0.72507913]
[-0.42474933 -0.55066945 -0.26749936 -0.66692972]
[-0.55099141 -0.16785045  0.79986267  0.16868433]
[-0.65661765  0.69099814 -0.30060644 -0.0317899 ]]
<BLANKLINE>
[#]  V_pixeldata
========================================================
[0]  [[-0.47195872  0.50289367 -0.05244699 -0.72222035]
[-0.60780067  0.40702574  0.11313693  0.67239008]
[-0.44835972 -0.58469285  0.65644993 -0.16180641]
[-0.45476024 -0.48945099 -0.74399115  0.01039344]]
<BLANKLINE>
[#]  SingularVectors_pixeldata
============================================================
[0]  [[ 18.21704938   6.59797925   3.54086993   0.26080987]]
<BLANKLINE>
[#]  PrincipalComponents_pixeldata
========================================================
[0]  [[-0.47195872  1.00578734 -0.15734098 -3.61110176]
[-1.21560134  1.22107722  0.56568466  4.0343405 ]
[-1.79343888 -3.50815713  4.59514953 -0.48541923]
[-3.63808191 -4.40505888 -1.4879823   0.04157377]]
def matrix_pca(self, matrix_column_name, v_matrix_column_name):
    """
    Compute the Principal Component Analysis of a matrix

    Parameters
    ----------

    :param matrix_column_name: Name of the column storing the matrices whose principal components are to be computed
    :param v_matrix_column_name: Name of the column storing the V matrix
    :return: (None) this wrapper has no return statement; the frame itself gains a new column storing the
             principal components for the corresponding matrix

    Calculate the Principal Components for each matrix in column 'matrix_column_name' using the V matrix.
    The V matrix column is expected to exist already; in the example below it is produced by matrix_svd and
    the new column is named 'PrincipalComponents_pixeldata'.

    Examples
    --------
        >>> from sparktk import dtypes
        >>> data = [[1, [[1,2,3,5],[2,3,5,6],[4,6,7,3],[8,9,2,4]]]]
        >>> schema = [('id', int),('pixeldata', dtypes.matrix)]
        >>> my_frame = tc.frame.create(data, schema)

        >>> my_frame.inspect()
        [#]  id  pixeldata
        ============================
        [0]   1  [[ 1.  2.  3.  5.]
        [ 2.  3.  5.  6.]
        [ 4.  6.  7.  3.]
        [ 8.  9.  2.  4.]]

        Compute the singular value decomposition for the matrices in 'pixeldata' column of the frame
        >>> my_frame.matrix_svd('pixeldata')

        Three new columns get added storing the U matrix, the V matrix and the singular values
        (the latter in the column named 'SingularVectors_pixeldata')
        >>> my_frame.inspect()
        [#]  id  pixeldata
        ============================
        [0]   1  [[ 1.  2.  3.  5.]
        [ 2.  3.  5.  6.]
        [ 4.  6.  7.  3.]
        [ 8.  9.  2.  4.]]
        <BLANKLINE>
        [#]  U_pixeldata
        ========================================================
        [0]  [[-0.29128979 -0.43716238 -0.44530839  0.72507913]
        [-0.42474933 -0.55066945 -0.26749936 -0.66692972]
        [-0.55099141 -0.16785045  0.79986267  0.16868433]
        [-0.65661765  0.69099814 -0.30060644 -0.0317899 ]]
        <BLANKLINE>
        [#]  V_pixeldata
        ========================================================
        [0]  [[-0.47195872  0.50289367 -0.05244699 -0.72222035]
        [-0.60780067  0.40702574  0.11313693  0.67239008]
        [-0.44835972 -0.58469285  0.65644993 -0.16180641]
        [-0.45476024 -0.48945099 -0.74399115  0.01039344]]
        <BLANKLINE>
        [#]  SingularVectors_pixeldata
        ============================================================
        [0]  [[ 18.21704938   6.59797925   3.54086993   0.26080987]]

        Compute the principal components using the V matrices computed for matrices in 'pixeldata'
        >>> my_frame.matrix_pca('pixeldata', 'V_pixeldata')

        A new column gets added storing the Principal components matrix
        >>> my_frame.inspect()
        [#]  id  pixeldata
        ============================
        [0]   1  [[ 1.  2.  3.  5.]
        [ 2.  3.  5.  6.]
        [ 4.  6.  7.  3.]
        [ 8.  9.  2.  4.]]
        <BLANKLINE>
        [#]  U_pixeldata
        ========================================================
        [0]  [[-0.29128979 -0.43716238 -0.44530839  0.72507913]
        [-0.42474933 -0.55066945 -0.26749936 -0.66692972]
        [-0.55099141 -0.16785045  0.79986267  0.16868433]
        [-0.65661765  0.69099814 -0.30060644 -0.0317899 ]]
        <BLANKLINE>
        [#]  V_pixeldata
        ========================================================
        [0]  [[-0.47195872  0.50289367 -0.05244699 -0.72222035]
        [-0.60780067  0.40702574  0.11313693  0.67239008]
        [-0.44835972 -0.58469285  0.65644993 -0.16180641]
        [-0.45476024 -0.48945099 -0.74399115  0.01039344]]
        <BLANKLINE>
        [#]  SingularVectors_pixeldata
        ============================================================
        [0]  [[ 18.21704938   6.59797925   3.54086993   0.26080987]]
        <BLANKLINE>
        [#]  PrincipalComponents_pixeldata
        ========================================================
        [0]  [[-0.47195872  1.00578734 -0.15734098 -3.61110176]
        [-1.21560134  1.22107722  0.56568466  4.0343405 ]
        [-1.79343888 -3.50815713  4.59514953 -0.48541923]
        [-3.63808191 -4.40505888 -1.4879823   0.04157377]]

    """
    # All of the work is delegated to the Scala-side frame; nothing is returned
    # here, so callers observe the result through the frame's new column.
    # NOTE(review): in the docstring example every PrincipalComponents entry
    # equals the element-wise product of the input matrix and V entries
    # (e.g. 2 * 0.50289367 = 1.00578734) -- confirm that the Scala
    # implementation intends this rather than a matrix product.
    self._scala.matrixPca(matrix_column_name, v_matrix_column_name)