Up

sparktk.frame.ops.covariance_matrix module

# vim: set encoding=utf-8

#  Copyright (c) 2016 Intel Corporation 
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#       http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
#


def covariance_matrix(self, data_column_names):
    """
    Compute the covariance matrix over a set of columns of this frame.

    Parameters
    ----------

    :param data_column_names: (List[str]) Names of the columns from which to compute the matrix.
                              The list should name either a single column of type vector, or
                              two or more columns of numeric scalars.
    :return: (Frame) A new frame holding the covariance values for the given columns.

    Notes
    -----
    Only columns containing numerical data are supported.

    Examples
    --------
    Consider Frame *my_frame*, which contains the data

        >>> my_frame.inspect()
         [#]  idnum  x1   x2   x3   x4
        ===============================
        [0]      0  1.0  4.0  0.0  -1.0
        [1]      1  2.0  3.0  0.0  -1.0
        [2]      2  3.0  2.0  1.0  -1.0
        [3]      3  4.0  1.0  2.0  -1.0
        [4]      4  5.0  0.0  2.0  -1.0


    my_frame.covariance_matrix computes the covariance on each pair of columns in the user-provided list.

        >>> cov_matrix = my_frame.covariance_matrix(my_frame.column_names)
        [===Job Progress===]

        The resulting table (specifying all columns) is:

        >>> cov_matrix.inspect()
        [#]  idnum  x1    x2    x3    x4
        =================================
        [0]    2.5   2.5  -2.5   1.5  0.0
        [1]    2.5   2.5  -2.5   1.5  0.0
        [2]   -2.5  -2.5   2.5  -1.5  0.0
        [3]    1.5   1.5  -1.5   1.0  0.0
        [4]    0.0   0.0   0.0   0.0  0.0

    """
    # Imported locally, as in the original implementation.
    from sparktk.frame.frame import Frame

    context = self._tc
    compute_covariance = self._scala.covarianceMatrix

    # The Scala-side call takes a Scala List[String], so convert the Python list first.
    scala_column_names = context.jutils.convert.to_scala_list_string(data_column_names)

    # Wrap the Scala result in a Python Frame bound to the same context.
    return Frame(context, compute_covariance(scala_column_names))

Functions

def covariance_matrix(self, data_column_names)

Calculate covariance matrix for two or more columns.

Parameters:
data_column_names (List[str]): The names of the columns from which to compute the matrix. Names should refer to a single column of type vector, or two or more columns of numeric scalars.

Returns (Frame): A matrix with the covariance values for the columns.

Notes:

This function applies only to columns containing numerical data.

Examples:

Consider Frame my_frame, which contains the data

>>> my_frame.inspect()
 [#]  idnum  x1   x2   x3   x4
===============================
[0]      0  1.0  4.0  0.0  -1.0
[1]      1  2.0  3.0  0.0  -1.0
[2]      2  3.0  2.0  1.0  -1.0
[3]      3  4.0  1.0  2.0  -1.0
[4]      4  5.0  0.0  2.0  -1.0

my_frame.covariance_matrix computes the covariance on each pair of columns in the user-provided list.

>>> cov_matrix = my_frame.covariance_matrix(my_frame.column_names)
[===Job Progress===]

The resulting table (specifying all columns) is:

>>> cov_matrix.inspect()
[#]  idnum  x1    x2    x3    x4
=================================
[0]    2.5   2.5  -2.5   1.5  0.0
[1]    2.5   2.5  -2.5   1.5  0.0
[2]   -2.5  -2.5   2.5  -1.5  0.0
[3]    1.5   1.5  -1.5   1.0  0.0
[4]    0.0   0.0   0.0   0.0  0.0
def covariance_matrix(self, data_column_names):
    """
    Compute the covariance matrix over a set of columns of this frame.

    Parameters
    ----------

    :param data_column_names: (List[str]) Names of the columns from which to compute the matrix.
                              The list should name either a single column of type vector, or
                              two or more columns of numeric scalars.
    :return: (Frame) A new frame holding the covariance values for the given columns.

    Notes
    -----
    Only columns containing numerical data are supported.

    Examples
    --------
    Consider Frame *my_frame*, which contains the data

        >>> my_frame.inspect()
         [#]  idnum  x1   x2   x3   x4
        ===============================
        [0]      0  1.0  4.0  0.0  -1.0
        [1]      1  2.0  3.0  0.0  -1.0
        [2]      2  3.0  2.0  1.0  -1.0
        [3]      3  4.0  1.0  2.0  -1.0
        [4]      4  5.0  0.0  2.0  -1.0


    my_frame.covariance_matrix computes the covariance on each pair of columns in the user-provided list.

        >>> cov_matrix = my_frame.covariance_matrix(my_frame.column_names)
        [===Job Progress===]

        The resulting table (specifying all columns) is:

        >>> cov_matrix.inspect()
        [#]  idnum  x1    x2    x3    x4
        =================================
        [0]    2.5   2.5  -2.5   1.5  0.0
        [1]    2.5   2.5  -2.5   1.5  0.0
        [2]   -2.5  -2.5   2.5  -1.5  0.0
        [3]    1.5   1.5  -1.5   1.0  0.0
        [4]    0.0   0.0   0.0   0.0  0.0

    """
    # Imported locally, as in the original implementation.
    from sparktk.frame.frame import Frame

    context = self._tc
    compute_covariance = self._scala.covarianceMatrix

    # The Scala-side call takes a Scala List[String], so convert the Python list first.
    scala_column_names = context.jutils.convert.to_scala_list_string(data_column_names)

    # Wrap the Scala result in a Python Frame bound to the same context.
    return Frame(context, compute_covariance(scala_column_names))