Up

sparktk.dicom.dicom module

# vim: set encoding=utf-8

#  Copyright (c) 2016 Intel Corporation 
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#       http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
#

import logging
# Module logger, obtained under the 'sparktk' logger name.
logger = logging.getLogger('sparktk')
from sparktk import TkContext

# import constructors for the API's sake (not actually dependencies of the Dicom class)
from sparktk.dicom.constructors.import_dcm import import_dcm

# Names exported by a star-import of this module.
__all__ = ["Dicom",
           "import_dcm",
           "load"]


class Dicom(object):
    """
    sparktk Dicom

    Represents a collection of DICOM data objects. Reference: [https://en.wikipedia.org/wiki/DICOM](https://en.wikipedia.org/wiki/DICOM)

    The metadata property is a sparktk frame which defines the metadata of the collection of DICOM objects.
    Its schema has a column named "id" which holds a unique integer ID for the record and another column which
    holds a string of XML comprised of the metadata.  Users can run XQuery or invoke canned column extraction/filter
    operations on this frame.

    The pixeldata property is a sparktk frame which defines the pixeldata of the collection of DICOM objects.
    Its schema has a column named "id" which holds a unique integer ID for the record and another column which
    holds a matrix(internally it is a numpy.ndarray) comprised of the pixeldata.  Users can run numpy supported transformations on it.

    dcm4che-3.x dependencies are used to support various operations on dicom images. It is available as java library
    Reference: [https://github.com/dcm4che/dcm4che](https://github.com/dcm4che/dcm4che)

    Note: Currently sparktk Dicom supports only uncompressed dicom images

    Load a set of uncompressed sample .dcm files from path (integration-tests/datasets/dicom_uncompressed)
    and create a dicom object. The examples below help you understand how to access dicom object properties.

    Examples
    --------

        #Path can be local/hdfs to dcm file(s)
        >>> dicom_path = "../datasets/dicom_uncompressed"

        #use import_dcm available inside dicom module to create a dicom object from given dicom_path
        >>> dicom = tc.dicom.import_dcm(dicom_path)

        #Type of dicom object created
        >>> type(dicom)
        <class 'sparktk.dicom.dicom.Dicom'>

        >>> dicom.metadata.count()
        3

        >>> dicom.pixeldata.count()
        3

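        >>> dicom.metadata.inspect(truncate=30)
        [#]  id  metadata
        =======================================
        [0]   0  <?xml version="1.0" encodin...
        [1]   1  <?xml version="1.0" encodin...
        [2]   2  <?xml version="1.0" encodin...

        #Part of xml string looks as below
        <?xml version="1.0" encoding="UTF-8"?>
            <NativeDicomModel xml:space="preserve">
                <DicomAttribute keyword="FileMetaInformationVersion" tag="00020001" vr="OB"><InlineBinary>AAE=</InlineBinary></DicomAttribute>
                <DicomAttribute keyword="MediaStorageSOPClassUID" tag="00020002" vr="UI"><Value number="1">1.2.840.10008.5.1.4.1.1.4</Value></DicomAttribute>
                <DicomAttribute keyword="MediaStorageSOPInstanceUID" tag="00020003" vr="UI"><Value number="1">1.3.6.1.4.1.14519.5.2.1.7308.2101.234736319276602547946349519685</Value></DicomAttribute>
                ...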

        #pixeldata property is sparktk frame
        >>> pixeldata = dicom.pixeldata.take(1)

        #Display
        >>> pixeldata
        [[0L, array([[   0.,    0.,    0., ...,    0.,    0.,    0.],
        [   0.,  125.,  103., ...,  120.,  213.,  319.],
        [   0.,  117.,   94., ...,  135.,  223.,  325.],
        ...,
        [   0.,   62.,   21., ...,  896.,  886.,  854.],
        [   0.,   63.,   23., ...,  941.,  872.,  897.],
        [   0.,   60.,   30., ...,  951.,  822.,  906.]])]]

        #Access ndarray
        >>> image_ndarray= pixeldata[0][1]

        >>> type(image_ndarray)
        <type 'numpy.ndarray'>

        #Dimensions of the image matrix stored
        >>> image_ndarray.shape
        (320, 320)

        #Use python matplot lib package to verify image visually
        >>> import pylab
        >>> pylab.imshow(image_ndarray, cmap=pylab.cm.bone)
        >>> pylab.show()

        #Save method persists the dicom object to disk
        >>> dicom.save("sandbox/dicom_data")

        #loads the saved dicom object
        >>> load_dicom = tc.load("sandbox/dicom_data")

        #Re-check whether we loaded back the dicom object or not
        >>> type(load_dicom)
        <class 'sparktk.dicom.dicom.Dicom'>

        #Again access pixeldata and perform same operations as above
        >>> load_pixeldata = load_dicom.pixeldata.take(1)

        #Order may differ when you load back dicom object

        >>> load_pixeldata
        [[0L, array([[   0.,    0.,    0., ...,    0.,    0.,    0.],
        [   0.,  125.,  103., ...,  120.,  213.,  319.],
        [   0.,  117.,   94., ...,  135.,  223.,  325.],
        ...,
        [   0.,   62.,   21., ...,  896.,  886.,  854.],
        [   0.,   63.,   23., ...,  941.,  872.,  897.],
        [   0.,   60.,   30., ...,  951.,  822.,  906.]])]]


        >>> load_image_ndarray= load_pixeldata[0][1]

        >>> type(load_image_ndarray)
        <type 'numpy.ndarray'>

        >>> load_image_ndarray.shape
        (320, 320)

        #Inspect metadata property to see dicom metadata xml content

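        >>> load_dicom.metadata.inspect(truncate=30)
        [#]  id  metadata
        =======================================
        [0]   0  <?xml version="1.0" encodin...
        [1]   1  <?xml version="1.0" encodin...
        [2]   2  <?xml version="1.0" encodin...

        #Using built-in xml libraries to run xquery on metadata
        >>> import xml.etree.ElementTree as ET

        #Performing add_columns operation.
        #Add xml tag as column in dicom metadata frame
        #Here we add SOPInstanceUID as column to metadata frame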

        #sample function to apply on row - add_columns
        >>> def extractor(tag_name):
        ...    def _extractor(row):
        ...        root = ET.fromstring(row["metadata"])
        ...        for attribute in root.findall('DicomAttribute'):
        ...            keyword = attribute.get('keyword')
        ...            value = None
        ...            if attribute.find('Value') is not None:
        ...                value = attribute.find('Value').text
        ...            if keyword == tag_name:
        ...                return value
        ...    return _extractor

        >>> tag_name = "SOPInstanceUID"

        >>> dicom.metadata.add_columns(extractor(tag_name), (tag_name, str))

        >>> dicom.metadata.count()
        3

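        >>> dicom.metadata.inspect(truncate=30)
        [#]  id  metadata                        SOPInstanceUID
        =======================================================================
        [0]   0  <?xml version="1.0" encodin...  1.3.6.1.4.1.14519.5.2.1.730...
        [1]   1  <?xml version="1.0" encodin...  1.3.6.1.4.1.14519.5.2.1.730...
        [2]   2  <?xml version="1.0" encodin...  1.3.6.1.4.1.14519.5.2.1.730...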

Functions

def import_dcm(

dicom_dir_path, tc=<class 'sparktk.arguments.implicit'>)

Creates a dicom object with metadataFrame and pixeldataFrame from a dcm file(s)

Parameters:
dicom_dir_path(str):Local/HDFS path of the dcm file(s)

Returns(Dicom): returns a dicom object with metadata and pixeldata frames

Examples:
#Path can be local/hdfs to dcm file(s)
>>> dicom_path = "../datasets/dicom_uncompressed"

#use import_dcm available inside dicom module to create a dicom object from given dicom_path
>>> dicom = tc.dicom.import_dcm(dicom_path)

#Type of dicom object created
>>> type(dicom)
<class 'sparktk.dicom.dicom.Dicom'>

#Inspect metadata property to see dicom metadata xml content
>>> dicom.metadata.inspect(truncate=30)
[#]  id  metadata
=======================================
[0]   0  <?xml version="1.0" encodin...
[1]   1  <?xml version="1.0" encodin...
[2]   2  <?xml version="1.0" encodin...

#pixeldata property is sparktk frame
>>> pixeldata = dicom.pixeldata.take(1)


>>> pixeldata
[[0L, array([[ 0.,  0.,  0., ...,  0.,  0.,  0.],
[ 0.,  7.,  5., ...,  5.,  7.,  8.],
[ 0.,  7.,  6., ...,  5.,  6.,  7.],
...,
[ 0.,  6.,  7., ...,  5.,  5.,  6.],
[ 0.,  2.,  5., ...,  5.,  5.,  4.],
[ 1.,  1.,  3., ...,  1.,  1.,  0.]])]]
def import_dcm(dicom_dir_path, tc=TkContext.implicit):
    """
    Build a dicom object (metadata frame plus pixeldata frame) from dcm file(s)

    Parameters
    ----------

    :param dicom_dir_path: (str) Local/HDFS path of the dcm file(s)
    :param tc: context to run in; it is checked with TkContext.validate before use
    :return: (Dicom) returns a dicom object with metadata and pixeldata frames

    A ValueError is raised when dicom_dir_path is not a string.


    Examples
    --------
        #Path can be local/hdfs to dcm file(s)
        >>> dicom_path = "../datasets/dicom_uncompressed"

        #use import_dcm available inside dicom module to create a dicom object from given dicom_path
        >>> dicom = tc.dicom.import_dcm(dicom_path)

        #Type of dicom object created
        >>> type(dicom)
        <class 'sparktk.dicom.dicom.Dicom'>

        #Inspect metadata property to see dicom metadata xml content
        >>> dicom.metadata.inspect(truncate=30)
        [#]  id  metadata
        =======================================
        [0]   0  <?xml version="1.0" encodin...
        [1]   1  <?xml version="1.0" encodin...
        [2]   2  <?xml version="1.0" encodin...

        #pixeldata property is sparktk frame
        >>> pixeldata = dicom.pixeldata.take(1)


        >>> pixeldata
        [[0L, array([[ 0.,  0.,  0., ...,  0.,  0.,  0.],
        [ 0.,  7.,  5., ...,  5.,  7.,  8.],
        [ 0.,  7.,  6., ...,  5.,  6.,  7.],
        ...,
        [ 0.,  6.,  7., ...,  5.,  5.,  6.],
        [ 0.,  2.,  5., ...,  5.,  5.,  4.],
        [ 1.,  1.,  3., ...,  1.,  1.,  0.]])]]

    """
    path_is_string = isinstance(dicom_dir_path, basestring)
    if not path_is_string:
        raise ValueError("dicom_dir_path parameter must be a string, but is {0}.".format(type(dicom_dir_path)))

    TkContext.validate(tc)

    # The import itself is carried out by the scala-side Import constructor, reached through the JVM gateway.
    scala_importer = tc.sc._jvm.org.trustedanalytics.sparktk.dicom.internal.constructors.Import
    imported = scala_importer.importDcm(tc.jutils.get_scala_sc(), dicom_dir_path)

    # NOTE(review): imported at call time rather than module level, presumably to avoid a circular import - confirm
    from sparktk.dicom.dicom import Dicom
    return Dicom._from_scala(tc, imported)

def load(

path, tc=<class 'sparktk.arguments.implicit'>)

load Dicom from given path

def load(path, tc=TkContext.implicit):
    """
    load Dicom from given path

    :param path: location handed through to tc.load
    :param tc: context; it is checked with TkContext.validate and then used to perform the load
    :return: whatever tc.load(path, Dicom) returns
    """
    TkContext.validate(tc)
    loaded_dicom = tc.load(path, Dicom)
    return loaded_dicom

Classes

class Dicom

sparktk Dicom

Represents a collection of DICOM data objects. Reference: https://en.wikipedia.org/wiki/DICOM

The metadata property is a sparktk frame which defines the metadata of the collection of DICOM objects. Its schema has a column named "id" which holds a unique integer ID for the record and another column which holds a string of XML comprised of the metadata. Users can run XQuery or invoke canned column extraction/filter operations on this frame.

The pixeldata property is a sparktk frame which defines the pixeldata of the collection of DICOM objects. Its schema has a column named "id" which holds a unique integer ID for the record and another column which holds a matrix(internally it is a numpy.ndarray) comprised of the pixeldata. Users can run numpy supported transformations on it.

dcm4che-3.x dependencies are used to support various operations on dicom images. It is available as java library Reference: https://github.com/dcm4che/dcm4che

Note: Currently sparktk Dicom supports only uncompressed dicom images

Load a set of uncompressed sample .dcm files from path (integration-tests/datasets/dicom_uncompressed) and create a dicom object. The examples below help you understand how to access dicom object properties.

Examples:
#Path can be local/hdfs to dcm file(s)
>>> dicom_path = "../datasets/dicom_uncompressed"

#use import_dcm available inside dicom module to create a dicom object from given dicom_path
>>> dicom = tc.dicom.import_dcm(dicom_path)

#Type of dicom object created
>>> type(dicom)
<class 'sparktk.dicom.dicom.Dicom'>

>>> dicom.metadata.count()
3

>>> dicom.pixeldata.count()
3

>>> dicom.metadata.inspect(truncate=30)
[#]  id  metadata
=======================================
[0]   0  <?xml version="1.0" encodin...
[1]   1  <?xml version="1.0" encodin...
[2]   2  <?xml version="1.0" encodin...

#Part of xml string looks as below
<?xml version="1.0" encoding="UTF-8"?>
    <NativeDicomModel xml:space="preserve">
        <DicomAttribute keyword="FileMetaInformationVersion" tag="00020001" vr="OB"><InlineBinary>AAE=</InlineBinary></DicomAttribute>
        <DicomAttribute keyword="MediaStorageSOPClassUID" tag="00020002" vr="UI"><Value number="1">1.2.840.10008.5.1.4.1.1.4</Value></DicomAttribute>
        <DicomAttribute keyword="MediaStorageSOPInstanceUID" tag="00020003" vr="UI"><Value number="1">1.3.6.1.4.1.14519.5.2.1.7308.2101.234736319276602547946349519685</Value></DicomAttribute>
        ...

#pixeldata property is sparktk frame
>>> pixeldata = dicom.pixeldata.take(1)

#Display
>>> pixeldata
[[0L, array([[   0.,    0.,    0., ...,    0.,    0.,    0.],
[   0.,  125.,  103., ...,  120.,  213.,  319.],
[   0.,  117.,   94., ...,  135.,  223.,  325.],
...,
[   0.,   62.,   21., ...,  896.,  886.,  854.],
[   0.,   63.,   23., ...,  941.,  872.,  897.],
[   0.,   60.,   30., ...,  951.,  822.,  906.]])]]

#Access ndarray
>>> image_ndarray= pixeldata[0][1]

>>> type(image_ndarray)
<type 'numpy.ndarray'>

#Dimensions of the image matrix stored
>>> image_ndarray.shape
(320, 320)

#Use python matplot lib package to verify image visually
>>> import pylab
>>> pylab.imshow(image_ndarray, cmap=pylab.cm.bone)
>>> pylab.show()

#Save method persists the dicom object to disk
>>> dicom.save("sandbox/dicom_data")

#loads the saved dicom object
>>> load_dicom = tc.load("sandbox/dicom_data")

#Re-check whether we loaded back the dicom object or not
>>> type(load_dicom)
<class 'sparktk.dicom.dicom.Dicom'>

#Again access pixeldata and perform same operations as above
>>> load_pixeldata = load_dicom.pixeldata.take(1)

#Order may differ when you load back dicom object

>>> load_pixeldata
[[0L, array([[   0.,    0.,    0., ...,    0.,    0.,    0.],
[   0.,  125.,  103., ...,  120.,  213.,  319.],
[   0.,  117.,   94., ...,  135.,  223.,  325.],
...,
[   0.,   62.,   21., ...,  896.,  886.,  854.],
[   0.,   63.,   23., ...,  941.,  872.,  897.],
[   0.,   60.,   30., ...,  951.,  822.,  906.]])]]


>>> load_image_ndarray= load_pixeldata[0][1]

>>> type(load_image_ndarray)
<type 'numpy.ndarray'>

>>> load_image_ndarray.shape
(320, 320)

#Inspect metadata property to see dicom metadata xml content

>>> load_dicom.metadata.inspect(truncate=30)
[#]  id  metadata
=======================================
[0]   0  <?xml version="1.0" encodin...
[1]   1  <?xml version="1.0" encodin...
[2]   2  <?xml version="1.0" encodin...

#Using built-in xml libraries to run xquery on metadata
>>> import xml.etree.ElementTree as ET

#Performing add_columns operation.
#Add xml tag as column in dicom metadata frame
#Here we add SOPInstanceUID as column to metadata frame

#sample function to apply on row - add_columns
>>> def extractor(tag_name):
...    def _extractor(row):
...        root = ET.fromstring(row["metadata"])
...        for attribute in root.findall('DicomAttribute'):
...            keyword = attribute.get('keyword')
...            value = None
...            if attribute.find('Value') is not None:
...                value = attribute.find('Value').text
...            if keyword == tag_name:
...                return value
...    return _extractor

>>> tag_name = "SOPInstanceUID"

>>> dicom.metadata.add_columns(extractor(tag_name), (tag_name, str))

>>> dicom.metadata.count()
3

>>> dicom.metadata.inspect(truncate=30)
[#]  id  metadata                        SOPInstanceUID
=======================================================================
[0]   0  <?xml version="1.0" encodin...  1.3.6.1.4.1.14519.5.2.1.730...
[1]   1  <?xml version="1.0" encodin...  1.3.6.1.4.1.14519.5.2.1.730...
[2]   2  <?xml version="1.0" encodin...  1.3.6.1.4.1.14519.5.2.1.730...
class Dicom(object):
    """
    sparktk Dicom

    Represents a collection of DICOM data objects. Reference: [https://en.wikipedia.org/wiki/DICOM](https://en.wikipedia.org/wiki/DICOM)

    The metadata property is a sparktk frame which defines the metadata of the collection of DICOM objects.
    Its schema has a column named "id" which holds a unique integer ID for the record and another column which
    holds a string of XML comprised of the metadata.  Users can run XQuery or invoke canned column extraction/filter
    operations on this frame.

    The pixeldata property is a sparktk frame which defines the pixeldata of the collection of DICOM objects.
    Its schema has a column named "id" which holds a unique integer ID for the record and another column which
    holds a matrix(internally it is a numpy.ndarray) comprised of the pixeldata.  Users can run numpy supported transformations on it.

    dcm4che-3.x dependencies are used to support various operations on dicom images. It is available as java library
    Reference: [https://github.com/dcm4che/dcm4che](https://github.com/dcm4che/dcm4che)

    Note: Currently sparktk Dicom supports only uncompressed dicom images

    Load a set of uncompressed sample .dcm files from path (integration-tests/datasets/dicom_uncompressed)
    and create a dicom object. The examples below help you understand how to access dicom object properties.

    Examples
    --------

        #Path can be local/hdfs to dcm file(s)
        >>> dicom_path = "../datasets/dicom_uncompressed"

        #use import_dcm available inside dicom module to create a dicom object from given dicom_path
        >>> dicom = tc.dicom.import_dcm(dicom_path)

        #Type of dicom object created
        >>> type(dicom)
        <class 'sparktk.dicom.dicom.Dicom'>

        >>> dicom.metadata.count()
        3

        >>> dicom.pixeldata.count()
        3

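        >>> dicom.metadata.inspect(truncate=30)
        [#]  id  metadata
        =======================================
        [0]   0  <?xml version="1.0" encodin...
        [1]   1  <?xml version="1.0" encodin...
        [2]   2  <?xml version="1.0" encodin...

        #Part of xml string looks as below
        <?xml version="1.0" encoding="UTF-8"?>
            <NativeDicomModel xml:space="preserve">
                <DicomAttribute keyword="FileMetaInformationVersion" tag="00020001" vr="OB"><InlineBinary>AAE=</InlineBinary></DicomAttribute>
                <DicomAttribute keyword="MediaStorageSOPClassUID" tag="00020002" vr="UI"><Value number="1">1.2.840.10008.5.1.4.1.1.4</Value></DicomAttribute>
                <DicomAttribute keyword="MediaStorageSOPInstanceUID" tag="00020003" vr="UI"><Value number="1">1.3.6.1.4.1.14519.5.2.1.7308.2101.234736319276602547946349519685</Value></DicomAttribute>
                ...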

        #pixeldata property is sparktk frame
        >>> pixeldata = dicom.pixeldata.take(1)

        #Display
        >>> pixeldata
        [[0L, array([[   0.,    0.,    0., ...,    0.,    0.,    0.],
        [   0.,  125.,  103., ...,  120.,  213.,  319.],
        [   0.,  117.,   94., ...,  135.,  223.,  325.],
        ...,
        [   0.,   62.,   21., ...,  896.,  886.,  854.],
        [   0.,   63.,   23., ...,  941.,  872.,  897.],
        [   0.,   60.,   30., ...,  951.,  822.,  906.]])]]

        #Access ndarray
        >>> image_ndarray= pixeldata[0][1]

        >>> type(image_ndarray)
        <type 'numpy.ndarray'>

        #Dimensions of the image matrix stored
        >>> image_ndarray.shape
        (320, 320)

        #Use python matplot lib package to verify image visually
        >>> import pylab
        >>> pylab.imshow(image_ndarray, cmap=pylab.cm.bone)
        >>> pylab.show()

        #Save method persists the dicom object to disk
        >>> dicom.save("sandbox/dicom_data")

        #loads the saved dicom object
        >>> load_dicom = tc.load("sandbox/dicom_data")

        #Re-check whether we loaded back the dicom object or not
        >>> type(load_dicom)
        <class 'sparktk.dicom.dicom.Dicom'>

        #Again access pixeldata and perform same operations as above
        >>> load_pixeldata = load_dicom.pixeldata.take(1)

        #Order may differ when you load back dicom object

        >>> load_pixeldata
        [[0L, array([[   0.,    0.,    0., ...,    0.,    0.,    0.],
        [   0.,  125.,  103., ...,  120.,  213.,  319.],
        [   0.,  117.,   94., ...,  135.,  223.,  325.],
        ...,
        [   0.,   62.,   21., ...,  896.,  886.,  854.],
        [   0.,   63.,   23., ...,  941.,  872.,  897.],
        [   0.,   60.,   30., ...,  951.,  822.,  906.]])]]


        >>> load_image_ndarray= load_pixeldata[0][1]

        >>> type(load_image_ndarray)
        <type 'numpy.ndarray'>

        >>> load_image_ndarray.shape
        (320, 320)

        #Inspect metadata property to see dicom metadata xml content

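        >>> load_dicom.metadata.inspect(truncate=30)
        [#]  id  metadata
        =======================================
        [0]   0  <?xml version="1.0" encodin...
        [1]   1  <?xml version="1.0" encodin...
        [2]   2  <?xml version="1.0" encodin...

        #Using built-in xml libraries to run xquery on metadata
        >>> import xml.etree.ElementTree as ET

        #Performing add_columns operation.
        #Add xml tag as column in dicom metadata frame
        #Here we add SOPInstanceUID as column to metadata frame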

        #sample function to apply on row - add_columns
        >>> def extractor(tag_name):
        ...    def _extractor(row):
        ...        root = ET.fromstring(row["metadata"])
        ...        for attribute in root.findall('DicomAttribute'):
        ...            keyword = attribute.get('keyword')
        ...            value = None
        ...            if attribute.find('Value') is not None:
        ...                value = attribute.find('Value').text
        ...            if keyword == tag_name:
        ...                return value
        ...    return _extractor

        >>> tag_name = "SOPInstanceUID"

        >>> dicom.metadata.add_columns(extractor(tag_name), (tag_name, str))

        >>> dicom.metadata.count()
        3

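        >>> dicom.metadata.inspect(truncate=30)
        [#]  id  metadata                        SOPInstanceUID
        =======================================================================
        [0]   0  <?xml version="1.0" encodin...  1.3.6.1.4.1.14519.5.2.1.730...
        [1]   1  <?xml version="1.0" encodin...  1.3.6.1.4.1.14519.5.2.1.730...
        [2]   2  <?xml version="1.0" encodin...  1.3.6.1.4.1.14519.5.2.1.730...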

Ancestors (in MRO)

  • Dicom
  • __builtin__.object

Instance variables

var metadata

var pixeldata

Methods

def __init__(

self, tc, scala_dicom)

def __init__(self, tc, scala_dicom):
    """
    Keep the context and wrap the scala dicom's two frames as sparktk Frames

    :param tc: context stored on the instance and passed to each Frame
    :param scala_dicom: object whose metadata() and pixeldata() results are wrapped
    """
    self._tc = tc
    from sparktk.frame.frame import Frame

    metadata_scala_frame = scala_dicom.metadata()
    self._metadata = Frame(tc, metadata_scala_frame)

    pixeldata_scala_frame = scala_dicom.pixeldata()
    self._pixeldata = Frame(tc, pixeldata_scala_frame)

def drop_rows(

self, predicate)

Drop the rows of dicom metadata and pixeldata frames using given predicate

Parameters:
predicate: predicate to apply on filter
Examples:
>>> dicom_path = "../datasets/dicom_uncompressed"

>>> dicom = tc.dicom.import_dcm(dicom_path)

>>> dicom.metadata.count()
3

>>> dicom.metadata.inspect(truncate=30)
[#]  id  metadata
=======================================
[0]   0  <?xml version="1.0" encodin...
[1]   1  <?xml version="1.0" encodin...
[2]   2  <?xml version="1.0" encodin...

#Part of xml string looks as below
<?xml version="1.0" encoding="UTF-8"?>
    <NativeDicomModel xml:space="preserve">
        <DicomAttribute keyword="FileMetaInformationVersion" tag="00020001" vr="OB"><InlineBinary>AAE=</InlineBinary></DicomAttribute>
        <DicomAttribute keyword="MediaStorageSOPClassUID" tag="00020002" vr="UI"><Value number="1">1.2.840.10008.5.1.4.1.1.4</Value></DicomAttribute>
        <DicomAttribute keyword="MediaStorageSOPInstanceUID" tag="00020003" vr="UI"><Value number="1">1.3.6.1.4.1.14519.5.2.1.7308.2101.234736319276602547946349519685</Value></DicomAttribute>
        ...

>>> import xml.etree.ElementTree as ET

#sample custom filter function
>>> def drop_meta(tag_name, tag_value):
...    def _drop_meta(row):
...        root = ET.fromstring(row["metadata"])
...        for attribute in root.findall('DicomAttribute'):
...            keyword = attribute.get('keyword')
...            if attribute.get('keyword') is not None:
...                if attribute.find('Value') is not None:
...                    value = attribute.find('Value').text
...                    if keyword == tag_name and value == tag_value:
...                        return True
...    return _drop_meta

>>> tag_name = "SOPInstanceUID"

>>> tag_value = "1.3.6.1.4.1.14519.5.2.1.7308.2101.234736319276602547946349519685"

>>> dicom.drop_rows(drop_meta(tag_name, tag_value))

>>> dicom.metadata.count()
2

#After filter
>>> dicom.metadata.inspect(truncate=30)
[#]  id  metadata
=======================================
[0]   1  <?xml version="1.0" encodin...
[1]   2  <?xml version="1.0" encodin...

>>> dicom.pixeldata.inspect(truncate=30)
[#]  id  imagematrix
===========================================================
[0]   1  [[   0.    0.    0. ...,    0.    0.    0.]
[   0.   70.   85. ...,  215.  288.  337.]
[   0.   63.   72. ...,  228.  269.  317.]
...,
[   0.   42.   40. ...,  966.  919.  871.]
[   0.   42.   33. ...,  988.  887.  860.]
[   0.   46.   38. ...,  983.  876.  885.]]
[1]   2  [[    0.     0.     0. ...,     0.     0.     0.]
[    0.   111.   117. ...,   159.   148.   135.]
[    0.   116.   111. ...,   152.   138.   139.]
...,
[    0.    49.    18. ...,  1057.   965.   853.]
[    0.    42.    20. ...,  1046.   973.   891.]
[    0.    48.    26. ...,  1041.   969.   930.]]
def drop_rows(self, predicate):

    """
    Remove from the dicom metadata and pixeldata frames the rows selected by the given predicate

    The metadata frame is filtered in place.  The pixeldata frame is then replaced by an
    inner join, on "id", between the ids left in metadata and the current pixeldata.
    Nothing is returned.

    Parameters
    ----------

    :param predicate: function called with a metadata row; rows for which it returns a true value are dropped


    Examples
    --------

        >>> dicom_path = "../datasets/dicom_uncompressed"

        >>> dicom = tc.dicom.import_dcm(dicom_path)

        >>> dicom.metadata.count()
        3

        >>> dicom.metadata.inspect(truncate=30)
        [#]  id  metadata
        =======================================
        [0]   0  <?xml version="1.0" encodin...
        [1]   1  <?xml version="1.0" encodin...
        [2]   2  <?xml version="1.0" encodin...

        #Part of xml string looks as below
        <?xml version="1.0" encoding="UTF-8"?>
            <NativeDicomModel xml:space="preserve">
                <DicomAttribute keyword="FileMetaInformationVersion" tag="00020001" vr="OB"><InlineBinary>AAE=</InlineBinary></DicomAttribute>
                <DicomAttribute keyword="MediaStorageSOPClassUID" tag="00020002" vr="UI"><Value number="1">1.2.840.10008.5.1.4.1.1.4</Value></DicomAttribute>
                <DicomAttribute keyword="MediaStorageSOPInstanceUID" tag="00020003" vr="UI"><Value number="1">1.3.6.1.4.1.14519.5.2.1.7308.2101.234736319276602547946349519685</Value></DicomAttribute>
                ...

        >>> import xml.etree.ElementTree as ET

        #sample custom filter function
        >>> def drop_meta(tag_name, tag_value):
        ...    def _drop_meta(row):
        ...        root = ET.fromstring(row["metadata"])
        ...        for attribute in root.findall('DicomAttribute'):
        ...            keyword = attribute.get('keyword')
        ...            if attribute.get('keyword') is not None:
        ...                if attribute.find('Value') is not None:
        ...                    value = attribute.find('Value').text
        ...                    if keyword == tag_name and value == tag_value:
        ...                        return True
        ...    return _drop_meta

        >>> tag_name = "SOPInstanceUID"

        >>> tag_value = "1.3.6.1.4.1.14519.5.2.1.7308.2101.234736319276602547946349519685"

        >>> dicom.drop_rows(drop_meta(tag_name, tag_value))

        >>> dicom.metadata.count()
        2

        #After filter
        >>> dicom.metadata.inspect(truncate=30)
        [#]  id  metadata
        =======================================
        [0]   1  <?xml version="1.0" encodin...
        [1]   2  <?xml version="1.0" encodin...

        >>> dicom.pixeldata.inspect(truncate=30)
        [#]  id  imagematrix
        ===========================================================
        [0]   1  [[   0.    0.    0. ...,    0.    0.    0.]
        [   0.   70.   85. ...,  215.  288.  337.]
        [   0.   63.   72. ...,  228.  269.  317.]
        ...,
        [   0.   42.   40. ...,  966.  919.  871.]
        [   0.   42.   33. ...,  988.  887.  860.]
        [   0.   46.   38. ...,  983.  876.  885.]]
        [1]   2  [[    0.     0.     0. ...,     0.     0.     0.]
        [    0.   111.   117. ...,   159.   148.   135.]
        [    0.   116.   111. ...,   152.   138.   139.]
        ...,
        [    0.    49.    18. ...,  1057.   965.   853.]
        [    0.    42.    20. ...,  1046.   973.   891.]
        [    0.    48.    26. ...,  1041.   969.   930.]]

    """

    # The predicate marks rows to drop, so its negation is what gets handed to filter.
    self.metadata.filter(lambda row: not predicate(row))

    # Bring pixeldata back in step with metadata by joining on the ids that remain.
    surviving_ids = self.metadata.copy(columns="id")
    self._pixeldata = surviving_ids.join_inner(self.pixeldata, "id")

def drop_rows_by_keywords(

self, keywords_values_dict)

Drop the rows based on dictionary of {"keyword":"value"}(applying 'and' operation on dictionary) from column holding xml string.

Ex: keywords_values_dict -> {"SOPInstanceUID":"1.3.6.1.4.1.14519.5.2.1.7308.2101.234736319276602547946349519685", "Manufacturer":"SIEMENS", "StudyDate":"20030315"}

Parameters:
keywords_values_dict(dict(str, str)):dictionary of keywords and values from xml string in metadata
Examples:
>>> dicom_path = "../datasets/dicom_uncompressed"

>>> dicom = tc.dicom.import_dcm(dicom_path)

>>> dicom.metadata.count()
3

>>> dicom.metadata.inspect(truncate=30)
[#]  id  metadata
=======================================
[0]   0  <?xml version="1.0" encodin...
[1]   1  <?xml version="1.0" encodin...
[2]   2  <?xml version="1.0" encodin...

#Part of xml string looks as below
<?xml version="1.0" encoding="UTF-8"?>
    <NativeDicomModel xml:space="preserve">
        <DicomAttribute keyword="FileMetaInformationVersion" tag="00020001" vr="OB"><InlineBinary>AAE=</InlineBinary></DicomAttribute>
        <DicomAttribute keyword="MediaStorageSOPClassUID" tag="00020002" vr="UI"><Value number="1">1.2.840.10008.5.1.4.1.1.4</Value></DicomAttribute>
        <DicomAttribute keyword="MediaStorageSOPInstanceUID" tag="00020003" vr="UI"><Value number="1">1.3.6.1.4.1.14519.5.2.1.7308.2101.234736319276602547946349519685</Value></DicomAttribute>
        ...

>>> keywords_values_dict = {"SOPInstanceUID":"1.3.6.1.4.1.14519.5.2.1.7308.2101.234736319276602547946349519685", "Manufacturer":"SIEMENS", "StudyDate":"20030315"}
>>> dicom.drop_rows_by_keywords(keywords_values_dict)
>>> dicom.metadata.count()
2

#After drop_rows
>>> dicom.metadata.inspect(truncate=30)
[#]  id  metadata
=======================================
[0]   1  <?xml version="1.0" encodin...
[1]   2  <?xml version="1.0" encodin...

>>> dicom.pixeldata.inspect(truncate=30)
[#]  id  imagematrix
===========================================================
[0]   1  [[   0.    0.    0. ...,    0.    0.    0.]
[   0.   70.   85. ...,  215.  288.  337.]
[   0.   63.   72. ...,  228.  269.  317.]
...,
[   0.   42.   40. ...,  966.  919.  871.]
[   0.   42.   33. ...,  988.  887.  860.]
[   0.   46.   38. ...,  983.  876.  885.]]
[1]   2  [[    0.     0.     0. ...,     0.     0.     0.]
[    0.   111.   117. ...,   159.   148.   135.]
[    0.   116.   111. ...,   152.   138.   139.]
...,
[    0.    49.    18. ...,  1057.   965.   853.]
[    0.    42.    20. ...,  1046.   973.   891.]
[    0.    48.    26. ...,  1041.   969.   930.]]
def drop_rows_by_keywords(self, keywords_values_dict):
    """
    Drop the rows based on dictionary of {"keyword":"value"}(applying 'and' operation on dictionary) from column holding xml string.

    Ex: keywords_values_dict -> {"SOPInstanceUID":"1.3.6.1.4.1.14519.5.2.1.7308.2101.234736319276602547946349519685", "Manufacturer":"SIEMENS", "StudyDate":"20030315"}

    The argument is validated in python; the drop itself is delegated to the scala dicom's
    dropRowsByKeywords through self._call_scala.  Nothing is returned.

    Parameters
    ----------

    :param keywords_values_dict: (dict(str, str)) dictionary of keywords and values from xml string in metadata

    Raises TypeError when keywords_values_dict is not a dict, or when any keyword or value in it is not a string.


    Examples
    --------

        >>> dicom_path = "../datasets/dicom_uncompressed"

        >>> dicom = tc.dicom.import_dcm(dicom_path)

        >>> dicom.metadata.count()
        3

        >>> dicom.metadata.inspect(truncate=30)
        [#]  id  metadata
        =======================================
        [0]   0  <?xml version="1.0" encodin...
        [1]   1  <?xml version="1.0" encodin...
        [2]   2  <?xml version="1.0" encodin...

        #Part of xml string looks as below
        <?xml version="1.0" encoding="UTF-8"?>
            <NativeDicomModel xml:space="preserve">
                <DicomAttribute keyword="FileMetaInformationVersion" tag="00020001" vr="OB"><InlineBinary>AAE=</InlineBinary></DicomAttribute>
                <DicomAttribute keyword="MediaStorageSOPClassUID" tag="00020002" vr="UI"><Value number="1">1.2.840.10008.5.1.4.1.1.4</Value></DicomAttribute>
                <DicomAttribute keyword="MediaStorageSOPInstanceUID" tag="00020003" vr="UI"><Value number="1">1.3.6.1.4.1.14519.5.2.1.7308.2101.234736319276602547946349519685</Value></DicomAttribute>
                ...

        >>> keywords_values_dict = {"SOPInstanceUID":"1.3.6.1.4.1.14519.5.2.1.7308.2101.234736319276602547946349519685", "Manufacturer":"SIEMENS", "StudyDate":"20030315"}
        >>> dicom.drop_rows_by_keywords(keywords_values_dict)
        >>> dicom.metadata.count()
        2

        #After drop_rows
        >>> dicom.metadata.inspect(truncate=30)
        [#]  id  metadata
        =======================================
        [0]   1  <?xml version="1.0" encodin...
        [1]   2  <?xml version="1.0" encodin...

        >>> dicom.pixeldata.inspect(truncate=30)
        [#]  id  imagematrix
        ===========================================================
        [0]   1  [[   0.    0.    0. ...,    0.    0.    0.]
        [   0.   70.   85. ...,  215.  288.  337.]
        [   0.   63.   72. ...,  228.  269.  317.]
        ...,
        [   0.   42.   40. ...,  966.  919.  871.]
        [   0.   42.   33. ...,  988.  887.  860.]
        [   0.   46.   38. ...,  983.  876.  885.]]
        [1]   2  [[    0.     0.     0. ...,     0.     0.     0.]
        [    0.   111.   117. ...,   159.   148.   135.]
        [    0.   116.   111. ...,   152.   138.   139.]
        ...,
        [    0.    49.    18. ...,  1057.   965.   853.]
        [    0.    42.    20. ...,  1046.   973.   891.]
        [    0.    48.    26. ...,  1041.   969.   930.]]

    """

    if not isinstance(keywords_values_dict, dict):
        # "%s" (not a bare "%") so the message formats instead of raising "incomplete format"
        raise TypeError("keywords_values_dict should be a type of dict, but found type as %s" % type(keywords_values_dict))

    # basestring only exists on Python 2; fall back to str where it is absent
    try:
        string_types = basestring
    except NameError:
        string_types = str

    for key, value in keywords_values_dict.items():
        if not isinstance(key, string_types) or not isinstance(value, string_types):
            raise TypeError("both keyword and value should be of type str")

    #Always scala dicom is invoked, as python joins are expensive compared to serializations.
    def f(scala_dicom):
        scala_dicom.dropRowsByKeywords(self._tc.jutils.convert.to_scala_map(keywords_values_dict))

    self._call_scala(f)

def drop_rows_by_tags(

self, tags_values_dict)

Drop the rows that match every {"tag":"value"} pair in the given dictionary (the pairs are combined with a logical 'and'), matching against the column holding the xml string

Ex: tags_values_dict -> {"00080018":"1.3.6.1.4.1.14519.5.2.1.7308.2101.234736319276602547946349519685", "00080070":"SIEMENS", "00080020":"20030315"}

Parameters:
tags_values_dict(dict(str, str)):dictionary of tags and values from xml string in metadata
Examples:
>>> dicom_path = "../datasets/dicom_uncompressed"

>>> dicom = tc.dicom.import_dcm(dicom_path)

>>> dicom.metadata.count()
3

>>> dicom.metadata.inspect(truncate=30)
[#]  id  metadata
=======================================
[0]   0  <?xml version="1.0" encodin...
[1]   1  <?xml version="1.0" encodin...
[2]   2  <?xml version="1.0" encodin...

#Part of xml string looks as below
<?xml version="1.0" encoding="UTF-8"?>
    <NativeDicomModel xml:space="preserve">
        <DicomAttribute keyword="FileMetaInformationVersion" tag="00020001" vr="OB"><InlineBinary>AAE=</InlineBinary></DicomAttribute>
        <DicomAttribute keyword="MediaStorageSOPClassUID" tag="00020002" vr="UI"><Value number="1">1.2.840.10008.5.1.4.1.1.4</Value></DicomAttribute>
        <DicomAttribute keyword="MediaStorageSOPInstanceUID" tag="00020003" vr="UI"><Value number="1">1.3.6.1.4.1.14519.5.2.1.7308.2101.234736319276602547946349519685</Value></DicomAttribute>
        ...

>>> tags_values_dict = {"00080018":"1.3.6.1.4.1.14519.5.2.1.7308.2101.234736319276602547946349519685", "00080070":"SIEMENS", "00080020":"20030315"}
>>> dicom.drop_rows_by_tags(tags_values_dict)
>>> dicom.metadata.count()
2


#After drop_rows
>>> dicom.metadata.inspect(truncate=30)
[#]  id  metadata
=======================================
[0]   1  <?xml version="1.0" encodin...
[1]   2  <?xml version="1.0" encodin...

>>> dicom.pixeldata.inspect(truncate=30)
[#]  id  imagematrix
===========================================================
[0]   1  [[   0.    0.    0. ...,    0.    0.    0.]
[   0.   70.   85. ...,  215.  288.  337.]
[   0.   63.   72. ...,  228.  269.  317.]
...,
[   0.   42.   40. ...,  966.  919.  871.]
[   0.   42.   33. ...,  988.  887.  860.]
[   0.   46.   38. ...,  983.  876.  885.]]
[1]   2  [[    0.     0.     0. ...,     0.     0.     0.]
[    0.   111.   117. ...,   159.   148.   135.]
[    0.   116.   111. ...,   152.   138.   139.]
...,
[    0.    49.    18. ...,  1057.   965.   853.]
[    0.    42.    20. ...,  1046.   973.   891.]
[    0.    48.    26. ...,  1041.   969.   930.]]
def drop_rows_by_tags(self, tags_values_dict):
    """
    Drop the rows based on dictionary of {"tag":"value"}(applying 'and' operation on dictionary) from column holding xml string

    Ex: tags_values_dict -> {"00080018":"1.3.6.1.4.1.14519.5.2.1.7308.2101.234736319276602547946349519685", "00080070":"SIEMENS", "00080020":"20030315"}

    Parameters
    ----------

    :param tags_values_dict: (dict(str, str)) dictionary of tags and values from xml string in metadata

    Raises
    ------

    TypeError if tags_values_dict is not a dict, or if any tag or value in it is not a string


    Examples
    --------

        >>> dicom_path = "../datasets/dicom_uncompressed"

        >>> dicom = tc.dicom.import_dcm(dicom_path)

        >>> dicom.metadata.count()
        3

        >>> dicom.metadata.inspect(truncate=30)
        [#]  id  metadata
        =======================================
        [0]   0  <?xml version="1.0" encodin...
        [1]   1  <?xml version="1.0" encodin...
        [2]   2  <?xml version="1.0" encodin...

        #Part of xml string looks as below
        <?xml version="1.0" encoding="UTF-8"?>
            <NativeDicomModel xml:space="preserve">
                <DicomAttribute keyword="FileMetaInformationVersion" tag="00020001" vr="OB"><InlineBinary>AAE=</InlineBinary></DicomAttribute>
                <DicomAttribute keyword="MediaStorageSOPClassUID" tag="00020002" vr="UI"><Value number="1">1.2.840.10008.5.1.4.1.1.4</Value></DicomAttribute>
                <DicomAttribute keyword="MediaStorageSOPInstanceUID" tag="00020003" vr="UI"><Value number="1">1.3.6.1.4.1.14519.5.2.1.7308.2101.234736319276602547946349519685</Value></DicomAttribute>
                ...

        >>> tags_values_dict = {"00080018":"1.3.6.1.4.1.14519.5.2.1.7308.2101.234736319276602547946349519685", "00080070":"SIEMENS", "00080020":"20030315"}
        >>> dicom.drop_rows_by_tags(tags_values_dict)
        >>> dicom.metadata.count()
        2


        #After drop_rows
        >>> dicom.metadata.inspect(truncate=30)
        [#]  id  metadata
        =======================================
        [0]   1  <?xml version="1.0" encodin...
        [1]   2  <?xml version="1.0" encodin...

        >>> dicom.pixeldata.inspect(truncate=30)
        [#]  id  imagematrix
        ===========================================================
        [0]   1  [[   0.    0.    0. ...,    0.    0.    0.]
        [   0.   70.   85. ...,  215.  288.  337.]
        [   0.   63.   72. ...,  228.  269.  317.]
        ...,
        [   0.   42.   40. ...,  966.  919.  871.]
        [   0.   42.   33. ...,  988.  887.  860.]
        [   0.   46.   38. ...,  983.  876.  885.]]
        [1]   2  [[    0.     0.     0. ...,     0.     0.     0.]
        [    0.   111.   117. ...,   159.   148.   135.]
        [    0.   116.   111. ...,   152.   138.   139.]
        ...,
        [    0.    49.    18. ...,  1057.   965.   853.]
        [    0.    42.    20. ...,  1046.   973.   891.]
        [    0.    48.    26. ...,  1041.   969.   930.]]

    """

    # Validate up front so a bad argument fails here with a readable message
    # instead of inside the scala call.
    if not isinstance(tags_values_dict, dict):
        raise TypeError("tags_values_dict should be a type of dict, but found type as %s" % type(tags_values_dict))

    for tag, value in tags_values_dict.items():
        if not isinstance(tag, basestring) or not isinstance(value, basestring):
            raise TypeError("both tag and value should be strings, but found tag of type %s and value of type %s"
                            % (type(tag), type(value)))

    # The scala dicom is always invoked: python joins are expensive compared to serialization.
    def drop_matching_rows(scala_dicom):
        scala_dicom.dropRowsByTags(self._tc.jutils.convert.to_scala_map(tags_values_dict))

    self._call_scala(drop_matching_rows)

def export_to_dcm(

self, path)

export_to_dcm creates .dcm image from dicom object with (metadata, imagedata) and saves to given path

Parameters:
path(str):local/hdfs path
Examples:
>>> dicom_path = "../datasets/dicom_uncompressed"

>>> dicom = tc.dicom.import_dcm(dicom_path)

>>> dicom.metadata.count()
3

>>> dicom.pixeldata.count()
3

>>> dicom.metadata.inspect(truncate=30)
[#]  id  metadata
=======================================
[0]   0  <?xml version="1.0" encodin...
[1]   1  <?xml version="1.0" encodin...
[2]   2  <?xml version="1.0" encodin...

#Part of xml string looks as below
<?xml version="1.0" encoding="UTF-8"?>
    <NativeDicomModel xml:space="preserve">
        <DicomAttribute keyword="FileMetaInformationVersion" tag="00020001" vr="OB"><InlineBinary>AAE=</InlineBinary></DicomAttribute>
        <DicomAttribute keyword="MediaStorageSOPClassUID" tag="00020002" vr="UI"><Value number="1">1.2.840.10008.5.1.4.1.1.4</Value></DicomAttribute>
        <DicomAttribute keyword="MediaStorageSOPInstanceUID" tag="00020003" vr="UI"><Value number="1">1.3.6.1.4.1.14519.5.2.1.7308.2101.234736319276602547946349519685</Value></DicomAttribute>
        ...

#pixeldata property is sparktk frame
>>> pixeldata = dicom.pixeldata.take(1)

#display
>>> pixeldata
[[0L, array([[   0.,    0.,    0., ...,    0.,    0.,    0.],
[   0.,  125.,  103., ...,  120.,  213.,  319.],
[   0.,  117.,   94., ...,  135.,  223.,  325.],
...,
[   0.,   62.,   21., ...,  896.,  886.,  854.],
[   0.,   63.,   23., ...,  941.,  872.,  897.],
[   0.,   60.,   30., ...,  951.,  822.,  906.]])]]

>>> dicom.export_to_dcm("dicom_export")
def export_to_dcm(self, path):
    """
    export_to_dcm creates .dcm image from dicom object with (metadata, imagedata) and saves to given path

    Parameters
    ----------

    :param path: (str) local/hdfs path

    Raises
    ------

    TypeError if path is not a string


    Examples
    --------

        >>> dicom_path = "../datasets/dicom_uncompressed"

        >>> dicom = tc.dicom.import_dcm(dicom_path)

        >>> dicom.metadata.count()
        3

        >>> dicom.pixeldata.count()
        3

        >>> dicom.metadata.inspect(truncate=30)
        [#]  id  metadata
        =======================================
        [0]   0  <?xml version="1.0" encodin...
        [1]   1  <?xml version="1.0" encodin...
        [2]   2  <?xml version="1.0" encodin...

        #Part of xml string looks as below
        <?xml version="1.0" encoding="UTF-8"?>
            <NativeDicomModel xml:space="preserve">
                <DicomAttribute keyword="FileMetaInformationVersion" tag="00020001" vr="OB"><InlineBinary>AAE=</InlineBinary></DicomAttribute>
                <DicomAttribute keyword="MediaStorageSOPClassUID" tag="00020002" vr="UI"><Value number="1">1.2.840.10008.5.1.4.1.1.4</Value></DicomAttribute>
                <DicomAttribute keyword="MediaStorageSOPInstanceUID" tag="00020003" vr="UI"><Value number="1">1.3.6.1.4.1.14519.5.2.1.7308.2101.234736319276602547946349519685</Value></DicomAttribute>
                ...

        #pixeldata property is sparktk frame
        >>> pixeldata = dicom.pixeldata.take(1)

        #display
        >>> pixeldata
        [[0L, array([[   0.,    0.,    0., ...,    0.,    0.,    0.],
        [   0.,  125.,  103., ...,  120.,  213.,  319.],
        [   0.,  117.,   94., ...,  135.,  223.,  325.],
        ...,
        [   0.,   62.,   21., ...,  896.,  886.,  854.],
        [   0.,   63.,   23., ...,  941.,  872.,  897.],
        [   0.,   60.,   30., ...,  951.,  822.,  906.]])]]

        >>> dicom.export_to_dcm("dicom_export")

    """

    # Reject non-string paths here so the caller sees which argument was wrong
    # and what type it actually had.
    if not isinstance(path, basestring):
        raise TypeError("path must be a type of string, but found type as %s" % type(path))

    def export_images(scala_dicom):
        scala_dicom.exportToDcm(path)

    self._call_scala(export_images)

def extract_keywords(

self, keywords)

Extracts the value for each keyword from the column holding the xml string and adds a column for each keyword to hold that value. For a missing keyword, the value is None.

Ex: keywords -> ["PatientID"]

Parameters:
keywords(str or list(str)):List of keywords from metadata xml string
Examples:
>>> dicom_path = "../datasets/dicom_uncompressed"

>>> dicom = tc.dicom.import_dcm(dicom_path)

>>> dicom.metadata.count()
3

>>> dicom.metadata.inspect(truncate=30)
[#]  id  metadata
=======================================
[0]   0  <?xml version="1.0" encodin...
[1]   1  <?xml version="1.0" encodin...
[2]   2  <?xml version="1.0" encodin...

#Part of xml string looks as below
<?xml version="1.0" encoding="UTF-8"?>
    <NativeDicomModel xml:space="preserve">
        <DicomAttribute keyword="FileMetaInformationVersion" tag="00020001" vr="OB"><InlineBinary>AAE=</InlineBinary></DicomAttribute>
        <DicomAttribute keyword="MediaStorageSOPClassUID" tag="00020002" vr="UI"><Value number="1">1.2.840.10008.5.1.4.1.1.4</Value></DicomAttribute>
        <DicomAttribute keyword="MediaStorageSOPInstanceUID" tag="00020003" vr="UI"><Value number="1">1.3.6.1.4.1.14519.5.2.1.7308.2101.234736319276602547946349519685</Value></DicomAttribute>
        ...

#Extract values for given keywords and add as new columns in metadata frame
>>> dicom.extract_keywords(["SOPInstanceUID", "Manufacturer", "StudyDate"])

>>> dicom.metadata.count()
3

>>> dicom.metadata.column_names
[u'id', u'metadata', u'SOPInstanceUID', u'Manufacturer', u'StudyDate']

>>> dicom.metadata.inspect(truncate=20)
[#]  id  metadata              SOPInstanceUID        Manufacturer  StudyDate
============================================================================
[0]   0  <?xml version="1....  1.3.6.1.4.1.14519...  SIEMENS       20030315
[1]   1  <?xml version="1....  1.3.6.1.4.1.14519...  SIEMENS       20030315
[2]   2  <?xml version="1....  1.3.6.1.4.1.14519...  SIEMENS       20030315
def extract_keywords(self, keywords):
    """

    Extracts value for each keyword from column holding xml string and adds column for each keyword to assign value
    For missing keyword, the value is None

    Ex: keywords -> ["PatientID"]

    Parameters
    ----------

    :param keywords: (str or list(str)) List of keywords from metadata xml string


    Examples
    --------

        >>> dicom_path = "../datasets/dicom_uncompressed"

        >>> dicom = tc.dicom.import_dcm(dicom_path)

        >>> dicom.metadata.count()
        3

        >>> dicom.metadata.inspect(truncate=30)
        [#]  id  metadata
        =======================================
        [0]   0  
            
                AAE=
                1.2.840.10008.5.1.4.1.1.4
                1.3.6.1.4.1.14519.5.2.1.7308.2101.234736319276602547946349519685
                ...

        #Extract values for given keywords and add as new columns in metadata frame
        >>> dicom.extract_keywords(["SOPInstanceUID", "Manufacturer", "StudyDate"])

        >>> dicom.metadata.count()
        3

        >>> dicom.metadata.column_names
        [u'id', u'metadata', u'SOPInstanceUID', u'Manufacturer', u'StudyDate']

        >>> dicom.metadata.inspect(truncate=20)
        [#]  id  metadata              SOPInstanceUID        Manufacturer  StudyDate
        ============================================================================
        [0]   0  

def extract_tags(

self, tags)

Extracts value for each tag from column holding xml string and adds column for each tag to assign value. For missing tag, the value is None

Ex: tags -> ["00020001", "00020002"]

Parameters:
tags(str or list(str)):List of tags from xml string of metadata column
Examples:
>>> dicom_path = "../datasets/dicom_uncompressed"

>>> dicom = tc.dicom.import_dcm(dicom_path)

>>> dicom.metadata.count()
3

>>> dicom.metadata.inspect(truncate=30)
[#]  id  metadata
=======================================
[0]   0  <?xml version="1.0" encodin...
[1]   1  <?xml version="1.0" encodin...
[2]   2  <?xml version="1.0" encodin...

#Part of xml string looks as below
<?xml version="1.0" encoding="UTF-8"?>
    <NativeDicomModel xml:space="preserve">
        <DicomAttribute keyword="FileMetaInformationVersion" tag="00020001" vr="OB"><InlineBinary>AAE=</InlineBinary></DicomAttribute>
        <DicomAttribute keyword="MediaStorageSOPClassUID" tag="00020002" vr="UI"><Value number="1">1.2.840.10008.5.1.4.1.1.4</Value></DicomAttribute>
        <DicomAttribute keyword="MediaStorageSOPInstanceUID" tag="00020003" vr="UI"><Value number="1">1.3.6.1.4.1.14519.5.2.1.7308.2101.234736319276602547946349519685</Value></DicomAttribute>
        ...

#Extract value for each tag from column holding xml string
>>> dicom.extract_tags(["00080018", "00080070", "00080030"])

>>> dicom.metadata.count()
3

>>> dicom.metadata.column_names
[u'id', u'metadata', u'00080018', u'00080070', u'00080030']

>>> dicom.metadata.inspect(truncate=20)
[#]  id  metadata              00080018              00080070  00080030
============================================================================
[0]   0  <?xml version="1....  1.3.6.1.4.1.14519...  SIEMENS       20030315
[1]   1  <?xml version="1....  1.3.6.1.4.1.14519...  SIEMENS       20030315
[2]   2  <?xml version="1....  1.3.6.1.4.1.14519...  SIEMENS       20030315
def extract_tags(self, tags):
    """
    Extracts value for each tag from column holding xml string and adds column for each tag to assign value.
    For missing tag, the value is None

    Ex: tags -> ["00020001", "00020002"]

    Parameters
    ----------

    :param tags: (str or list(str)) List of tags from xml string of metadata column


    Examples
    --------

        >>> dicom_path = "../datasets/dicom_uncompressed"

        >>> dicom = tc.dicom.import_dcm(dicom_path)

        >>> dicom.metadata.count()
        3

        >>> dicom.metadata.inspect(truncate=30)
        [#]  id  metadata
        =======================================
        [0]   0  
            
                AAE=
                1.2.840.10008.5.1.4.1.1.4
                1.3.6.1.4.1.14519.5.2.1.7308.2101.234736319276602547946349519685
                ...

        #Extract value for each tag from column holding xml string
        >>> dicom.extract_tags(["00080018", "00080070", "00080030"])

        >>> dicom.metadata.count()
        3

        >>> dicom.metadata.column_names
        [u'id', u'metadata', u'00080018', u'00080070', u'00080030']

        >>> dicom.metadata.inspect(truncate=20)
        [#]  id  metadata              00080018              00080070  00080030
        ============================================================================
        [0]   0  

def filter(

self, predicate)

Filter the rows of dicom metadata and pixeldata based on given predicate

Parameters:
predicate: function that takes a metadata row and returns True for the rows to keep (see the sample filter function below)
Examples:
>>> dicom_path = "../datasets/dicom_uncompressed"

>>> dicom = tc.dicom.import_dcm(dicom_path)

>>> dicom.metadata.count()
3

>>> dicom.metadata.inspect(truncate=30)
[#]  id  metadata
=======================================
[0]   0  <?xml version="1.0" encodin...
[1]   1  <?xml version="1.0" encodin...
[2]   2  <?xml version="1.0" encodin...

#Part of xml string looks as below
<?xml version="1.0" encoding="UTF-8"?>
    <NativeDicomModel xml:space="preserve">
        <DicomAttribute keyword="FileMetaInformationVersion" tag="00020001" vr="OB"><InlineBinary>AAE=</InlineBinary></DicomAttribute>
        <DicomAttribute keyword="MediaStorageSOPClassUID" tag="00020002" vr="UI"><Value number="1">1.2.840.10008.5.1.4.1.1.4</Value></DicomAttribute>
        <DicomAttribute keyword="MediaStorageSOPInstanceUID" tag="00020003" vr="UI"><Value number="1">1.3.6.1.4.1.14519.5.2.1.7308.2101.234736319276602547946349519685</Value></DicomAttribute>
        ...

>>> import xml.etree.ElementTree as ET

#sample custom filter function
>>> def filter_meta(tag_name, tag_value):
...    def _filter_meta(row):
...        root = ET.fromstring(row["metadata"])
...        for attribute in root.findall('DicomAttribute'):
...            keyword = attribute.get('keyword')
...            if attribute.get('keyword') is not None:
...                if attribute.find('Value') is not None:
...                    value = attribute.find('Value').text
...                    if keyword == tag_name and value == tag_value:
...                        return True
...    return _filter_meta

>>> tag_name = "SOPInstanceUID"

>>> tag_value = "1.3.6.1.4.1.14519.5.2.1.7308.2101.234736319276602547946349519685"

>>> dicom.filter(filter_meta(tag_name, tag_value))
>>> dicom.metadata.count()
1

#After filter
>>> dicom.metadata.inspect(truncate=30)
[#]  id  metadata
=======================================
[0]   0  <?xml version="1.0" encodin...

>>> dicom.pixeldata.inspect(truncate=30)
[#]  id  imagematrix
=====================================================
[0]   0  [[   0.    0.    0. ...,    0.    0.    0.]
[   0.  125.  103. ...,  120.  213.  319.]
[   0.  117.   94. ...,  135.  223.  325.]
...,
[   0.   62.   21. ...,  896.  886.  854.]
[   0.   63.   23. ...,  941.  872.  897.]
[   0.   60.   30. ...,  951.  822.  906.]]
def filter(self, predicate):
    """
    Filter the rows of dicom metadata and pixeldata based on given predicate

    The predicate is applied to the metadata frame; the pixeldata frame is then
    replaced by an inner join of the surviving ids with the current pixeldata.

    Parameters
    ----------

    :param predicate: predicate to apply on filter


    Examples
    --------

        >>> dicom_path = "../datasets/dicom_uncompressed"

        >>> dicom = tc.dicom.import_dcm(dicom_path)

        >>> dicom.metadata.count()
        3

        >>> dicom.metadata.inspect(truncate=30)
        [#]  id  metadata
        =======================================
        [0]   0  <?xml version="1.0" encodin...
        [1]   1  <?xml version="1.0" encodin...
        [2]   2  <?xml version="1.0" encodin...

        #Part of xml string looks as below
        <?xml version="1.0" encoding="UTF-8"?>
            <NativeDicomModel xml:space="preserve">
                <DicomAttribute keyword="FileMetaInformationVersion" tag="00020001" vr="OB"><InlineBinary>AAE=</InlineBinary></DicomAttribute>
                <DicomAttribute keyword="MediaStorageSOPClassUID" tag="00020002" vr="UI"><Value number="1">1.2.840.10008.5.1.4.1.1.4</Value></DicomAttribute>
                <DicomAttribute keyword="MediaStorageSOPInstanceUID" tag="00020003" vr="UI"><Value number="1">1.3.6.1.4.1.14519.5.2.1.7308.2101.234736319276602547946349519685</Value></DicomAttribute>
                ...

        >>> import xml.etree.ElementTree as ET

        #sample custom filter function
        >>> def filter_meta(tag_name, tag_value):
        ...    def _filter_meta(row):
        ...        root = ET.fromstring(row["metadata"])
        ...        for attribute in root.findall('DicomAttribute'):
        ...            keyword = attribute.get('keyword')
        ...            if attribute.get('keyword') is not None:
        ...                if attribute.find('Value') is not None:
        ...                    value = attribute.find('Value').text
        ...                    if keyword == tag_name and value == tag_value:
        ...                        return True
        ...    return _filter_meta

        >>> tag_name = "SOPInstanceUID"

        >>> tag_value = "1.3.6.1.4.1.14519.5.2.1.7308.2101.234736319276602547946349519685"

        >>> dicom.filter(filter_meta(tag_name, tag_value))
        >>> dicom.metadata.count()
        1

        #After filter
        >>> dicom.metadata.inspect(truncate=30)
        [#]  id  metadata
        =======================================
        [0]   0  <?xml version="1.0" encodin...

        >>> dicom.pixeldata.inspect(truncate=30)
        [#]  id  imagematrix
        =====================================================
        [0]   0  [[   0.    0.    0. ...,    0.    0.    0.]
        [   0.  125.  103. ...,  120.  213.  319.]
        [   0.  117.   94. ...,  135.  223.  325.]
        ...,
        [   0.   62.   21. ...,  896.  886.  854.]
        [   0.   63.   23. ...,  941.  872.  897.]
        [   0.   60.   30. ...,  951.  822.  906.]]

    """

    # Step 1: apply the predicate to the metadata frame.
    self.metadata.filter(predicate)

    # Step 2: keep only the pixeldata rows whose id is still present in metadata.
    self._pixeldata = self.metadata.copy(columns="id").join_inner(self.pixeldata, "id")

def filter_by_keywords(

self, keywords_values_dict)

Keep only the rows that match every {"keyword":"value"} pair in the given dictionary (the pairs are combined with a logical 'and'), matching against the column holding the xml string

Ex: keywords_values_dict -> {"SOPInstanceUID":"1.3.6.1.4.1.14519.5.2.1.7308.2101.234736319276602547946349519685", "Manufacturer":"SIEMENS", "StudyDate":"20030315"}

Parameters:
keywords_values_dict(dict(str, str)):dictionary of keywords and values from xml string in metadata
Examples:
>>> dicom_path = "../datasets/dicom_uncompressed"

>>> dicom = tc.dicom.import_dcm(dicom_path)

>>> dicom.metadata.count()
3

>>> dicom.metadata.inspect(truncate=30)
[#]  id  metadata
=======================================
[0]   0  <?xml version="1.0" encodin...
[1]   1  <?xml version="1.0" encodin...
[2]   2  <?xml version="1.0" encodin...

#Part of xml string looks as below
<?xml version="1.0" encoding="UTF-8"?>
    <NativeDicomModel xml:space="preserve">
        <DicomAttribute keyword="FileMetaInformationVersion" tag="00020001" vr="OB"><InlineBinary>AAE=</InlineBinary></DicomAttribute>
        <DicomAttribute keyword="MediaStorageSOPClassUID" tag="00020002" vr="UI"><Value number="1">1.2.840.10008.5.1.4.1.1.4</Value></DicomAttribute>
        <DicomAttribute keyword="MediaStorageSOPInstanceUID" tag="00020003" vr="UI"><Value number="1">1.3.6.1.4.1.14519.5.2.1.7308.2101.234736319276602547946349519685</Value></DicomAttribute>
        ...

>>> keywords_values_dict = {"SOPInstanceUID":"1.3.6.1.4.1.14519.5.2.1.7308.2101.234736319276602547946349519685", "Manufacturer":"SIEMENS", "StudyDate":"20030315"}
>>> dicom.filter_by_keywords(keywords_values_dict)
>>> dicom.metadata.count()
1

#After filter
>>> dicom.metadata.inspect(truncate=30)
[#]  id  metadata
=======================================
[0]   0  <?xml version="1.0" encodin...

>>> dicom.pixeldata.inspect(truncate=30)
[#]  id  imagematrix
=====================================================
[0]   0  [[   0.    0.    0. ...,    0.    0.    0.]
[   0.  125.  103. ...,  120.  213.  319.]
[   0.  117.   94. ...,  135.  223.  325.]
...,
[   0.   62.   21. ...,  896.  886.  854.]
[   0.   63.   23. ...,  941.  872.  897.]
[   0.   60.   30. ...,  951.  822.  906.]]
def filter_by_keywords(self, keywords_values_dict):
    """
    Filter the rows based on dictionary of {"keyword":"value"}(applying 'and' operation on dictionary) from column holding xml string

    Ex: keywords_values_dict -> {"SOPInstanceUID":"1.3.6.1.4.1.14519.5.2.1.7308.2101.234736319276602547946349519685", "Manufacturer":"SIEMENS", "StudyDate":"20030315"}

    Parameters
    ----------

    :param keywords_values_dict: (dict(str, str)) dictionary of keywords and values from xml string in metadata

    Raises
    ------

    TypeError if keywords_values_dict is not a dict, or if any keyword or value in it is not a string


    Examples
    --------

        >>> dicom_path = "../datasets/dicom_uncompressed"

        >>> dicom = tc.dicom.import_dcm(dicom_path)

        >>> dicom.metadata.count()
        3

        >>> dicom.metadata.inspect(truncate=30)
        [#]  id  metadata
        =======================================
        [0]   0  <?xml version="1.0" encodin...
        [1]   1  <?xml version="1.0" encodin...
        [2]   2  <?xml version="1.0" encodin...

        #Part of xml string looks as below
        <?xml version="1.0" encoding="UTF-8"?>
            <NativeDicomModel xml:space="preserve">
                <DicomAttribute keyword="FileMetaInformationVersion" tag="00020001" vr="OB"><InlineBinary>AAE=</InlineBinary></DicomAttribute>
                <DicomAttribute keyword="MediaStorageSOPClassUID" tag="00020002" vr="UI"><Value number="1">1.2.840.10008.5.1.4.1.1.4</Value></DicomAttribute>
                <DicomAttribute keyword="MediaStorageSOPInstanceUID" tag="00020003" vr="UI"><Value number="1">1.3.6.1.4.1.14519.5.2.1.7308.2101.234736319276602547946349519685</Value></DicomAttribute>
                ...

        >>> keywords_values_dict = {"SOPInstanceUID":"1.3.6.1.4.1.14519.5.2.1.7308.2101.234736319276602547946349519685", "Manufacturer":"SIEMENS", "StudyDate":"20030315"}
        >>> dicom.filter_by_keywords(keywords_values_dict)
        >>> dicom.metadata.count()
        1

        #After filter
        >>> dicom.metadata.inspect(truncate=30)
        [#]  id  metadata
        =======================================
        [0]   0  <?xml version="1.0" encodin...

        >>> dicom.pixeldata.inspect(truncate=30)
        [#]  id  imagematrix
        =====================================================
        [0]   0  [[   0.    0.    0. ...,    0.    0.    0.]
        [   0.  125.  103. ...,  120.  213.  319.]
        [   0.  117.   94. ...,  135.  223.  325.]
        ...,
        [   0.   62.   21. ...,  896.  886.  854.]
        [   0.   63.   23. ...,  941.  872.  897.]
        [   0.   60.   30. ...,  951.  822.  906.]]

    """

    # Validate up front so a bad argument fails here with a readable message
    # instead of inside the scala call.
    if not isinstance(keywords_values_dict, dict):
        raise TypeError("keywords_values_dict should be a type of dict, but found type as %s" % type(keywords_values_dict))

    for key, value in keywords_values_dict.items():
        if not isinstance(key, basestring) or not isinstance(value, basestring):
            raise TypeError("both keyword and value should be strings, but found keyword of type %s and value of type %s"
                            % (type(key), type(value)))

    # The scala dicom is always invoked: python joins are expensive compared to serialization.
    def keep_matching_rows(scala_dicom):
        scala_dicom.filterByKeywords(self._tc.jutils.convert.to_scala_map(keywords_values_dict))

    self._call_scala(keep_matching_rows)

def filter_by_tags(

self, tags_values_dict)

Keep only the rows that match every {"tag":"value"} pair in the given dictionary (the pairs are combined with a logical 'and'), matching against the column holding the xml string

Ex: tags_values_dict -> {"00080018":"1.3.6.1.4.1.14519.5.2.1.7308.2101.234736319276602547946349519685", "00080070":"SIEMENS", "00080020":"20030315"}

Parameters:
tags_values_dict(dict(str, str)):dictionary of tags and values from xml string in metadata.
Examples:
>>> dicom_path = "../datasets/dicom_uncompressed"

>>> dicom = tc.dicom.import_dcm(dicom_path)

>>> dicom.metadata.count()
3

>>> dicom.metadata.inspect(truncate=30)
[#]  id  metadata
=======================================
[0]   0  <?xml version="1.0" encodin...
[1]   1  <?xml version="1.0" encodin...
[2]   2  <?xml version="1.0" encodin...

#Part of xml string looks as below
<?xml version="1.0" encoding="UTF-8"?>
    <NativeDicomModel xml:space="preserve">
        <DicomAttribute keyword="FileMetaInformationVersion" tag="00020001" vr="OB"><InlineBinary>AAE=</InlineBinary></DicomAttribute>
        <DicomAttribute keyword="MediaStorageSOPClassUID" tag="00020002" vr="UI"><Value number="1">1.2.840.10008.5.1.4.1.1.4</Value></DicomAttribute>
        <DicomAttribute keyword="MediaStorageSOPInstanceUID" tag="00020003" vr="UI"><Value number="1">1.3.6.1.4.1.14519.5.2.1.7308.2101.234736319276602547946349519685</Value></DicomAttribute>
        ...

>>> tags_values_dict = {"00080018":"1.3.6.1.4.1.14519.5.2.1.7308.2101.234736319276602547946349519685", "00080070":"SIEMENS", "00080020":"20030315"}
>>> dicom.filter_by_tags(tags_values_dict)
>>> dicom.metadata.count()
1

#After filter
>>> dicom.metadata.inspect(truncate=30)
[#]  id  metadata
=======================================
[0]   0  <?xml version="1.0" encodin...

>>> dicom.pixeldata.inspect(truncate=30)
[#]  id  imagematrix
=====================================================
[0]   0  [[   0.    0.    0. ...,    0.    0.    0.]
[   0.  125.  103. ...,  120.  213.  319.]
[   0.  117.   94. ...,  135.  223.  325.]
...,
[   0.   62.   21. ...,  896.  886.  854.]
[   0.   63.   23. ...,  941.  872.  897.]
[   0.   60.   30. ...,  951.  822.  906.]]
def filter_by_tags(self, tags_values_dict):
    """
    Filter the rows based on dictionary of {"tag":"value"}(applying 'and' operation on dictionary) from column holding xml string

    Ex: tags_values_dict -> {"00080018":"1.3.6.1.4.1.14519.5.2.1.7308.2101.234736319276602547946349519685", "00080070":"SIEMENS", "00080020":"20030315"}

    Parameters
    ----------

    :param tags_values_dict: (dict(str, str)) dictionary of tags and values from xml string in metadata.

    Raises
    ------

    TypeError if tags_values_dict is not a dict, or if any tag or value in it is not a string


    Examples
    --------

        >>> dicom_path = "../datasets/dicom_uncompressed"

        >>> dicom = tc.dicom.import_dcm(dicom_path)

        >>> dicom.metadata.count()
        3

        >>> dicom.metadata.inspect(truncate=30)
        [#]  id  metadata
        =======================================
        [0]   0  <?xml version="1.0" encodin...
        [1]   1  <?xml version="1.0" encodin...
        [2]   2  <?xml version="1.0" encodin...

        #Part of xml string looks as below
        <?xml version="1.0" encoding="UTF-8"?>
            <NativeDicomModel xml:space="preserve">
                <DicomAttribute keyword="FileMetaInformationVersion" tag="00020001" vr="OB"><InlineBinary>AAE=</InlineBinary></DicomAttribute>
                <DicomAttribute keyword="MediaStorageSOPClassUID" tag="00020002" vr="UI"><Value number="1">1.2.840.10008.5.1.4.1.1.4</Value></DicomAttribute>
                <DicomAttribute keyword="MediaStorageSOPInstanceUID" tag="00020003" vr="UI"><Value number="1">1.3.6.1.4.1.14519.5.2.1.7308.2101.234736319276602547946349519685</Value></DicomAttribute>
                ...

        >>> tags_values_dict = {"00080018":"1.3.6.1.4.1.14519.5.2.1.7308.2101.234736319276602547946349519685", "00080070":"SIEMENS", "00080020":"20030315"}
        >>> dicom.filter_by_tags(tags_values_dict)
        >>> dicom.metadata.count()
        1

        #After filter
        >>> dicom.metadata.inspect(truncate=30)
        [#]  id  metadata
        =======================================
        [0]   0  <?xml version="1.0" encodin...

        >>> dicom.pixeldata.inspect(truncate=30)
        [#]  id  imagematrix
        =====================================================
        [0]   0  [[   0.    0.    0. ...,    0.    0.    0.]
        [   0.  125.  103. ...,  120.  213.  319.]
        [   0.  117.   94. ...,  135.  223.  325.]
        ...,
        [   0.   62.   21. ...,  896.  886.  854.]
        [   0.   63.   23. ...,  941.  872.  897.]
        [   0.   60.   30. ...,  951.  822.  906.]]

    """

    # Validate up front so a bad argument fails here with a readable message
    # instead of inside the scala call.
    if not isinstance(tags_values_dict, dict):
        raise TypeError("tags_values_dict should be a type of dict, but found type as %s" % type(tags_values_dict))

    for tag, value in tags_values_dict.items():
        if not isinstance(tag, basestring) or not isinstance(value, basestring):
            raise TypeError("both tag and value should be strings, but found tag of type %s and value of type %s"
                            % (type(tag), type(value)))

    # The scala dicom is always invoked: python joins are expensive compared to serialization.
    def keep_matching_rows(scala_dicom):
        scala_dicom.filterByTags(self._tc.jutils.convert.to_scala_map(tags_values_dict))

    self._call_scala(keep_matching_rows)

def save(

self, path)

Persists the dicom object to the given file path

def save(self, path):
    """
    Persists the dicom object to the given file path

    Parameters
    ----------

    :param path: file path handed unchanged to the scala dicom's save
    """
    # Saving is delegated to the scala dicom obtained from _get_new_scala().
    scala_dicom = self._get_new_scala()
    scala_dicom.save(path)
spark-tk Python API Documentation