sparktk.frame.ops.collect module
# vim: set encoding=utf-8
# Copyright (c) 2016 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from sparktk.frame.ops.take import TakeCollectHelper
from sparktk.frame.schema import get_schema_for_columns
from sparktk.arguments import affirm_type
def collect(self, columns=None):
"""
Brings all the rows of data from the frame into a local python list of lists
(Use the 'take' operation for control over row count and offset of the collected data)
Parameters
----------
:param columns: (Optional[str or List[str]) If not None, only the given columns' data will be provided.
By default, all columns are included.
:return: (List[List[*]]) the frame data represented as a list of lists
Examples
--------
>>> schema = [('name',str), ('age', int), ('tenure', int), ('phone', str)]
>>> rows = [['Fred', 39, 16, '555-1234'], ['Susan', 33, 3, '555-0202'], ['Thurston', 65, 26, '555-4510'], ['Judy', 44, 14, '555-2183']]
>>> frame = tc.frame.create(rows, schema)
>>> frame.collect()
[['Fred', 39, 16, '555-1234'], ['Susan', 33, 3, '555-0202'], ['Thurston', 65, 26, '555-4510'], ['Judy', 44, 14, '555-2183']]
>>> frame.collect(['name', 'phone'])
[['Fred', '555-1234'], ['Susan', '555-0202'], ['Thurston', '555-4510'], ['Judy', '555-2183']]
"""
if columns is not None:
affirm_type.list_of_str(columns, "columns")
if not columns:
return []
if self._is_scala:
scala_data = self._scala.collect(self._tc.jutils.convert.to_scala_option_list_string(columns))
schema = get_schema_for_columns(self.schema, columns) if columns else self.schema
data = TakeCollectHelper.scala_rows_to_python(self._tc, scala_data, schema)
else:
if columns:
select = TakeCollectHelper.get_select_columns_function(self.schema, columns)
data = self._python.rdd.map(select).collect()
else:
data = self._python.rdd.collect()
return data
Functions
def collect(
self, columns=None)
Brings all the rows of data from the frame into a local python list of lists
(Use the 'take' operation for control over row count and offset of the collected data)
Parameters:
columns | (Optional[str or List[str]): | If not None, only the given columns' data will be provided. By default, all columns are included. |
Returns | (List[List[*]]): | the frame data represented as a list of lists |
Examples:
>>> schema = [('name',str), ('age', int), ('tenure', int), ('phone', str)]
>>> rows = [['Fred', 39, 16, '555-1234'], ['Susan', 33, 3, '555-0202'], ['Thurston', 65, 26, '555-4510'], ['Judy', 44, 14, '555-2183']]
>>> frame = tc.frame.create(rows, schema)
>>> frame.collect()
[['Fred', 39, 16, '555-1234'], ['Susan', 33, 3, '555-0202'], ['Thurston', 65, 26, '555-4510'], ['Judy', 44, 14, '555-2183']]
>>> frame.collect(['name', 'phone'])
[['Fred', '555-1234'], ['Susan', '555-0202'], ['Thurston', '555-4510'], ['Judy', '555-2183']]
def collect(self, columns=None):
"""
Brings all the rows of data from the frame into a local python list of lists
(Use the 'take' operation for control over row count and offset of the collected data)
Parameters
----------
:param columns: (Optional[str or List[str]) If not None, only the given columns' data will be provided.
By default, all columns are included.
:return: (List[List[*]]) the frame data represented as a list of lists
Examples
--------
>>> schema = [('name',str), ('age', int), ('tenure', int), ('phone', str)]
>>> rows = [['Fred', 39, 16, '555-1234'], ['Susan', 33, 3, '555-0202'], ['Thurston', 65, 26, '555-4510'], ['Judy', 44, 14, '555-2183']]
>>> frame = tc.frame.create(rows, schema)
>>> frame.collect()
[['Fred', 39, 16, '555-1234'], ['Susan', 33, 3, '555-0202'], ['Thurston', 65, 26, '555-4510'], ['Judy', 44, 14, '555-2183']]
>>> frame.collect(['name', 'phone'])
[['Fred', '555-1234'], ['Susan', '555-0202'], ['Thurston', '555-4510'], ['Judy', '555-2183']]
"""
if columns is not None:
affirm_type.list_of_str(columns, "columns")
if not columns:
return []
if self._is_scala:
scala_data = self._scala.collect(self._tc.jutils.convert.to_scala_option_list_string(columns))
schema = get_schema_for_columns(self.schema, columns) if columns else self.schema
data = TakeCollectHelper.scala_rows_to_python(self._tc, scala_data, schema)
else:
if columns:
select = TakeCollectHelper.get_select_columns_function(self.schema, columns)
data = self._python.rdd.map(select).collect()
else:
data = self._python.rdd.collect()
return data