sparktk.frame.constructors.import_hive module
# vim: set encoding=utf-8
# Copyright (c) 2016 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from sparktk.tkcontext import TkContext
def import_hive(hive_query, tc=TkContext.implicit):
"""
Import data from hive table into frame.
Define the sql query to retrieve the data from a hive table.
Only a subset of Hive data types are supported:
DataType Support
---------- ------------------------------------
boolean cast to int
bigint native support
int native support
tinyint cast to int
smallint cast to int
decimal cast to double, may lose precision
double native support
float native support
date cast to string
string native support
timestamp cast to string
varchar cast to string
arrays not supported
binary not supported
char not supported
maps not supported
structs not supported
union not supported
Parameters
----------
:param hive_query: (str) hive query to fetch data from table
:param tc: (TkContext) TK context
:return: (Frame) returns frame with hive table data
Examples
--------
Load data into frame from a hive table based on hive query
>>> h_query = "select * from demo_test"
>>> frame = tc.frame.import_hive(h_query)
-etc-
>>> frame.inspect()
[#] number strformat
======================
[0] 1 one
[1] 2 two
[2] 3 three
[3] 4 four
"""
if not isinstance(hive_query, basestring):
raise ValueError("hive query parameter must be a string, but is {0}.".format(type(hive_query)))
TkContext.validate(tc)
scala_frame = tc.sc._jvm.org.trustedanalytics.sparktk.frame.internal.constructors.Import.importHive(tc.jutils.get_scala_sc(), hive_query)
from sparktk.frame.frame import Frame
return Frame(tc, scala_frame)
Functions
def import_hive(
hive_query, tc=<class 'sparktk.arguments.implicit'>)
Import data from hive table into frame.
Define the sql query to retrieve the data from a hive table.
Only a subset of Hive data types are supported:
DataType Support
---------- ------------------------------------
boolean cast to int
bigint native support
int native support
tinyint cast to int
smallint cast to int
decimal cast to double, may lose precision
double native support
float native support
date cast to string
string native support
timestamp cast to string
varchar cast to string
arrays not supported
binary not supported
char not supported
maps not supported
structs not supported
union not supported
Parameters:
hive_query | (str): | hive query to fetch data from table |
tc | (TkContext): | TK context |
Returns | (Frame): | returns frame with hive table data |
Examples:
Load data into frame from a hive table based on hive query
>>> h_query = "select * from demo_test"
>>> frame = tc.frame.import_hive(h_query)
-etc-
>>> frame.inspect()
[#] number strformat
======================
[0] 1 one
[1] 2 two
[2] 3 three
[3] 4 four
def import_hive(hive_query, tc=TkContext.implicit):
"""
Import data from hive table into frame.
Define the sql query to retrieve the data from a hive table.
Only a subset of Hive data types are supported:
DataType Support
---------- ------------------------------------
boolean cast to int
bigint native support
int native support
tinyint cast to int
smallint cast to int
decimal cast to double, may lose precision
double native support
float native support
date cast to string
string native support
timestamp cast to string
varchar cast to string
arrays not supported
binary not supported
char not supported
maps not supported
structs not supported
union not supported
Parameters
----------
:param hive_query: (str) hive query to fetch data from table
:param tc: (TkContext) TK context
:return: (Frame) returns frame with hive table data
Examples
--------
Load data into frame from a hive table based on hive query
>>> h_query = "select * from demo_test"
>>> frame = tc.frame.import_hive(h_query)
-etc-
>>> frame.inspect()
[#] number strformat
======================
[0] 1 one
[1] 2 two
[2] 3 three
[3] 4 four
"""
if not isinstance(hive_query, basestring):
raise ValueError("hive query parameter must be a string, but is {0}.".format(type(hive_query)))
TkContext.validate(tc)
scala_frame = tc.sc._jvm.org.trustedanalytics.sparktk.frame.internal.constructors.Import.importHive(tc.jutils.get_scala_sc(), hive_query)
from sparktk.frame.frame import Frame
return Frame(tc, scala_frame)